From d7b34a80bbbf98ea11e0d679eeede076421748f1 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 22 Mar 2024 16:09:14 +0000 Subject: [PATCH 001/265] stash --- .../Net/include/Poco/Net/HTTPServerSession.h | 5 +++++ base/poco/Net/src/HTTPServerSession.cpp | 22 ++++++++++--------- .../library-bridge/LibraryBridgeHandlers.cpp | 16 +++++--------- .../library-bridge/LibraryBridgeHandlers.h | 6 ++--- programs/odbc-bridge/PingHandler.cpp | 2 +- programs/server/Server.cpp | 1 + src/Core/ServerSettings.h | 1 + src/IO/HTTPCommon.cpp | 15 +++++++++---- src/IO/HTTPCommon.h | 2 +- src/Server/HTTP/HTTPServer.cpp | 3 ++- src/Server/HTTP/HTTPServerResponse.h | 2 ++ .../WriteBufferFromHTTPServerResponse.cpp | 4 +--- .../HTTP/WriteBufferFromHTTPServerResponse.h | 2 -- src/Server/HTTPHandler.cpp | 3 +-- src/Server/InterserverIOHTTPHandler.cpp | 3 +-- src/Server/PrometheusRequestHandler.cpp | 10 ++------- src/Server/PrometheusRequestHandler.h | 7 +----- src/Server/ReplicasStatusHandler.cpp | 3 +-- src/Server/StaticRequestHandler.cpp | 10 ++++----- src/Server/WebUIRequestHandler.cpp | 9 +++----- .../00408_http_keep_alive.reference | 6 ++--- .../0_stateless/00501_http_head.reference | 4 ++-- 22 files changed, 63 insertions(+), 73 deletions(-) diff --git a/base/poco/Net/include/Poco/Net/HTTPServerSession.h b/base/poco/Net/include/Poco/Net/HTTPServerSession.h index ec928af304f..192d71962bc 100644 --- a/base/poco/Net/include/Poco/Net/HTTPServerSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPServerSession.h @@ -56,10 +56,15 @@ namespace Net SocketAddress serverAddress(); /// Returns the server's address. + size_t getKeepAliveTimeout() const { return _params->getKeepAliveTimeout().totalSeconds(); } + + size_t getMaxKeepAliveRequests() const { return _params->getMaxKeepAliveRequests(); } + private: bool _firstRequest; Poco::Timespan _keepAliveTimeout; int _maxKeepAliveRequests; + HTTPServerParams::Ptr _params; }; diff --git a/base/poco/Net/src/HTTPServerSession.cpp b/base/poco/Net/src/HTTPServerSession.cpp index d4f2b24879e..3ea689cb0cf 100644 --- a/base/poco/Net/src/HTTPServerSession.cpp +++ b/base/poco/Net/src/HTTPServerSession.cpp @@ -19,11 +19,12 @@ namespace Poco { namespace Net { -HTTPServerSession::HTTPServerSession(const StreamSocket& socket, HTTPServerParams::Ptr pParams): - HTTPSession(socket, pParams->getKeepAlive()), - _firstRequest(true), - _keepAliveTimeout(pParams->getKeepAliveTimeout()), - _maxKeepAliveRequests(pParams->getMaxKeepAliveRequests()) +HTTPServerSession::HTTPServerSession(const StreamSocket & socket, HTTPServerParams::Ptr pParams) + : HTTPSession(socket, pParams->getKeepAlive()) + , _firstRequest(true) + , _keepAliveTimeout(pParams->getKeepAliveTimeout()) + , _maxKeepAliveRequests(pParams->getMaxKeepAliveRequests()) + , _params(pParams) { setTimeout(pParams->getTimeout()); } @@ -46,11 +47,12 @@ bool HTTPServerSession::hasMoreRequests() } else if (_maxKeepAliveRequests != 0 && getKeepAlive()) { - if (_maxKeepAliveRequests > 0) - --_maxKeepAliveRequests; - return buffered() > 0 || socket().poll(_keepAliveTimeout, Socket::SELECT_READ); - } - else return false; + if (_maxKeepAliveRequests > 0) + --_maxKeepAliveRequests; + return buffered() > 0 || socket().poll(_keepAliveTimeout, Socket::SELECT_READ); + } + else + return false; } diff --git a/programs/library-bridge/LibraryBridgeHandlers.cpp b/programs/library-bridge/LibraryBridgeHandlers.cpp index 26d887cfc98..094cef6716d 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.cpp +++ 
b/programs/library-bridge/LibraryBridgeHandlers.cpp @@ -374,10 +374,8 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ } -ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_) - : WithContext(context_) - , keep_alive_timeout(keep_alive_timeout_) - , log(getLogger("ExternalDictionaryLibraryBridgeExistsHandler")) +ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExistsHandler(ContextPtr context_) + : WithContext(context_), log(getLogger("ExternalDictionaryLibraryBridgeExistsHandler")) { } @@ -401,7 +399,7 @@ void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerReque String res = library_handler ? "1" : "0"; - setResponseDefaultHeaders(response, keep_alive_timeout); + setResponseDefaultHeaders(response); LOG_TRACE(log, "Sending ping response: {} (dictionary id: {})", res, dictionary_id); response.sendBuffer(res.data(), res.size()); } @@ -617,10 +615,8 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ } -CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_) - : WithContext(context_) - , keep_alive_timeout(keep_alive_timeout_) - , log(getLogger("CatBoostLibraryBridgeExistsHandler")) +CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(ContextPtr context_) + : WithContext(context_), log(getLogger("CatBoostLibraryBridgeExistsHandler")) { } @@ -634,7 +630,7 @@ void CatBoostLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & reque String res = "1"; - setResponseDefaultHeaders(response, keep_alive_timeout); + setResponseDefaultHeaders(response); LOG_TRACE(log, "Sending ping response: {}", res); response.sendBuffer(res.data(), res.size()); } diff --git a/programs/library-bridge/LibraryBridgeHandlers.h b/programs/library-bridge/LibraryBridgeHandlers.h index 1db71eb24cb..83bca24ce1f 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.h +++ b/programs/library-bridge/LibraryBridgeHandlers.h @@ -34,12 +34,11 @@ private: class ExternalDictionaryLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext { public: - ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_); + ExternalDictionaryLibraryBridgeExistsHandler(ContextPtr context_); void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: - const size_t keep_alive_timeout; LoggerPtr log; }; @@ -77,12 +76,11 @@ private: class CatBoostLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext { public: - CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_); + CatBoostLibraryBridgeExistsHandler(ContextPtr context_); void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: - const size_t keep_alive_timeout; LoggerPtr log; }; diff --git a/programs/odbc-bridge/PingHandler.cpp b/programs/odbc-bridge/PingHandler.cpp index 80d0e2bf4a9..e5d094fb7eb 100644 --- a/programs/odbc-bridge/PingHandler.cpp +++ b/programs/odbc-bridge/PingHandler.cpp @@ -10,7 +10,7 @@ void PingHandler::handleRequest(HTTPServerRequest & /* request */, HTTPServerRes { try { - setResponseDefaultHeaders(response, keep_alive_timeout); + setResponseDefaultHeaders(response); const char * data = "Ok.\n"; response.sendBuffer(data, strlen(data)); } 
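The hunks above all funnel through the argument-less `setResponseDefaultHeaders(response)`, which now derives the `Keep-Alive` header from the server session's parameters instead of a timeout threaded through each handler. A minimal sketch of how a client could observe the resulting header (the host, port 8123, and query string are illustrative assumptions, not part of the patch):

```python
# Hypothetical probe, assuming a locally running server on port 8123.
import http.client

conn = http.client.HTTPConnection("localhost", 8123)
conn.request("GET", "/?query=SELECT%201")
resp = conn.getresponse()
resp.read()  # drain the body so the connection stays reusable
# With keep_alive_timeout=10 and max_keep_alive_requests=10000 the response
# should now carry: Keep-Alive: timeout=10, max=10000
print(resp.getheader("Keep-Alive"))
conn.close()
```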
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index b67a4eccd15..b741cd7f644 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -2251,6 +2251,7 @@ void Server::createServers( Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; http_params->setTimeout(settings.http_receive_timeout); http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); + http_params->setMaxKeepAliveRequests(static_cast<int>(global_context->getServerSettings().max_keep_alive_requests)); Poco::Util::AbstractConfiguration::Keys protocols; config.keys("protocols", protocols); diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index da82cdea5a4..7480d94e81d 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -113,6 +113,7 @@ namespace DB M(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \ M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \ M(Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \ + M(UInt64, max_keep_alive_requests, 10000, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \ M(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \ M(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \ M(Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests.
Inherited from default profile `http_receive_timeout` if not set explicitly.", 0) \ diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 09f7724d613..2b3f7f062bc 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -33,14 +34,20 @@ namespace ErrorCodes extern const int RECEIVED_ERROR_TOO_MANY_REQUESTS; } -void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout) +void setResponseDefaultHeaders(HTTPServerResponse & response) { if (!response.getKeepAlive()) return; - Poco::Timespan timeout(keep_alive_timeout, 0); - if (timeout.totalSeconds()) - response.set("Keep-Alive", "timeout=" + std::to_string(timeout.totalSeconds())); + const size_t keep_alive_timeout = response.getSession().getKeepAliveTimeout(); + const size_t keep_alive_max_requests = response.getSession().getMaxKeepAliveRequests(); + if (keep_alive_timeout) + { + if (keep_alive_max_requests) + response.set("Keep-Alive", fmt::format("timeout={}, max={}", keep_alive_timeout, keep_alive_max_requests)); + else + response.set("Keep-Alive", fmt::format("timeout={}", keep_alive_timeout)); + } } HTTPSessionPtr makeHTTPSession( diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h index 63dffcf6878..fa6086224f5 100644 --- a/src/IO/HTTPCommon.h +++ b/src/IO/HTTPCommon.h @@ -54,7 +54,7 @@ private: using HTTPSessionPtr = std::shared_ptr<Poco::Net::HTTPClientSession>; -void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout); +void setResponseDefaultHeaders(HTTPServerResponse & response); /// Create session object to perform requests and set required parameters. HTTPSessionPtr makeHTTPSession( diff --git a/src/Server/HTTP/HTTPServer.cpp b/src/Server/HTTP/HTTPServer.cpp index 90bdebf6451..9b8feae3e26 100644 --- a/src/Server/HTTP/HTTPServer.cpp +++ b/src/Server/HTTP/HTTPServer.cpp @@ -13,7 +13,8 @@ HTTPServer::HTTPServer( Poco::Net::HTTPServerParams::Ptr params, const ProfileEvents::Event & read_event, const ProfileEvents::Event & write_event) - : TCPServer(new HTTPServerConnectionFactory(context, params, factory_, read_event, write_event), thread_pool, socket_, params), factory(factory_) + : TCPServer(new HTTPServerConnectionFactory(context, params, factory_, read_event, write_event), thread_pool, socket_, params) , factory(factory_) { } diff --git a/src/Server/HTTP/HTTPServerResponse.h b/src/Server/HTTP/HTTPServerResponse.h index 8edb785e7c5..9793fc8b24b 100644 --- a/src/Server/HTTP/HTTPServerResponse.h +++ b/src/Server/HTTP/HTTPServerResponse.h @@ -245,6 +245,8 @@ public: void attachRequest(HTTPServerRequest * request_) { request = request_; } + const Poco::Net::HTTPServerSession & getSession() const { return session; } + private: Poco::Net::HTTPServerSession & session; HTTPServerRequest * request = nullptr; diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index 8098671a903..a39f6de51d0 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -30,7 +30,7 @@ void WriteBufferFromHTTPServerResponse::startSendHeaders() if (add_cors_header) response.set("Access-Control-Allow-Origin", "*"); - setResponseDefaultHeaders(response, keep_alive_timeout); + setResponseDefaultHeaders(response); std::stringstream header; //STYLE_CHECK_ALLOW_STD_STRING_STREAM response.beginWrite(header); @@ -119,12 +119,10 @@ void WriteBufferFromHTTPServerResponse::nextImpl()
WriteBufferFromHTTPServerResponse::WriteBufferFromHTTPServerResponse( HTTPServerResponse & response_, bool is_http_method_head_, - UInt64 keep_alive_timeout_, const ProfileEvents::Event & write_event_) : HTTPWriteBuffer(response_.getSocket(), write_event_) , response(response_) , is_http_method_head(is_http_method_head_) - , keep_alive_timeout(keep_alive_timeout_) { } diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h index a3952b7c553..f0c80f24582 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h @@ -29,7 +29,6 @@ public: WriteBufferFromHTTPServerResponse( HTTPServerResponse & response_, bool is_http_method_head_, - UInt64 keep_alive_timeout_, const ProfileEvents::Event & write_event_ = ProfileEvents::end()); ~WriteBufferFromHTTPServerResponse() override; @@ -91,7 +90,6 @@ private: bool is_http_method_head; bool add_cors_header = false; - size_t keep_alive_timeout = 0; bool initialized = false; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index c112eefec6c..ac6c9d6a0a5 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -621,7 +621,6 @@ void HTTPHandler::processQuery( std::make_shared<WriteBufferFromHTTPServerResponse>( response, request.getMethod() == HTTPRequest::HTTP_HEAD, - context->getServerSettings().keep_alive_timeout.totalSeconds(), write_event); used_output.out = used_output.out_holder; used_output.out_maybe_compressed = used_output.out_holder; @@ -926,7 +925,7 @@ try if (!used_output.out_holder && !used_output.exception_is_written) { /// If nothing was sent yet and we don't even know if we must compress the response. - WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT).writeln(s); + WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD).writeln(s); } else if (used_output.out_maybe_compressed) { diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index 28045380cd7..9a87992731c 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -87,9 +87,8 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe response.setChunkedTransferEncoding(true); Output used_output; - const auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); used_output.out = std::make_shared<WriteBufferFromHTTPServerResponse>( - response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout, write_event); + response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, write_event); auto finalize_output = [&] { diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index dff960f7031..0ad5f907467 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -18,21 +18,15 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe { try { - /// Raw config reference is used here to avoid dependency on Context and ServerSettings. - /// This is painful, because this class is also used in a build with CLICKHOUSE_KEEPER_STANDALONE_BUILD=1 - /// And there ordinary Context is replaced with a tiny clone. - const auto & config = server.config(); - unsigned keep_alive_timeout = config.getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); - /// In order to make keep-alive works.
if (request.getVersion() == HTTPServerRequest::HTTP_1_1) response.setChunkedTransferEncoding(true); - setResponseDefaultHeaders(response, keep_alive_timeout); + setResponseDefaultHeaders(response); response.setContentType("text/plain; version=0.0.4; charset=UTF-8"); - WriteBufferFromHTTPServerResponse wb(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout, write_event); + WriteBufferFromHTTPServerResponse wb(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, write_event); try { metrics_writer->write(wb); diff --git a/src/Server/PrometheusRequestHandler.h b/src/Server/PrometheusRequestHandler.h index d120752c8c5..cc7848d1dd0 100644 --- a/src/Server/PrometheusRequestHandler.h +++ b/src/Server/PrometheusRequestHandler.h @@ -12,15 +12,10 @@ class IServer; class PrometheusRequestHandler : public HTTPRequestHandler { private: - IServer & server; PrometheusMetricsWriterPtr metrics_writer; public: - PrometheusRequestHandler(IServer & server_, PrometheusMetricsWriterPtr metrics_writer_) : server(server_) , metrics_writer(std::move(metrics_writer_)) { } + PrometheusRequestHandler(IServer &, PrometheusMetricsWriterPtr metrics_writer_) : metrics_writer(std::move(metrics_writer_)) { } void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; }; diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index 91c6bd722d3..964e3834037 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -84,8 +84,7 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe } } - const auto & server_settings = getContext()->getServerSettings(); - setResponseDefaultHeaders(response, server_settings.keep_alive_timeout.totalSeconds()); + setResponseDefaultHeaders(response); if (!ok) { diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp index 67bf3875de4..3d618031875 100644 --- a/src/Server/StaticRequestHandler.cpp +++ b/src/Server/StaticRequestHandler.cpp @@ -33,10 +33,9 @@ namespace ErrorCodes extern const int INVALID_CONFIG_PARAMETER; } -static inline std::unique_ptr<WriteBuffer> -responseWriteBuffer(HTTPServerRequest & request, HTTPServerResponse & response, UInt64 keep_alive_timeout) +static inline std::unique_ptr<WriteBuffer> responseWriteBuffer(HTTPServerRequest & request, HTTPServerResponse & response) { - auto buf = std::unique_ptr<WriteBuffer>(new WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout)); + auto buf = std::unique_ptr<WriteBuffer>(new WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD)); /// The client can pass a HTTP header indicating supported compression method (gzip or deflate).
String http_response_compression_methods = request.get("Accept-Encoding", ""); @@ -89,8 +88,7 @@ static inline void trySendExceptionToClient( void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/) { - auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); - auto out = responseWriteBuffer(request, response, keep_alive_timeout); + auto out = responseWriteBuffer(request, response); try { @@ -105,7 +103,7 @@ void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServer "The Transfer-Encoding is not chunked and there " "is no Content-Length header for POST request"); - setResponseDefaultHeaders(response, keep_alive_timeout); + setResponseDefaultHeaders(response); response.setStatusAndReason(Poco::Net::HTTPResponse::HTTPStatus(status)); writeResponse(*out); } diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index 68d3ff0b325..faad9d57519 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -29,18 +29,15 @@ DashboardWebUIRequestHandler::DashboardWebUIRequestHandler(IServer & server_) : BinaryWebUIRequestHandler::BinaryWebUIRequestHandler(IServer & server_) : server(server_) {} JavaScriptWebUIRequestHandler::JavaScriptWebUIRequestHandler(IServer & server_) : server(server_) {} -static void handle(const IServer & server, HTTPServerRequest & request, HTTPServerResponse & response, std::string_view html) +static void handle(const IServer &, HTTPServerRequest & request, HTTPServerResponse & response, std::string_view html) { - auto keep_alive_timeout = server.context()->getServerSettings().keep_alive_timeout.totalSeconds(); - response.setContentType("text/html; charset=UTF-8"); if (request.getVersion() == HTTPServerRequest::HTTP_1_1) response.setChunkedTransferEncoding(true); - setResponseDefaultHeaders(response, keep_alive_timeout); + setResponseDefaultHeaders(response); response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD, keep_alive_timeout).write(html.data(), html.size()); - + WriteBufferFromHTTPServerResponse(response, request.getMethod() == HTTPRequest::HTTP_HEAD).write(html.data(), html.size()); } void PlayWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &) diff --git a/tests/queries/0_stateless/00408_http_keep_alive.reference b/tests/queries/0_stateless/00408_http_keep_alive.reference index 17a7fd690a8..d5d7dacce9e 100644 --- a/tests/queries/0_stateless/00408_http_keep_alive.reference +++ b/tests/queries/0_stateless/00408_http_keep_alive.reference @@ -1,6 +1,6 @@ < Connection: Keep-Alive -< Keep-Alive: timeout=10 +< Keep-Alive: timeout=10, max=10000 < Connection: Keep-Alive -< Keep-Alive: timeout=10 +< Keep-Alive: timeout=10, max=10000 < Connection: Keep-Alive -< Keep-Alive: timeout=10 +< Keep-Alive: timeout=10, max=10000 diff --git a/tests/queries/0_stateless/00501_http_head.reference b/tests/queries/0_stateless/00501_http_head.reference index 8351327b356..db82132b145 100644 --- a/tests/queries/0_stateless/00501_http_head.reference +++ b/tests/queries/0_stateless/00501_http_head.reference @@ -2,11 +2,11 @@ HTTP/1.1 200 OK Connection: Keep-Alive Content-Type: text/tab-separated-values; charset=UTF-8 Transfer-Encoding: chunked -Keep-Alive: timeout=10 +Keep-Alive: timeout=10, max=10000 HTTP/1.1 200 
OK Connection: Keep-Alive Content-Type: text/tab-separated-values; charset=UTF-8 Transfer-Encoding: chunked -Keep-Alive: timeout=10 +Keep-Alive: timeout=10, max=10000 From 7aace4d876173ce18ade57ec1bdc332efff7ce80 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 22 Mar 2024 17:24:25 +0000 Subject: [PATCH 002/265] add test --- .../test_server_keep_alive/__init__.py | 0 .../configs/keep_alive_settings.xml | 4 ++ .../test_server_keep_alive/test.py | 46 +++++++++++++++++++ 3 files changed, 50 insertions(+) create mode 100644 tests/integration/test_server_keep_alive/__init__.py create mode 100644 tests/integration/test_server_keep_alive/configs/keep_alive_settings.xml create mode 100644 tests/integration/test_server_keep_alive/test.py diff --git a/tests/integration/test_server_keep_alive/__init__.py b/tests/integration/test_server_keep_alive/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_server_keep_alive/configs/keep_alive_settings.xml b/tests/integration/test_server_keep_alive/configs/keep_alive_settings.xml new file mode 100644 index 00000000000..06e68044817 --- /dev/null +++ b/tests/integration/test_server_keep_alive/configs/keep_alive_settings.xml @@ -0,0 +1,4 @@ +<clickhouse> + <keep_alive_timeout>3600</keep_alive_timeout> + <max_keep_alive_requests>5</max_keep_alive_requests> +</clickhouse> diff --git a/tests/integration/test_server_keep_alive/test.py b/tests/integration/test_server_keep_alive/test.py new file mode 100644 index 00000000000..0f88fe47673 --- /dev/null +++ b/tests/integration/test_server_keep_alive/test.py @@ -0,0 +1,46 @@ +import logging +import pytest +import requests + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", main_configs=["configs/keep_alive_settings.xml"]) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def test_requests_with_keep_alive(start_cluster): + # In this test `keep_alive_timeout` is set to one hour so that the connection is never reset due to a timeout, and `max_keep_alive_requests` is set to 5. + # We expect the server to close the connection after every 5 requests. We detect a connection reset by a change in the source port. + # So the first 5 requests should come from the same port, and the following 5 requests should come from another port.
+ session = requests.Session() + for i in range(10): + session.get( + f"http://{node.ip_address}:8123/?query=select%201&log_comment=test_requests_with_keep_alive_{i}" + ) + + ports = node.query( + """ + SYSTEM FLUSH LOGS; + + SELECT port + FROM system.query_log + WHERE log_comment like 'test_requests_with_keep_alive_%' AND type = 'QueryFinish' + ORDER BY log_comment + """ + ).split("\n")[:-1] + + expected = 5 * [ports[0]] + [ports[5]] * 5 + + assert ports == expected From 146d7603388ca161ee3340ab1f582971a4e45a03 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 22 Mar 2024 17:38:06 +0000 Subject: [PATCH 003/265] rm more --- programs/keeper/Keeper.cpp | 2 +- src/IO/HTTPCommon.cpp | 1 - src/Server/HTTP/HTTPServer.cpp | 3 +-- src/Server/HTTPHandlerFactory.cpp | 10 ++++------ src/Server/HTTPHandlerFactory.h | 8 ++------ src/Server/PrometheusRequestHandler.cpp | 13 +++---------- src/Server/PrometheusRequestHandler.h | 2 +- src/Server/WebUIRequestHandler.cpp | 14 +++++++------- 8 files changed, 19 insertions(+), 34 deletions(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index a558ed64bf9..238964fb25e 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -507,7 +507,7 @@ try "Prometheus: http://" + address.toString(), std::make_unique<HTTPServer>( std::move(my_http_context), - createPrometheusMainHandlerFactory(*this, config_getter(), metrics_writer, "PrometheusHandler-factory"), + createPrometheusMainHandlerFactory(config_getter(), metrics_writer, "PrometheusHandler-factory"), server_pool, socket, http_params)); diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 2b3f7f062bc..56226941228 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -1,4 +1,3 @@ -#include #include #include diff --git a/src/Server/HTTP/HTTPServer.cpp b/src/Server/HTTP/HTTPServer.cpp index 9b8feae3e26..90bdebf6451 100644 --- a/src/Server/HTTP/HTTPServer.cpp +++ b/src/Server/HTTP/HTTPServer.cpp @@ -13,7 +13,8 @@ HTTPServer::HTTPServer( Poco::Net::HTTPServerParams::Ptr params, const ProfileEvents::Event & read_event, const ProfileEvents::Event & write_event) - : TCPServer(new HTTPServerConnectionFactory(context, params, factory_, read_event, write_event), thread_pool, socket_, params) , factory(factory_) + : TCPServer(new HTTPServerConnectionFactory(context, params, factory_, read_event, write_event), thread_pool, socket_, params), factory(factory_) { } diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index 9a67e576345..23d4c081d2d 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -123,7 +123,7 @@ static inline auto createHandlersFactoryFromConfig( } else if (handler_type == "prometheus") { - main_handler_factory->addHandler(createPrometheusHandlerFactory(server, config, async_metrics, prefix + "." + key)); + main_handler_factory->addHandler(createPrometheusHandlerFactory(config, async_metrics, prefix + "."
+ key)); } else if (handler_type == "replicas_status") { @@ -202,7 +202,7 @@ HTTPRequestHandlerFactoryPtr createHandlerFactory(IServer & server, const Poco:: else if (name == "PrometheusHandler-factory") { auto metrics_writer = std::make_shared<PrometheusMetricsWriter>(config, "prometheus", async_metrics); - return createPrometheusMainHandlerFactory(server, config, metrics_writer, name); + return createPrometheusMainHandlerFactory(config, metrics_writer, name); } throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown HTTP handler factory name."); @@ -294,10 +294,8 @@ void addDefaultHandlersFactory( if (config.has("prometheus") && config.getInt("prometheus.port", 0) == 0) { auto writer = std::make_shared<PrometheusMetricsWriter>(config, "prometheus", async_metrics); - auto creator = [&server, writer] () -> std::unique_ptr<PrometheusRequestHandler> - { - return std::make_unique<PrometheusRequestHandler>(server, writer); - }; + auto creator + = [writer]() -> std::unique_ptr<PrometheusRequestHandler> { return std::make_unique<PrometheusRequestHandler>(writer); }; auto prometheus_handler = std::make_shared<HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>>(std::move(creator)); prometheus_handler->attachStrictPath(config.getString("prometheus.endpoint", "/metrics")); prometheus_handler->allowGetAndHeadRequest(); diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index ac18c36e6c9..5c1a12d9e06 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -126,14 +126,10 @@ HTTPRequestHandlerFactoryPtr createReplicasStatusHandlerFactory(IServer & server const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); -HTTPRequestHandlerFactoryPtr -createPrometheusHandlerFactory(IServer & server, const Poco::Util::AbstractConfiguration & config, AsynchronousMetrics & async_metrics, const std::string & config_prefix); +HTTPRequestHandlerFactoryPtr createPrometheusHandlerFactory( const Poco::Util::AbstractConfiguration & config, AsynchronousMetrics & async_metrics, const std::string & config_prefix); HTTPRequestHandlerFactoryPtr createPrometheusMainHandlerFactory( - IServer & server, const Poco::Util::AbstractConfiguration & config, PrometheusMetricsWriterPtr metrics_writer, const std::string & name); diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index 0ad5f907467..1a04311116f 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -44,16 +44,12 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe } HTTPRequestHandlerFactoryPtr createPrometheusHandlerFactory( - IServer & server, const Poco::Util::AbstractConfiguration & config, AsynchronousMetrics & async_metrics, const std::string & config_prefix) { auto writer = std::make_shared<PrometheusMetricsWriter>(config, config_prefix + ".handler", async_metrics); - auto creator = [&server, writer]() -> std::unique_ptr<PrometheusRequestHandler> - { - return std::make_unique<PrometheusRequestHandler>(server, writer); - }; + auto creator = [writer]() -> std::unique_ptr<PrometheusRequestHandler> { return std::make_unique<PrometheusRequestHandler>(writer); }; auto factory = std::make_shared<HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>>(std::move(creator)); factory->addFiltersFromConfig(config, config_prefix); @@ -61,13 +57,10 @@ HTTPRequestHandlerFactoryPtr createPrometheusHandlerFactory( HTTPRequestHandlerFactoryPtr createPrometheusMainHandlerFactory( - IServer & server, const Poco::Util::AbstractConfiguration & config, PrometheusMetricsWriterPtr metrics_writer, const std::string & name) { auto factory = std::make_shared<HTTPRequestHandlerFactoryMain>(name); - auto creator = [&server, metrics_writer] - { - return std::make_unique<PrometheusRequestHandler>(server, metrics_writer); - }; + auto creator = [metrics_writer] { return std::make_unique<PrometheusRequestHandler>(metrics_writer); }; auto handler = std::make_shared<HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>>(std::move(creator)); handler->attachStrictPath(config.getString("prometheus.endpoint", "/metrics")); diff --git a/src/Server/PrometheusRequestHandler.h b/src/Server/PrometheusRequestHandler.h index cc7848d1dd0..7f4d3c14f62 100644 --- a/src/Server/PrometheusRequestHandler.h +++ b/src/Server/PrometheusRequestHandler.h @@ -15,7 +15,7 @@ private: PrometheusMetricsWriterPtr metrics_writer; public: - PrometheusRequestHandler(IServer &, PrometheusMetricsWriterPtr metrics_writer_) : metrics_writer(std::move(metrics_writer_)) { } + PrometheusRequestHandler(PrometheusMetricsWriterPtr metrics_writer_) : metrics_writer(std::move(metrics_writer_)) { } void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; }; diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index faad9d57519..e43412550f9 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -29,7 +29,7 @@ DashboardWebUIRequestHandler::DashboardWebUIRequestHandler(IServer & server_) : BinaryWebUIRequestHandler::BinaryWebUIRequestHandler(IServer & server_) : server(server_) {} JavaScriptWebUIRequestHandler::JavaScriptWebUIRequestHandler(IServer & server_) : server(server_) {} -static void handle(const IServer &, HTTPServerRequest & request, HTTPServerResponse & response, std::string_view html) +static void handle(HTTPServerRequest & request, HTTPServerResponse & response, std::string_view html) { response.setContentType("text/html; charset=UTF-8"); if (request.getVersion() == HTTPServerRequest::HTTP_1_1) response.setChunkedTransferEncoding(true); @@ -42,7 +42,7 @@ static void handle(const IServer &, HTTPServerRequest & request, HTTPServerRespo void PlayWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &) { - handle(server, request, response, {reinterpret_cast<const char *>(gresource_play_htmlData), gresource_play_htmlSize}); + handle(request, response, {reinterpret_cast<const char *>(gresource_play_htmlData), gresource_play_htmlSize}); } void DashboardWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &) @@ -60,23 +60,23 @@ void DashboardWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HT static re2::RE2 lz_string_url = R"(https://[^\s"'`]+lz-string[^\s"'`]*\.js)"; RE2::Replace(&html, lz_string_url, "/js/lz-string.js"); - handle(server, request, response, html); + handle(request, response, html); } void BinaryWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &) { - handle(server, request, response, {reinterpret_cast<const char *>(gresource_binary_htmlData), gresource_binary_htmlSize}); + handle(request, response, {reinterpret_cast<const char *>(gresource_binary_htmlData), gresource_binary_htmlSize}); } void JavaScriptWebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event &) { if (request.getURI() == "/js/uplot.js") { - handle(server, request, response, {reinterpret_cast<const char *>(gresource_uplot_jsData), gresource_uplot_jsSize}); + handle(request, response, {reinterpret_cast<const char *>(gresource_uplot_jsData), gresource_uplot_jsSize}); } else if (request.getURI() == "/js/lz-string.js") { -
handle(server, request, response, {reinterpret_cast<const char *>(gresource_lz_string_jsData), gresource_lz_string_jsSize}); + handle(request, response, {reinterpret_cast<const char *>(gresource_lz_string_jsData), gresource_lz_string_jsSize}); } else { @@ -84,7 +84,7 @@ void JavaScriptWebUIRequestHandler::handleRequest(HTTPServerRequest & request, H *response.send() << "Not found.\n"; } - handle(server, request, response, {reinterpret_cast<const char *>(gresource_binary_htmlData), gresource_binary_htmlSize}); + handle(request, response, {reinterpret_cast<const char *>(gresource_binary_htmlData), gresource_binary_htmlSize}); } } From bd04fc5346d83e8450fa98578e325923a609abda Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 22 Mar 2024 17:41:53 +0000 Subject: [PATCH 004/265] rename test --- tests/integration/test_server_keep_alive/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_server_keep_alive/test.py b/tests/integration/test_server_keep_alive/test.py index 0f88fe47673..96f08a37adb 100644 --- a/tests/integration/test_server_keep_alive/test.py +++ b/tests/integration/test_server_keep_alive/test.py @@ -20,7 +20,7 @@ def start_cluster(): cluster.shutdown() -def test_requests_with_keep_alive(start_cluster): +def test_max_keep_alive_requests_on_user_side(start_cluster): # In this test `keep_alive_timeout` is set to one hour so that the connection is never reset due to a timeout, and `max_keep_alive_requests` is set to 5. # We expect the server to close the connection after every 5 requests. We detect a connection reset by a change in the source port. # So the first 5 requests should come from the same port, and the following 5 requests should come from another port. From c153fae0b8377aeca5e636a7ad0370e6ada42688 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 22 Mar 2024 19:10:06 +0000 Subject: [PATCH 005/265] add docs --- .../server-configuration-parameters/settings.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 07c9a2b88ab..0efb5a9e6e4 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1298,6 +1298,16 @@ The number of seconds that ClickHouse waits for incoming requests before closing <keep_alive_timeout>10</keep_alive_timeout> ``` +## max_keep_alive_requests {#max-keep-alive-requests} + +Maximum number of requests served over a single keep-alive connection before the ClickHouse server closes it. Defaults to 10000. + +**Example** + +``` xml +<max_keep_alive_requests>10</max_keep_alive_requests> +``` + ## listen_host {#listen_host} Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`.
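Taken together with the integration test and the documentation above, the user-visible effect of `max_keep_alive_requests` can be sketched as follows. This is an illustration rather than part of the patch, and it assumes a server reachable at localhost:8123 configured with the limit set to 5, as in the test:

```python
# Sketch, assuming <max_keep_alive_requests>5</max_keep_alive_requests> on a
# local server at localhost:8123.
import requests

session = requests.Session()
for i in range(10):
    r = session.get("http://localhost:8123/?query=select%201")
    assert r.status_code == 200
# Requests 1-5 should be served over one TCP connection; the server then stops
# reading from it, so request 6 goes out on a fresh connection, which is
# visible server-side as a new source port (exactly what the test asserts).
```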
From b93f483a0e2f312d50685fd499d2f52717b83925 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 22 Mar 2024 20:07:12 +0000 Subject: [PATCH 006/265] fix build --- programs/library-bridge/LibraryBridge.cpp | 2 +- .../LibraryBridgeHandlerFactory.cpp | 10 ++++------ .../LibraryBridgeHandlerFactory.h | 2 -- .../library-bridge/LibraryBridgeHandlers.cpp | 17 ++++++----------- programs/library-bridge/LibraryBridgeHandlers.h | 6 ++---- programs/odbc-bridge/ColumnInfoHandler.cpp | 5 +---- programs/odbc-bridge/ColumnInfoHandler.h | 8 +------- programs/odbc-bridge/IdentifierQuoteHandler.cpp | 2 +- programs/odbc-bridge/IdentifierQuoteHandler.h | 8 +------- programs/odbc-bridge/MainHandler.cpp | 2 +- programs/odbc-bridge/MainHandler.h | 3 --- programs/odbc-bridge/ODBCHandlerFactory.cpp | 10 +++++----- programs/odbc-bridge/SchemaAllowedHandler.cpp | 2 +- programs/odbc-bridge/SchemaAllowedHandler.h | 8 +------- 14 files changed, 25 insertions(+), 60 deletions(-) diff --git a/programs/library-bridge/LibraryBridge.cpp b/programs/library-bridge/LibraryBridge.cpp index 8a07ca57104..f86e469a307 100644 --- a/programs/library-bridge/LibraryBridge.cpp +++ b/programs/library-bridge/LibraryBridge.cpp @@ -25,7 +25,7 @@ std::string LibraryBridge::bridgeName() const LibraryBridge::HandlerFactoryPtr LibraryBridge::getHandlerFactoryPtr(ContextPtr context) const { - return std::make_shared<LibraryBridgeHandlerFactory>("LibraryRequestHandlerFactory", keep_alive_timeout, context); + return std::make_shared<LibraryBridgeHandlerFactory>("LibraryRequestHandlerFactory", context); } } diff --git a/programs/library-bridge/LibraryBridgeHandlerFactory.cpp b/programs/library-bridge/LibraryBridgeHandlerFactory.cpp index e5ab22f2d40..234904c6265 100644 --- a/programs/library-bridge/LibraryBridgeHandlerFactory.cpp +++ b/programs/library-bridge/LibraryBridgeHandlerFactory.cpp @@ -9,12 +9,10 @@ namespace DB { LibraryBridgeHandlerFactory::LibraryBridgeHandlerFactory( const std::string & name_, - size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) , log(getLogger(name_)) , name(name_) - , keep_alive_timeout(keep_alive_timeout_) { } @@ -26,17 +24,17 @@ std::unique_ptr<HTTPRequestHandler> LibraryBridgeHandlerFactory::createRequestHa if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET) { if (uri.getPath() == "/extdict_ping") - return std::make_unique<ExternalDictionaryLibraryBridgeExistsHandler>(keep_alive_timeout, getContext()); + return std::make_unique<ExternalDictionaryLibraryBridgeExistsHandler>(getContext()); else if (uri.getPath() == "/catboost_ping") - return std::make_unique<CatBoostLibraryBridgeExistsHandler>(keep_alive_timeout, getContext()); + return std::make_unique<CatBoostLibraryBridgeExistsHandler>(getContext()); } if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST) { if (uri.getPath() == "/extdict_request") - return std::make_unique<ExternalDictionaryLibraryBridgeRequestHandler>(keep_alive_timeout, getContext()); + return std::make_unique<ExternalDictionaryLibraryBridgeRequestHandler>(getContext()); else if (uri.getPath() == "/catboost_request") - return std::make_unique<CatBoostLibraryBridgeRequestHandler>(keep_alive_timeout, getContext()); + return std::make_unique<CatBoostLibraryBridgeRequestHandler>(getContext()); } return nullptr; diff --git a/programs/library-bridge/LibraryBridgeHandlerFactory.h b/programs/library-bridge/LibraryBridgeHandlerFactory.h index 5b0f088bc29..c65394efa3b 100644 --- a/programs/library-bridge/LibraryBridgeHandlerFactory.h +++ b/programs/library-bridge/LibraryBridgeHandlerFactory.h @@ -13,7 +13,6 @@ class LibraryBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContex public: LibraryBridgeHandlerFactory( const std::string & name_, - size_t keep_alive_timeout_, ContextPtr context_); std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override; private: LoggerPtr log; const std::string name; -
const size_t keep_alive_timeout; }; } diff --git a/programs/library-bridge/LibraryBridgeHandlers.cpp b/programs/library-bridge/LibraryBridgeHandlers.cpp index 094cef6716d..bd8faf76188 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.cpp +++ b/programs/library-bridge/LibraryBridgeHandlers.cpp @@ -86,10 +86,8 @@ static void writeData(Block data, OutputFormatPtr format) } -ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_) - : WithContext(context_) - , keep_alive_timeout(keep_alive_timeout_) - , log(getLogger("ExternalDictionaryLibraryBridgeRequestHandler")) +ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRequestHandler(ContextPtr context_) + : WithContext(context_), log(getLogger("ExternalDictionaryLibraryBridgeRequestHandler")) { } @@ -136,7 +134,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ const String & dictionary_id = params.get("dictionary_id"); LOG_TRACE(log, "Library method: '{}', dictionary id: {}", method, dictionary_id); - WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD); try { @@ -410,11 +408,8 @@ void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerReque } -CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler( - size_t keep_alive_timeout_, ContextPtr context_) - : WithContext(context_) - , keep_alive_timeout(keep_alive_timeout_) - , log(getLogger("CatBoostLibraryBridgeRequestHandler")) +CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler(ContextPtr context_) + : WithContext(context_), log(getLogger("CatBoostLibraryBridgeRequestHandler")) { } @@ -453,7 +448,7 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ const String & method = params.get("method"); LOG_TRACE(log, "Library method: '{}'", method); - WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD); try { diff --git a/programs/library-bridge/LibraryBridgeHandlers.h b/programs/library-bridge/LibraryBridgeHandlers.h index 83bca24ce1f..70e3c9c78da 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.h +++ b/programs/library-bridge/LibraryBridgeHandlers.h @@ -18,14 +18,13 @@ namespace DB class ExternalDictionaryLibraryBridgeRequestHandler : public HTTPRequestHandler, WithContext { public: - ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_); + ExternalDictionaryLibraryBridgeRequestHandler(ContextPtr context_); void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: static constexpr inline auto FORMAT = "RowBinary"; - const size_t keep_alive_timeout; LoggerPtr log; }; @@ -62,12 +61,11 @@ private: class CatBoostLibraryBridgeRequestHandler : public HTTPRequestHandler, WithContext { public: - CatBoostLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_); + CatBoostLibraryBridgeRequestHandler(ContextPtr context_); void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: - 
const size_t keep_alive_timeout; LoggerPtr log; }; diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index 4cb15de3b2c..438062e8169 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -200,10 +200,7 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ if (columns.empty()) throw Exception(ErrorCodes::UNKNOWN_TABLE, "Columns definition was not returned"); - WriteBufferFromHTTPServerResponse out( - response, - request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, - keep_alive_timeout); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD); try { writeStringBinary(columns.toString(), out); diff --git a/programs/odbc-bridge/ColumnInfoHandler.h b/programs/odbc-bridge/ColumnInfoHandler.h index ca7044fdf32..f16e09ec3f9 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.h +++ b/programs/odbc-bridge/ColumnInfoHandler.h @@ -16,18 +16,12 @@ namespace DB class ODBCColumnsInfoHandler : public HTTPRequestHandler, WithContext { public: - ODBCColumnsInfoHandler(size_t keep_alive_timeout_, ContextPtr context_) - : WithContext(context_) - , log(getLogger("ODBCColumnsInfoHandler")) - , keep_alive_timeout(keep_alive_timeout_) - { - } + ODBCColumnsInfoHandler(ContextPtr context_) : WithContext(context_), log(getLogger("ODBCColumnsInfoHandler")) { } void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: LoggerPtr log; - size_t keep_alive_timeout; }; } diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.cpp b/programs/odbc-bridge/IdentifierQuoteHandler.cpp index cf5acdc4534..0bd1e8758cd 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.cpp +++ b/programs/odbc-bridge/IdentifierQuoteHandler.cpp @@ -73,7 +73,7 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ auto identifier = getIdentifierQuote(std::move(connection)); - WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD); try { writeStringBinary(identifier, out); diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.h b/programs/odbc-bridge/IdentifierQuoteHandler.h index 7b78c5b4b93..c0e07795ea5 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.h +++ b/programs/odbc-bridge/IdentifierQuoteHandler.h @@ -14,18 +14,12 @@ namespace DB class IdentifierQuoteHandler : public HTTPRequestHandler, WithContext { public: - IdentifierQuoteHandler(size_t keep_alive_timeout_, ContextPtr context_) - : WithContext(context_) - , log(getLogger("IdentifierQuoteHandler")) - , keep_alive_timeout(keep_alive_timeout_) - { - } + IdentifierQuoteHandler(ContextPtr context_) : WithContext(context_), log(getLogger("IdentifierQuoteHandler")) { } void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: LoggerPtr log; - size_t keep_alive_timeout; }; } diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index e350afa2b10..b086397446e 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -131,7 +131,7 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse return; } - WriteBufferFromHTTPServerResponse 
out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD); try { diff --git a/programs/odbc-bridge/MainHandler.h b/programs/odbc-bridge/MainHandler.h index ed0c6b2e28c..0fcad61d274 100644 --- a/programs/odbc-bridge/MainHandler.h +++ b/programs/odbc-bridge/MainHandler.h @@ -20,12 +20,10 @@ class ODBCHandler : public HTTPRequestHandler, WithContext { public: ODBCHandler( - size_t keep_alive_timeout_, ContextPtr context_, const String & mode_) : WithContext(context_) , log(getLogger("ODBCHandler")) - , keep_alive_timeout(keep_alive_timeout_) , mode(mode_) { } @@ -35,7 +33,6 @@ public: private: LoggerPtr log; - size_t keep_alive_timeout; String mode; static inline std::mutex mutex; diff --git a/programs/odbc-bridge/ODBCHandlerFactory.cpp b/programs/odbc-bridge/ODBCHandlerFactory.cpp index eebb0c24c7a..7f095666447 100644 --- a/programs/odbc-bridge/ODBCHandlerFactory.cpp +++ b/programs/odbc-bridge/ODBCHandlerFactory.cpp @@ -30,26 +30,26 @@ std::unique_ptr<HTTPRequestHandler> ODBCBridgeHandlerFactory::createRequestHandl if (uri.getPath() == "/columns_info") #if USE_ODBC - return std::make_unique<ODBCColumnsInfoHandler>(keep_alive_timeout, getContext()); + return std::make_unique<ODBCColumnsInfoHandler>(getContext()); #else return nullptr; #endif else if (uri.getPath() == "/identifier_quote") #if USE_ODBC - return std::make_unique<IdentifierQuoteHandler>(keep_alive_timeout, getContext()); + return std::make_unique<IdentifierQuoteHandler>(getContext()); #else return nullptr; #endif else if (uri.getPath() == "/schema_allowed") #if USE_ODBC - return std::make_unique<SchemaAllowedHandler>(keep_alive_timeout, getContext()); + return std::make_unique<SchemaAllowedHandler>(getContext()); #else return nullptr; #endif else if (uri.getPath() == "/write") - return std::make_unique<ODBCHandler>(keep_alive_timeout, getContext(), "write"); + return std::make_unique<ODBCHandler>(getContext(), "write"); else - return std::make_unique<ODBCHandler>(keep_alive_timeout, getContext(), "read"); + return std::make_unique<ODBCHandler>(getContext(), "read"); } return nullptr; } diff --git a/programs/odbc-bridge/SchemaAllowedHandler.cpp b/programs/odbc-bridge/SchemaAllowedHandler.cpp index c7025ca4311..5dc0cb3aa2b 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.cpp +++ b/programs/odbc-bridge/SchemaAllowedHandler.cpp @@ -86,7 +86,7 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer bool result = isSchemaAllowed(std::move(connection)); - WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD); try { writeBoolText(result, out); diff --git a/programs/odbc-bridge/SchemaAllowedHandler.h b/programs/odbc-bridge/SchemaAllowedHandler.h index 8dc725dbb33..e73c0a2cb26 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.h +++ b/programs/odbc-bridge/SchemaAllowedHandler.h @@ -17,18 +17,12 @@ class Context; class SchemaAllowedHandler : public HTTPRequestHandler, WithContext { public: - SchemaAllowedHandler(size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) , log(getLogger("SchemaAllowedHandler")) , keep_alive_timeout(keep_alive_timeout_) { } + SchemaAllowedHandler(ContextPtr context_) : WithContext(context_), log(getLogger("SchemaAllowedHandler")) { } void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: LoggerPtr log; - size_t keep_alive_timeout; }; } From
c33511dcb9314da6b64b11cf21d231c9d3896dad Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 22 Mar 2024 20:24:53 +0000 Subject: [PATCH 007/265] remove more --- programs/odbc-bridge/ODBCBridge.cpp | 2 +- programs/odbc-bridge/ODBCHandlerFactory.cpp | 9 +++------ programs/odbc-bridge/ODBCHandlerFactory.h | 3 +-- programs/odbc-bridge/PingHandler.h | 4 ---- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/programs/odbc-bridge/ODBCBridge.cpp b/programs/odbc-bridge/ODBCBridge.cpp index e91cc3158df..2cde5bbf9f5 100644 --- a/programs/odbc-bridge/ODBCBridge.cpp +++ b/programs/odbc-bridge/ODBCBridge.cpp @@ -25,7 +25,7 @@ std::string ODBCBridge::bridgeName() const ODBCBridge::HandlerFactoryPtr ODBCBridge::getHandlerFactoryPtr(ContextPtr context) const { - return std::make_shared<ODBCBridgeHandlerFactory>("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context); + return std::make_shared<ODBCBridgeHandlerFactory>("ODBCRequestHandlerFactory-factory", context); } } diff --git a/programs/odbc-bridge/ODBCHandlerFactory.cpp b/programs/odbc-bridge/ODBCHandlerFactory.cpp index 7f095666447..b5d0be908f4 100644 --- a/programs/odbc-bridge/ODBCHandlerFactory.cpp +++ b/programs/odbc-bridge/ODBCHandlerFactory.cpp @@ -9,11 +9,8 @@ namespace DB { -ODBCBridgeHandlerFactory::ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_) : WithContext(context_) , log(getLogger(name_)) , name(name_) - , keep_alive_timeout(keep_alive_timeout_) +ODBCBridgeHandlerFactory::ODBCBridgeHandlerFactory(const std::string & name_, ContextPtr context_) : WithContext(context_), log(getLogger(name_)), name(name_) { } @@ -23,7 +20,7 @@ std::unique_ptr<HTTPRequestHandler> ODBCBridgeHandlerFactory::createRequestHandl LOG_TRACE(log, "Request URI: {}", uri.toString()); if (uri.getPath() == "/ping" && request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET) - return std::make_unique<PingHandler>(keep_alive_timeout); + return std::make_unique<PingHandler>(); if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST) { diff --git a/programs/odbc-bridge/ODBCHandlerFactory.h b/programs/odbc-bridge/ODBCHandlerFactory.h index 4aaf1b55453..f4a2717dc9f 100644 --- a/programs/odbc-bridge/ODBCHandlerFactory.h +++ b/programs/odbc-bridge/ODBCHandlerFactory.h @@ -17,14 +17,13 @@ namespace DB class ODBCBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContext { public: - ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_); + ODBCBridgeHandlerFactory(const std::string & name_, ContextPtr context_); std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override; private: LoggerPtr log; std::string name; - size_t keep_alive_timeout; }; } diff --git a/programs/odbc-bridge/PingHandler.h b/programs/odbc-bridge/PingHandler.h index c5447107e0c..4c557bd3cf6 100644 --- a/programs/odbc-bridge/PingHandler.h +++ b/programs/odbc-bridge/PingHandler.h @@ -9,11 +9,7 @@ namespace DB class PingHandler : public HTTPRequestHandler { public: - explicit PingHandler(size_t keep_alive_timeout_) : keep_alive_timeout(keep_alive_timeout_) {} void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; - -private: - size_t keep_alive_timeout; }; } From 1115fa4bc74d840e8fc3230908310cb7311dd0d0 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 25 Mar 2024 15:13:13 +0000 Subject: [PATCH 008/265] fix tidy --- src/Server/PrometheusRequestHandler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/PrometheusRequestHandler.h
b/src/Server/PrometheusRequestHandler.h index 7f4d3c14f62..a1bd18b394a 100644 --- a/src/Server/PrometheusRequestHandler.h +++ b/src/Server/PrometheusRequestHandler.h @@ -15,7 +15,7 @@ private: PrometheusMetricsWriterPtr metrics_writer; public: - PrometheusRequestHandler(PrometheusMetricsWriterPtr metrics_writer_) : metrics_writer(std::move(metrics_writer_)) { } + explicit PrometheusRequestHandler(PrometheusMetricsWriterPtr metrics_writer_) : metrics_writer(std::move(metrics_writer_)) { } void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; }; From 1cfbc548bb415e89e294a22da0eea59302269c37 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 25 Mar 2024 17:28:51 +0100 Subject: [PATCH 009/265] Fix copy-paste Co-authored-by: Michael Lex --- src/Core/ServerSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 7480d94e81d..4a22082cdda 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -113,7 +113,7 @@ namespace DB M(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \ M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \ M(Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \ - M(UInt64, max_keep_alive_requests, 10000, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \ + M(UInt64, max_keep_alive_requests, 10000, "The maximum number of requests handled via a single http keepalive connection before the server closes this connection.", 0) \ M(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \ M(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \ M(Seconds, replicated_fetches_http_receive_timeout, 0, "HTTP receive timeout for fetch part requests. 
Inherited from default profile `http_receive_timeout` if not set explicitly.", 0) \ From 3c2915934f31d82176b65e1e998eca030671d872 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 26 Mar 2024 04:29:34 +0000 Subject: [PATCH 010/265] update fuzzers --- ..._function_state_deserialization_fuzzer.cpp | 23 +++++++++++++++++++ src/Client/ClientBase.cpp | 2 +- src/Core/fuzzers/names_and_types_fuzzer.cpp | 22 ++++++++++++++++++ .../data_type_deserialization_fuzzer.cpp | 22 ++++++++++++++++++ src/Formats/fuzzers/format_fuzzer.cpp | 20 ++++++++++++++++ .../fuzzers/codegen_fuzzer/CMakeLists.txt | 2 +- .../codegen_fuzzer/codegen_select_fuzzer.cpp | 2 +- src/Parsers/fuzzers/create_parser_fuzzer.cpp | 2 +- .../fuzzers/columns_description_fuzzer.cpp | 22 ++++++++++++++++++ 9 files changed, 113 insertions(+), 4 deletions(-) diff --git a/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp b/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp index 425364efb9c..9d490432c60 100644 --- a/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp +++ b/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp @@ -12,10 +12,33 @@ #include +#include + #include #include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +class IFunctionBase; +using FunctionBasePtr = std::shared_ptr; + +FunctionBasePtr createFunctionBaseCast( + ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge"); +} + +} + + extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) { try diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 767a9b2b9f9..bdee2233b27 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2729,7 +2729,7 @@ void ClientBase::runLibFuzzer() for (auto & arg : fuzzer_args_holder) fuzzer_args.emplace_back(arg.data()); - int fuzzer_argc = fuzzer_args.size(); + int fuzzer_argc = static_cast(fuzzer_args.size()); char ** fuzzer_argv = fuzzer_args.data(); LLVMFuzzerRunDriver(&fuzzer_argc, &fuzzer_argv, [](const uint8_t * data, size_t size) diff --git a/src/Core/fuzzers/names_and_types_fuzzer.cpp b/src/Core/fuzzers/names_and_types_fuzzer.cpp index 6fdd8703014..bc8cb7af61f 100644 --- a/src/Core/fuzzers/names_and_types_fuzzer.cpp +++ b/src/Core/fuzzers/names_and_types_fuzzer.cpp @@ -1,7 +1,29 @@ +#include +#include #include #include +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +class IFunctionBase; +using FunctionBasePtr = std::shared_ptr; + +FunctionBasePtr createFunctionBaseCast( + ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge"); +} + +} + + extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) { try diff --git a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp index 0ae325871fb..f1b03147929 100644 --- a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp +++ b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp @@ -8,11 +8,33 @@ #include #include +#include + #include #include +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + 
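The ClientBase hunk above narrows the argument count with static_cast because LLVMFuzzerRunDriver takes an int *. A minimal standalone sketch of embedding libFuzzer the same way (build with clang, compile with -fsanitize=fuzzer-no-link and link the fuzzer runtime without its own main; the no-op callback is only for illustration):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    /// Provided by compiler-rt's libFuzzer when the runtime is linked in.
    extern "C" int LLVMFuzzerRunDriver(int * argc, char *** argv,
                                       int (*callback)(const uint8_t * data, size_t size));

    int main(int argc, char ** argv)
    {
        std::vector<char *> fuzzer_args(argv, argv + argc);
        int fuzzer_argc = static_cast<int>(fuzzer_args.size());  /// size_t -> int, as in the patch
        char ** fuzzer_argv = fuzzer_args.data();
        return LLVMFuzzerRunDriver(&fuzzer_argc, &fuzzer_argv,
                                   [](const uint8_t *, size_t) { return 0; });
    }
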
+class IFunctionBase; +using FunctionBasePtr = std::shared_ptr; + +FunctionBasePtr createFunctionBaseCast( + ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge"); +} + +} + + extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) { try diff --git a/src/Formats/fuzzers/format_fuzzer.cpp b/src/Formats/fuzzers/format_fuzzer.cpp index 46661e4828c..4426301b6e7 100644 --- a/src/Formats/fuzzers/format_fuzzer.cpp +++ b/src/Formats/fuzzers/format_fuzzer.cpp @@ -21,6 +21,26 @@ #include +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +class IFunctionBase; +using FunctionBasePtr = std::shared_ptr; + +FunctionBasePtr createFunctionBaseCast( + ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge"); +} + +} + + extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) { try diff --git a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt index 20fd951d390..74fdcff79f7 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt +++ b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt @@ -39,7 +39,7 @@ set(CMAKE_INCLUDE_CURRENT_DIR TRUE) clickhouse_add_executable(codegen_select_fuzzer ${FUZZER_SRCS}) -set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier") +set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier -Wno-extra-semi-stmt -Wno-used-but-marked-unused") # contrib/libprotobuf-mutator/src/libfuzzer/libfuzzer_macro.h:143:44: error: no newline at end of file [-Werror,-Wnewline-eof] target_compile_options (codegen_select_fuzzer PRIVATE -Wno-newline-eof) diff --git a/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp b/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp index 9310d7d59f7..55daa370651 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp +++ b/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp @@ -27,7 +27,7 @@ DEFINE_BINARY_PROTO_FUZZER(const Sentence& main) DB::ParserQueryWithOutput parser(input.data() + input.size()); try { - DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); DB::WriteBufferFromOStream out(std::cerr, 4096); DB::formatAST(*ast, out); diff --git a/src/Parsers/fuzzers/create_parser_fuzzer.cpp b/src/Parsers/fuzzers/create_parser_fuzzer.cpp index 854885ad33b..1d5c3e27232 100644 --- a/src/Parsers/fuzzers/create_parser_fuzzer.cpp +++ b/src/Parsers/fuzzers/create_parser_fuzzer.cpp @@ -14,7 +14,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) std::string input = std::string(reinterpret_cast(data), size); DB::ParserCreateQuery parser; - DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 1000); + DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 1000, 0); const UInt64 max_ast_depth = 1000; ast->checkDepth(max_ast_depth); diff --git a/src/Storages/fuzzers/columns_description_fuzzer.cpp b/src/Storages/fuzzers/columns_description_fuzzer.cpp index b703a1e7051..cb0c6168225 
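The createFunctionBaseCast definition repeated in each of these translation units is a link seam rather than real functionality: the fuzzer binaries pull in object files that reference the CAST machinery without linking the library that implements it, so every harness supplies a throwing definition to satisfy the linker. The shape of the pattern, with hypothetical names standing in for the real ones:

    #include <stdexcept>

    /// Normally declared in a header of the subsystem and referenced from
    /// the object files the binary links:
    int heavySubsystemEntryPoint(int x);

    /// Stub supplied by the harness instead of linking the subsystem;
    /// it satisfies the linker and fails loudly if ever reached.
    int heavySubsystemEntryPoint(int)
    {
        throw std::runtime_error("not implemented in this harness");
    }
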
100644 --- a/src/Storages/fuzzers/columns_description_fuzzer.cpp +++ b/src/Storages/fuzzers/columns_description_fuzzer.cpp @@ -1,4 +1,26 @@ +#include #include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +class IFunctionBase; +using FunctionBasePtr = std::shared_ptr; + +FunctionBasePtr createFunctionBaseCast( + ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge"); +} + +} extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) From cb40fd7d0c9096a8df8d5e7c2e9924f66d51e061 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 26 Mar 2024 15:27:01 +0000 Subject: [PATCH 011/265] minor fixes --- .../fuzzers/aggregate_function_state_deserialization_fuzzer.cpp | 2 +- src/Core/fuzzers/names_and_types_fuzzer.cpp | 2 +- src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp | 2 +- src/Formats/fuzzers/format_fuzzer.cpp | 2 +- src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp | 2 +- src/Parsers/fuzzers/create_parser_fuzzer.cpp | 2 +- src/Storages/fuzzers/columns_description_fuzzer.cpp | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp b/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp index 9d490432c60..a956d9906bc 100644 --- a/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp +++ b/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp @@ -33,7 +33,7 @@ using FunctionBasePtr = std::shared_ptr; FunctionBasePtr createFunctionBaseCast( ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for aggregate_function_state_deserialization_fuzzer"); } } diff --git a/src/Core/fuzzers/names_and_types_fuzzer.cpp b/src/Core/fuzzers/names_and_types_fuzzer.cpp index bc8cb7af61f..74debedf2a3 100644 --- a/src/Core/fuzzers/names_and_types_fuzzer.cpp +++ b/src/Core/fuzzers/names_and_types_fuzzer.cpp @@ -18,7 +18,7 @@ using FunctionBasePtr = std::shared_ptr; FunctionBasePtr createFunctionBaseCast( ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for names_and_types_fuzzer"); } } diff --git a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp index f1b03147929..7d9a0513d18 100644 --- a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp +++ b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp @@ -29,7 +29,7 @@ using FunctionBasePtr = std::shared_ptr; FunctionBasePtr createFunctionBaseCast( ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for 
data_type_deserialization_fuzzer"); } } diff --git a/src/Formats/fuzzers/format_fuzzer.cpp b/src/Formats/fuzzers/format_fuzzer.cpp index 4426301b6e7..2c1ec65e54d 100644 --- a/src/Formats/fuzzers/format_fuzzer.cpp +++ b/src/Formats/fuzzers/format_fuzzer.cpp @@ -35,7 +35,7 @@ using FunctionBasePtr = std::shared_ptr; FunctionBasePtr createFunctionBaseCast( ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for format_fuzzer"); } } diff --git a/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp b/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp index 55daa370651..6b25b581532 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp +++ b/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp @@ -27,7 +27,7 @@ DEFINE_BINARY_PROTO_FUZZER(const Sentence& main) DB::ParserQueryWithOutput parser(input.data() + input.size()); try { - DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); + DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, DB::DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); DB::WriteBufferFromOStream out(std::cerr, 4096); DB::formatAST(*ast, out); diff --git a/src/Parsers/fuzzers/create_parser_fuzzer.cpp b/src/Parsers/fuzzers/create_parser_fuzzer.cpp index 1d5c3e27232..bab8db5671d 100644 --- a/src/Parsers/fuzzers/create_parser_fuzzer.cpp +++ b/src/Parsers/fuzzers/create_parser_fuzzer.cpp @@ -14,7 +14,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) std::string input = std::string(reinterpret_cast(data), size); DB::ParserCreateQuery parser; - DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 1000, 0); + DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 1000, DB::DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); const UInt64 max_ast_depth = 1000; ast->checkDepth(max_ast_depth); diff --git a/src/Storages/fuzzers/columns_description_fuzzer.cpp b/src/Storages/fuzzers/columns_description_fuzzer.cpp index cb0c6168225..ac285ea50f7 100644 --- a/src/Storages/fuzzers/columns_description_fuzzer.cpp +++ b/src/Storages/fuzzers/columns_description_fuzzer.cpp @@ -17,7 +17,7 @@ using FunctionBasePtr = std::shared_ptr; FunctionBasePtr createFunctionBaseCast( ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for columns_description_fuzzer"); } } From 64e6c6a2fcf2f7017ec3749ad05eed2daeeb4b42 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 26 Mar 2024 22:31:40 +0000 Subject: [PATCH 012/265] fix tidy --- programs/library-bridge/LibraryBridgeHandlers.h | 8 ++++---- programs/odbc-bridge/ColumnInfoHandler.h | 2 +- programs/odbc-bridge/IdentifierQuoteHandler.h | 2 +- programs/odbc-bridge/SchemaAllowedHandler.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/programs/library-bridge/LibraryBridgeHandlers.h b/programs/library-bridge/LibraryBridgeHandlers.h index 70e3c9c78da..582619e174e 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.h +++ b/programs/library-bridge/LibraryBridgeHandlers.h @@ 
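For reference, the call shape the parser fuzzers end up with after the signature change, the new trailing argument being the backtracking limit (include paths assumed from the usual ClickHouse source layout):

    #include <string>

    #include <Core/Defines.h>
    #include <Parsers/ParserCreateQuery.h>
    #include <Parsers/parseQuery.h>

    DB::ASTPtr parseExample()
    {
        std::string input = "CREATE TABLE t (x UInt64) ENGINE = Memory";
        DB::ParserCreateQuery parser;
        return parseQuery(
            parser,
            input.data(), input.data() + input.size(),
            "",     /// description used in error messages
            0,      /// max_query_size (0 = no limit here)
            1000,   /// max_parser_depth
            DB::DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
    }
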
-18,7 +18,7 @@ namespace DB class ExternalDictionaryLibraryBridgeRequestHandler : public HTTPRequestHandler, WithContext { public: - ExternalDictionaryLibraryBridgeRequestHandler(ContextPtr context_); + explicit ExternalDictionaryLibraryBridgeRequestHandler(ContextPtr context_); void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; @@ -33,7 +33,7 @@ private: class ExternalDictionaryLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext { public: - ExternalDictionaryLibraryBridgeExistsHandler(ContextPtr context_); + explicit ExternalDictionaryLibraryBridgeExistsHandler(ContextPtr context_); void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; @@ -61,7 +61,7 @@ private: class CatBoostLibraryBridgeRequestHandler : public HTTPRequestHandler, WithContext { public: - CatBoostLibraryBridgeRequestHandler(ContextPtr context_); + explicit CatBoostLibraryBridgeRequestHandler(ContextPtr context_); void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; @@ -74,7 +74,7 @@ private: class CatBoostLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext { public: - CatBoostLibraryBridgeExistsHandler(ContextPtr context_); + explicit CatBoostLibraryBridgeExistsHandler(ContextPtr context_); void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; diff --git a/programs/odbc-bridge/ColumnInfoHandler.h b/programs/odbc-bridge/ColumnInfoHandler.h index f16e09ec3f9..bbbf0da218b 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.h +++ b/programs/odbc-bridge/ColumnInfoHandler.h @@ -16,7 +16,7 @@ namespace DB class ODBCColumnsInfoHandler : public HTTPRequestHandler, WithContext { public: - ODBCColumnsInfoHandler(ContextPtr context_) : WithContext(context_), log(getLogger("ODBCColumnsInfoHandler")) { } + explicit ODBCColumnsInfoHandler(ContextPtr context_) : WithContext(context_), log(getLogger("ODBCColumnsInfoHandler")) { } void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.h b/programs/odbc-bridge/IdentifierQuoteHandler.h index c0e07795ea5..a85b56a9f6a 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.h +++ b/programs/odbc-bridge/IdentifierQuoteHandler.h @@ -14,7 +14,7 @@ namespace DB class IdentifierQuoteHandler : public HTTPRequestHandler, WithContext { public: - IdentifierQuoteHandler(ContextPtr context_) : WithContext(context_), log(getLogger("IdentifierQuoteHandler")) { } + explicit IdentifierQuoteHandler(ContextPtr context_) : WithContext(context_), log(getLogger("IdentifierQuoteHandler")) { } void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; diff --git a/programs/odbc-bridge/SchemaAllowedHandler.h b/programs/odbc-bridge/SchemaAllowedHandler.h index e73c0a2cb26..59022151b53 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.h +++ b/programs/odbc-bridge/SchemaAllowedHandler.h @@ -17,7 +17,7 @@ class Context; class SchemaAllowedHandler : public HTTPRequestHandler, WithContext { public: - SchemaAllowedHandler(ContextPtr context_) : WithContext(context_), log(getLogger("SchemaAllowedHandler")) { } + explicit SchemaAllowedHandler(ContextPtr context_) : 
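All hunks in this patch are the same fix: a constructor callable with a single argument doubles as an implicit conversion unless it is marked explicit, which is what clang-tidy checks such as google-explicit-constructor flag. A minimal illustration with made-up types:

    #include <memory>

    struct Writer {};
    using WriterPtr = std::shared_ptr<Writer>;

    struct Handler
    {
        explicit Handler(WriterPtr w) : writer(std::move(w)) {}
        WriterPtr writer;
    };

    void process(const Handler &) {}

    int main()
    {
        WriterPtr w = std::make_shared<Writer>();
        // process(w);          // ill-formed: no implicit WriterPtr -> Handler conversion
        process(Handler{w});    // with explicit, the conversion has to be spelled out
    }
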
WithContext(context_), log(getLogger("SchemaAllowedHandler")) { } void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; From 8357bc7b1b2d48e808b63cc0aa6fb7c7aa36e98b Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 31 Mar 2024 23:33:35 +0000 Subject: [PATCH 013/265] fix build --- base/base/CMakeLists.txt | 2 +- cmake/sanitize.cmake | 2 +- programs/CMakeLists.txt | 2 +- src/CMakeLists.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 27aa0bd6baf..7b1da9ab4ad 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -1,4 +1,4 @@ -add_compile_options($<$,$>:${COVERAGE_FLAGS}>) +add_compile_options("$<$,$>:${COVERAGE_FLAGS}>") if (USE_CLANG_TIDY) set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}") diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 9d53b2004b4..227d96357b5 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -64,7 +64,7 @@ option(WITH_COVERAGE "Instrumentation for code coverage with default implementat if (WITH_COVERAGE) message (STATUS "Enabled instrumentation for code coverage") - set(COVERAGE_FLAGS "-fprofile-instr-generate -fcoverage-mapping") + set(COVERAGE_FLAGS -fprofile-instr-generate -fcoverage-mapping) endif() option (SANITIZE_COVERAGE "Instrumentation for code coverage with custom callbacks" OFF) diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 0d91de2dad8..aa7781498c8 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -1,4 +1,4 @@ -add_compile_options($<$,$>:${COVERAGE_FLAGS}>) +add_compile_options("$<$,$>:${COVERAGE_FLAGS}>") if (USE_CLANG_TIDY) set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 73aa409e995..bd603c9f15e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,4 @@ -add_compile_options($<$,$>:${COVERAGE_FLAGS}>) +add_compile_options("$<$,$>:${COVERAGE_FLAGS}>") if (USE_INCLUDE_WHAT_YOU_USE) set (CMAKE_CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH}) From 99e25d762c2db3c544dd5590726fc039b1828d16 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 1 Apr 2024 12:28:51 +0000 Subject: [PATCH 014/265] remove WITH_COVERAGE for fuzzers build --- docker/packager/packager | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/packager/packager b/docker/packager/packager index 23fc26bc1a4..355149df38c 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -276,7 +276,6 @@ def parse_env_variables( elif package_type == "fuzzers": cmake_flags.append("-DENABLE_FUZZING=1") cmake_flags.append("-DENABLE_PROTOBUF=1") - cmake_flags.append("-DWITH_COVERAGE=1") # Reduce linking and building time by avoid *install/all dependencies cmake_flags.append("-DCMAKE_SKIP_INSTALL_ALL_DEPENDENCY=ON") From 8d667ad5a34d1ba3d9008a5a6308598483281b35 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 1 Apr 2024 22:55:51 +0000 Subject: [PATCH 015/265] fix build.sh --- docker/packager/binary-builder/build.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/packager/binary-builder/build.sh b/docker/packager/binary-builder/build.sh index 032aceb0af3..cbd14b1eac2 100755 --- a/docker/packager/binary-builder/build.sh +++ b/docker/packager/binary-builder/build.sh @@ -108,7 +108,8 @@ if [ -n "$MAKE_DEB" ]; then bash -x /build/packages/build fi -mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output +mv ./programs/clickhouse* /output 
||: +mv ./programs/*_fuzzer /output ||: [ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output [ -x ./programs/self-extracting/clickhouse-stripped ] && mv ./programs/self-extracting/clickhouse-stripped /output mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds From db3d923d4cae57254cadcef7f6997f3912d46515 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 3 Apr 2024 20:25:29 +0000 Subject: [PATCH 016/265] return WITH_COVERAGE, fix build --- cmake/sanitize.cmake | 3 ++- docker/packager/packager | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 227d96357b5..9f4fa7081c6 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -64,7 +64,8 @@ option(WITH_COVERAGE "Instrumentation for code coverage with default implementat if (WITH_COVERAGE) message (STATUS "Enabled instrumentation for code coverage") - set(COVERAGE_FLAGS -fprofile-instr-generate -fcoverage-mapping) + set (COVERAGE_FLAGS -fprofile-instr-generate -fcoverage-mapping) + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping") endif() option (SANITIZE_COVERAGE "Instrumentation for code coverage with custom callbacks" OFF) diff --git a/docker/packager/packager b/docker/packager/packager index 355149df38c..23fc26bc1a4 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -276,6 +276,7 @@ def parse_env_variables( elif package_type == "fuzzers": cmake_flags.append("-DENABLE_FUZZING=1") cmake_flags.append("-DENABLE_PROTOBUF=1") + cmake_flags.append("-DWITH_COVERAGE=1") # Reduce linking and building time by avoid *install/all dependencies cmake_flags.append("-DCMAKE_SKIP_INSTALL_ALL_DEPENDENCY=ON") From 8b2bd3cfd7654fee98df6f024bcf7e4b6b4f2b49 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 26 Mar 2024 20:48:49 +0000 Subject: [PATCH 017/265] impl --- src/Common/AsynchronousMetrics.cpp | 187 +++++++++++++++++++++-------- src/Common/AsynchronousMetrics.h | 6 + 2 files changed, 142 insertions(+), 51 deletions(-) diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index ab54b180fbf..cf9e8d21bd8 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -1,18 +1,19 @@ -#include -#include -#include -#include -#include -#include -#include -#include +#include #include #include +#include #include #include #include +#include #include -#include +#include +#include +#include +#include +#include +#include +#include #include "config.h" @@ -78,6 +79,7 @@ AsynchronousMetrics::AsynchronousMetrics( openFileIfExists("/sys/fs/cgroup/memory.current", cgroupmem_usage_in_bytes); } openFileIfExists("/sys/fs/cgroup/cpu.max", cgroupcpu_max); + openFileIfExists("/sys/fs/cgroup/cpu.stat", cgroupcpu_stat); /// CGroups v1 if (!cgroupmem_limit_in_bytes) @@ -90,6 +92,8 @@ AsynchronousMetrics::AsynchronousMetrics( openFileIfExists("/sys/fs/cgroup/cpu/cpu.cfs_period_us", cgroupcpu_cfs_period); openFileIfExists("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", cgroupcpu_cfs_quota); } + if (!cgroupcpu_stat) + openFileIfExists("/sys/fs/cgroup/cpuacct/cpuacct.stat", cgroupcpuacct_stat); openFileIfExists("/proc/sys/vm/max_map_count", vm_max_map_count); openFileIfExists("/proc/self/maps", vm_maps); @@ -561,6 +565,82 @@ AsynchronousMetrics::NetworkInterfaceStatValues::operator-(const AsynchronousMet #endif +void AsynchronousMetrics::applyCPUMetricsUpdate( + AsynchronousMetricValues & new_values, const 
std::string & cpu_suffix, const ProcStatValuesCPU & delta_values, double multiplier) +{ + new_values["OSUserTime" + cpu_suffix] + = {delta_values.user * multiplier, + "The ratio of time the CPU core was running userspace code. This is a system-wide metric, it includes all the processes on the " + "host machine, not just clickhouse-server." + " This includes also the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline " + "stalls, branch mispredictions, running another SMT core)." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSNiceTime" + cpu_suffix] + = {delta_values.nice * multiplier, + "The ratio of time the CPU core was running userspace code with higher priority. This is a system-wide metric, it includes all " + "the processes on the host machine, not just clickhouse-server." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSSystemTime" + cpu_suffix] + = {delta_values.system * multiplier, + "The ratio of time the CPU core was running OS kernel (system) code. This is a system-wide metric, it includes all the " + "processes on the host machine, not just clickhouse-server." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSIdleTime" + cpu_suffix] + = {delta_values.idle * multiplier, + "The ratio of time the CPU core was idle (not even ready to run a process waiting for IO) from the OS kernel standpoint. This " + "is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." + " This does not include the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline " + "stalls, branch mispredictions, running another SMT core)." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSIOWaitTime" + cpu_suffix] + = {delta_values.iowait * multiplier, + "The ratio of time the CPU core was not running the code but when the OS kernel did not run any other process on this CPU as " + "the processes were waiting for IO. This is a system-wide metric, it includes all the processes on the host machine, not just " + "clickhouse-server." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSIrqTime" + cpu_suffix] + = {delta_values.irq * multiplier, + "The ratio of time spent for running hardware interrupt requests on the CPU. This is a system-wide metric, it includes all the " + "processes on the host machine, not just clickhouse-server." + " A high number of this metric may indicate hardware misconfiguration or a very high network load." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSSoftIrqTime" + cpu_suffix] + = {delta_values.softirq * multiplier, + "The ratio of time spent for running software interrupt requests on the CPU. This is a system-wide metric, it includes all the " + "processes on the host machine, not just clickhouse-server." 
+ " A high number of this metric may indicate inefficient software running on the system." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSStealTime" + cpu_suffix] + = {delta_values.steal * multiplier, + "The ratio of time spent in other operating systems by the CPU when running in a virtualized environment. This is a system-wide " + "metric, it includes all the processes on the host machine, not just clickhouse-server." + " Not every virtualized environments present this metric, and most of them don't." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSGuestTime" + cpu_suffix] + = {delta_values.guest * multiplier, + "The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel (See `man " + "procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." + " This metric is irrelevant for ClickHouse, but still exists for completeness." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; + new_values["OSGuestNiceTime" + cpu_suffix] + = {delta_values.guest_nice * multiplier, + "The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel, when a guest " + "was set to a higher priority (See `man procfs`). This is a system-wide metric, it includes all the processes on the host " + "machine, not just clickhouse-server." + " This metric is irrelevant for ClickHouse, but still exists for completeness." + " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across " + "them [0..num cores]."}; +} + void AsynchronousMetrics::update(TimePoint update_time, bool force_update) { Stopwatch watch; @@ -821,16 +901,57 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) new_values["CGroupMaxCPU"] = { max_cpu_cgroups, "The maximum number of CPU cores according to CGroups."}; } - if (proc_stat) + int64_t hz = sysconf(_SC_CLK_TCK); + if (-1 == hz) + throw ErrnoException(ErrorCodes::CANNOT_SYSCONF, "Cannot call 'sysconf' to obtain system HZ"); + + if (cgroupcpu_stat || cgroupcpuacct_stat) + { + ReadBufferFromFilePRead & in = cgroupcpu_stat ? *cgroupcpu_stat : *cgroupcpuacct_stat; + ProcStatValuesCPU current_values{}; + + /// We re-read the file from the beginning each time + in.rewind(); + + while (!in.eof()) + { + String name; + readStringUntilWhitespace(name, in); + skipWhitespaceIfAny(in); + + /// `user_usec` for cgroup v2 and `user` for cgroup v1 + if (name.starts_with("user")) + { + readText(current_values.user, in); + skipToNextLineOrEOF(in); + } + /// `system_usec` for cgroup v2 and `system` for cgroup v1 + else if (name.starts_with("system")) + { + readText(current_values.system, in); + skipToNextLineOrEOF(in); + } + else + skipToNextLineOrEOF(in); + } + + if (!first_run) + { + const ProcStatValuesCPU delta_values = current_values - proc_stat_values_all_cpus; + const auto cgroup_specific_divisor = cgroupcpu_stat ? 
1e6 : hz; + const double multiplier = 1.0 / cgroup_specific_divisor + / (std::chrono::duration_cast(time_since_previous_update).count() / 1e9); + applyCPUMetricsUpdate(new_values, /*cpu_suffix=*/"", delta_values, multiplier); + } + + proc_stat_values_all_cpus = current_values; + } + else if (proc_stat) { try { proc_stat->rewind(); - int64_t hz = sysconf(_SC_CLK_TCK); - if (-1 == hz) - throw ErrnoException(ErrorCodes::CANNOT_SYSCONF, "Cannot call 'sysconf' to obtain system HZ"); - double multiplier = 1.0 / hz / (std::chrono::duration_cast(time_since_previous_update).count() / 1e9); size_t num_cpus = 0; @@ -876,43 +997,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) else delta_values_all_cpus = delta_values; - new_values["OSUserTime" + cpu_suffix] = { delta_values.user * multiplier, - "The ratio of time the CPU core was running userspace code. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " This includes also the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core)." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSNiceTime" + cpu_suffix] = { delta_values.nice * multiplier, - "The ratio of time the CPU core was running userspace code with higher priority. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSSystemTime" + cpu_suffix] = { delta_values.system * multiplier, - "The ratio of time the CPU core was running OS kernel (system) code. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSIdleTime" + cpu_suffix] = { delta_values.idle * multiplier, - "The ratio of time the CPU core was idle (not even ready to run a process waiting for IO) from the OS kernel standpoint. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " This does not include the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core)." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSIOWaitTime" + cpu_suffix] = { delta_values.iowait * multiplier, - "The ratio of time the CPU core was not running the code but when the OS kernel did not run any other process on this CPU as the processes were waiting for IO. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSIrqTime" + cpu_suffix] = { delta_values.irq * multiplier, - "The ratio of time spent for running hardware interrupt requests on the CPU. 
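Boiled down, the new branch samples a cumulative per-cgroup counter file and turns the delta into a core-utilization ratio. A standalone sketch of the same arithmetic, assuming cgroup v2 (/sys/fs/cgroup/cpu.stat, microsecond counters; the cgroup v1 cpuacct.stat fallback is in scheduler ticks, hence dividing by sysconf(_SC_CLK_TCK) instead of 1e6):

    #include <chrono>
    #include <fstream>
    #include <iostream>
    #include <string>
    #include <thread>

    struct CpuStat { double user = 0; double system = 0; };

    static CpuStat readCpuStat()
    {
        CpuStat s;
        std::ifstream in("/sys/fs/cgroup/cpu.stat");
        std::string name;
        double value;
        while (in >> name >> value)
        {
            /// `user_usec` in cgroup v2, `user` in cgroup v1 -- hence starts_with() in the patch
            if (name.rfind("user", 0) == 0)
                s.user = value;
            else if (name.rfind("system", 0) == 0)
                s.system = value;
        }
        return s;
    }

    int main()
    {
        using namespace std::chrono;
        CpuStat before = readCpuStat();
        auto start = steady_clock::now();
        std::this_thread::sleep_for(seconds(1));
        CpuStat after = readCpuStat();
        double elapsed_sec = duration<double>(steady_clock::now() - start).count();

        /// Counters are cumulative microseconds, so delta / 1e6 / elapsed
        /// gives a ratio in [0..num cores], matching the OS*Time semantics.
        std::cout << "OSUserTime "   << (after.user - before.user) / 1e6 / elapsed_sec << '\n';
        std::cout << "OSSystemTime " << (after.system - before.system) / 1e6 / elapsed_sec << '\n';
    }
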
This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " A high number of this metric may indicate hardware misconfiguration or a very high network load." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSSoftIrqTime" + cpu_suffix] = { delta_values.softirq * multiplier, - "The ratio of time spent for running software interrupt requests on the CPU. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " A high number of this metric may indicate inefficient software running on the system." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSStealTime" + cpu_suffix] = { delta_values.steal * multiplier, - "The ratio of time spent in other operating systems by the CPU when running in a virtualized environment. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " Not every virtualized environments present this metric, and most of them don't." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSGuestTime" + cpu_suffix] = { delta_values.guest * multiplier, - "The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " This metric is irrelevant for ClickHouse, but still exists for completeness." - " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."}; - new_values["OSGuestNiceTime" + cpu_suffix] = { delta_values.guest_nice * multiplier, - "The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel, when a guest was set to a higher priority (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." - " This metric is irrelevant for ClickHouse, but still exists for completeness." - " The value for a single CPU core will be in the interval [0..1]. 
The value for all CPU cores is calculated as a sum across them [0..num cores]."}; + applyCPUMetricsUpdate(new_values, cpu_suffix, delta_values, multiplier); } prev_values = current_values; diff --git a/src/Common/AsynchronousMetrics.h b/src/Common/AsynchronousMetrics.h index 4b3d28e80c5..caebcd4cdef 100644 --- a/src/Common/AsynchronousMetrics.h +++ b/src/Common/AsynchronousMetrics.h @@ -122,6 +122,8 @@ private: std::optional cgroupcpu_cfs_period TSA_GUARDED_BY(data_mutex); std::optional cgroupcpu_cfs_quota TSA_GUARDED_BY(data_mutex); std::optional cgroupcpu_max TSA_GUARDED_BY(data_mutex); + std::optional cgroupcpu_stat TSA_GUARDED_BY(data_mutex); + std::optional cgroupcpuacct_stat TSA_GUARDED_BY(data_mutex); std::optional vm_max_map_count TSA_GUARDED_BY(data_mutex); std::optional vm_maps TSA_GUARDED_BY(data_mutex); @@ -217,6 +219,10 @@ private: void openBlockDevices(); void openSensorsChips(); void openEDAC(); + + void applyCPUMetricsUpdate( + AsynchronousMetricValues & new_values, const std::string & cpu_suffix, const ProcStatValuesCPU & delta_values, double multiplier); + #endif void run(); From 85e8a5678783521442a6e61bcd00ba6167302b6a Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 27 Mar 2024 17:02:52 +0000 Subject: [PATCH 018/265] normalized metrics --- src/Common/AsynchronousMetrics.cpp | 170 ++++++++++++++++++----------- src/Common/AsynchronousMetrics.h | 6 + 2 files changed, 115 insertions(+), 61 deletions(-) diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index cf9e8d21bd8..59595e701c1 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -641,6 +641,73 @@ void AsynchronousMetrics::applyCPUMetricsUpdate( "them [0..num cores]."}; } +void AsynchronousMetrics::applyNormalizedCPUMetricsUpdate( + AsynchronousMetricValues & new_values, double num_cpus_to_normalize, const ProcStatValuesCPU & delta_values_all_cpus, double multiplier) +{ + chassert(num_cpus_to_normalize); + + new_values["OSUserTimeNormalized"] + = {delta_values_all_cpus.user * multiplier / num_cpus_to_normalize, + "The value is similar to `OSUserTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSNiceTimeNormalized"] + = {delta_values_all_cpus.nice * multiplier / num_cpus_to_normalize, + "The value is similar to `OSNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSSystemTimeNormalized"] + = {delta_values_all_cpus.system * multiplier / num_cpus_to_normalize, + "The value is similar to `OSSystemTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." 
+ " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSIdleTimeNormalized"] + = {delta_values_all_cpus.idle * multiplier / num_cpus_to_normalize, + "The value is similar to `OSIdleTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSIOWaitTimeNormalized"] + = {delta_values_all_cpus.iowait * multiplier / num_cpus_to_normalize, + "The value is similar to `OSIOWaitTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSIrqTimeNormalized"] + = {delta_values_all_cpus.irq * multiplier / num_cpus_to_normalize, + "The value is similar to `OSIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of " + "the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSSoftIrqTimeNormalized"] + = {delta_values_all_cpus.softirq * multiplier / num_cpus_to_normalize, + "The value is similar to `OSSoftIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval " + "regardless of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSStealTimeNormalized"] + = {delta_values_all_cpus.steal * multiplier / num_cpus_to_normalize, + "The value is similar to `OSStealTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSGuestTimeNormalized"] + = {delta_values_all_cpus.guest * multiplier / num_cpus_to_normalize, + "The value is similar to `OSGuestTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless " + "of the number of cores." + " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; + new_values["OSGuestNiceTimeNormalized"] + = {delta_values_all_cpus.guest_nice * multiplier / num_cpus_to_normalize, + "The value is similar to `OSGuestNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval " + "regardless of the number of cores." 
+ " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " + "non-uniform, and still get the average resource utilization metric."}; +} + void AsynchronousMetrics::update(TimePoint update_time, bool force_update) { Stopwatch watch; @@ -907,44 +974,56 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) if (cgroupcpu_stat || cgroupcpuacct_stat) { - ReadBufferFromFilePRead & in = cgroupcpu_stat ? *cgroupcpu_stat : *cgroupcpuacct_stat; - ProcStatValuesCPU current_values{}; - - /// We re-read the file from the beginning each time - in.rewind(); - - while (!in.eof()) + try { - String name; - readStringUntilWhitespace(name, in); - skipWhitespaceIfAny(in); + ReadBufferFromFilePRead & in = cgroupcpu_stat ? *cgroupcpu_stat : *cgroupcpuacct_stat; + ProcStatValuesCPU current_values{}; - /// `user_usec` for cgroup v2 and `user` for cgroup v1 - if (name.starts_with("user")) + /// We re-read the file from the beginning each time + in.rewind(); + + while (!in.eof()) { - readText(current_values.user, in); - skipToNextLineOrEOF(in); + String name; + readStringUntilWhitespace(name, in); + skipWhitespaceIfAny(in); + + /// `user_usec` for cgroup v2 and `user` for cgroup v1 + if (name.starts_with("user")) + { + readText(current_values.user, in); + skipToNextLineOrEOF(in); + } + /// `system_usec` for cgroup v2 and `system` for cgroup v1 + else if (name.starts_with("system")) + { + readText(current_values.system, in); + skipToNextLineOrEOF(in); + } + else + skipToNextLineOrEOF(in); } - /// `system_usec` for cgroup v2 and `system` for cgroup v1 - else if (name.starts_with("system")) + + if (!first_run) { - readText(current_values.system, in); - skipToNextLineOrEOF(in); + const ProcStatValuesCPU delta_values = current_values - proc_stat_values_all_cpus; + const auto cgroup_specific_divisor = cgroupcpu_stat ? 1e6 : hz; + const double multiplier = 1.0 / cgroup_specific_divisor + / (std::chrono::duration_cast(time_since_previous_update).count() / 1e9); + applyCPUMetricsUpdate(new_values, /*cpu_suffix=*/"", delta_values, multiplier); + if (max_cpu_cgroups > 0) + applyNormalizedCPUMetricsUpdate(new_values, max_cpu_cgroups, delta_values, multiplier); } - else - skipToNextLineOrEOF(in); + + proc_stat_values_all_cpus = current_values; } - - if (!first_run) + catch (...) { - const ProcStatValuesCPU delta_values = current_values - proc_stat_values_all_cpus; - const auto cgroup_specific_divisor = cgroupcpu_stat ? 1e6 : hz; - const double multiplier = 1.0 / cgroup_specific_divisor - / (std::chrono::duration_cast(time_since_previous_update).count() / 1e9); - applyCPUMetricsUpdate(new_values, /*cpu_suffix=*/"", delta_values, multiplier); + tryLogCurrentException(__PRETTY_FUNCTION__); + openFileIfExists("/sys/fs/cgroup/cpu.stat", cgroupcpu_stat); + if (!cgroupcpu_stat) + openFileIfExists("/sys/fs/cgroup/cpuacct/cpuacct.stat", cgroupcpuacct_stat); } - - proc_stat_values_all_cpus = current_values; } else if (proc_stat) { @@ -1053,38 +1132,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) Float64 num_cpus_to_normalize = max_cpu_cgroups > 0 ? max_cpu_cgroups : num_cpus; if (num_cpus_to_normalize > 0) - { - new_values["OSUserTimeNormalized"] = { delta_values_all_cpus.user * multiplier / num_cpus_to_normalize, - "The value is similar to `OSUserTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." 
- " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSNiceTimeNormalized"] = { delta_values_all_cpus.nice * multiplier / num_cpus_to_normalize, - "The value is similar to `OSNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSSystemTimeNormalized"] = { delta_values_all_cpus.system * multiplier / num_cpus_to_normalize, - "The value is similar to `OSSystemTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSIdleTimeNormalized"] = { delta_values_all_cpus.idle * multiplier / num_cpus_to_normalize, - "The value is similar to `OSIdleTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSIOWaitTimeNormalized"] = { delta_values_all_cpus.iowait * multiplier / num_cpus_to_normalize, - "The value is similar to `OSIOWaitTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSIrqTimeNormalized"] = { delta_values_all_cpus.irq * multiplier / num_cpus_to_normalize, - "The value is similar to `OSIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSSoftIrqTimeNormalized"] = { delta_values_all_cpus.softirq * multiplier / num_cpus_to_normalize, - "The value is similar to `OSSoftIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSStealTimeNormalized"] = { delta_values_all_cpus.steal * multiplier / num_cpus_to_normalize, - "The value is similar to `OSStealTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." 
- " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSGuestTimeNormalized"] = { delta_values_all_cpus.guest * multiplier / num_cpus_to_normalize, - "The value is similar to `OSGuestTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - new_values["OSGuestNiceTimeNormalized"] = { delta_values_all_cpus.guest_nice * multiplier / num_cpus_to_normalize, - "The value is similar to `OSGuestNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores." - " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."}; - } + applyNormalizedCPUMetricsUpdate(new_values, num_cpus_to_normalize, delta_values_all_cpus, multiplier); } proc_stat_values_other = current_other_values; diff --git a/src/Common/AsynchronousMetrics.h b/src/Common/AsynchronousMetrics.h index caebcd4cdef..2b58fd78044 100644 --- a/src/Common/AsynchronousMetrics.h +++ b/src/Common/AsynchronousMetrics.h @@ -223,6 +223,12 @@ private: void applyCPUMetricsUpdate( AsynchronousMetricValues & new_values, const std::string & cpu_suffix, const ProcStatValuesCPU & delta_values, double multiplier); + void applyNormalizedCPUMetricsUpdate( + AsynchronousMetricValues & new_values, + double num_cpus_to_normalize, + const ProcStatValuesCPU & delta_values_all_cpus, + double multiplier); + #endif void run(); From 4aaae7fd4d3340131515be83764e56b5f5c17c13 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 27 Mar 2024 19:49:00 +0000 Subject: [PATCH 019/265] add test --- .../test_async_metrics_in_cgroup/test.py | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 tests/integration/test_async_metrics_in_cgroup/test.py diff --git a/tests/integration/test_async_metrics_in_cgroup/test.py b/tests/integration/test_async_metrics_in_cgroup/test.py new file mode 100644 index 00000000000..1bba42cb980 --- /dev/null +++ b/tests/integration/test_async_metrics_in_cgroup/test.py @@ -0,0 +1,77 @@ +import pytest +import subprocess +import time + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node") + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_check_client_logs_level(start_cluster): + # check that our metrics sources actually exist + assert ( + subprocess.Popen("test -f /sys/fs/cgroup/cpu.stat".split(" ")).wait() == 0 + or subprocess.Popen( + "test -f /sys/fs/cgroup/cpuacct/cpuacct.stat".split(" ") + ).wait() + == 0 + ) + + # first let's spawn some cpu-intensive process outside of the container and check that it doesn't accounted by ClickHouse server + proc = subprocess.Popen( + "openssl speed -multi 8".split(" "), + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + time.sleep(5) + + metric = node.query( + """ + SYSTEM FLUSH LOGS; + + SELECT max(value) + FROM ( + SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value + FROM 
system.asynchronous_metric_log + WHERE event_time >= now() - 60 AND metric = 'OSUserTime' + GROUP BY t + ) + """ + ).strip("\n") + + assert float(metric) <= 2 + + proc.kill() + + # then let's test that we will account cpu time spent by the server itself + node.query( + "SELECT cityHash64(*) FROM system.numbers_mt FORMAT Null SETTINGS max_execution_time=5, max_threads=8", + ignore_error=True, + ) + + metric = node.query( + """ + SYSTEM FLUSH LOGS; + + SELECT max(value) + FROM ( + SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value + FROM system.asynchronous_metric_log + WHERE event_time >= now() - 60 AND metric = 'OSUserTime' + GROUP BY t + ) + """ + ).strip("\n") + + assert 4 <= float(metric) <= 12 From 75011d6f21e4948bf86fd52e2330fe2f2d8fa922 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 27 Mar 2024 20:22:15 +0000 Subject: [PATCH 020/265] fix style --- tests/integration/test_async_metrics_in_cgroup/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/integration/test_async_metrics_in_cgroup/__init__.py diff --git a/tests/integration/test_async_metrics_in_cgroup/__init__.py b/tests/integration/test_async_metrics_in_cgroup/__init__.py new file mode 100644 index 00000000000..e69de29bb2d From d84a01cabfbb97a8b875620292f843c1247e6382 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 28 Mar 2024 20:46:02 +0000 Subject: [PATCH 021/265] better --- src/Common/AsynchronousMetrics.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index 59595e701c1..0943232e776 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -565,6 +565,7 @@ AsynchronousMetrics::NetworkInterfaceStatValues::operator-(const AsynchronousMet #endif +#if defined(OS_LINUX) void AsynchronousMetrics::applyCPUMetricsUpdate( AsynchronousMetricValues & new_values, const std::string & cpu_suffix, const ProcStatValuesCPU & delta_values, double multiplier) { @@ -707,6 +708,7 @@ void AsynchronousMetrics::applyNormalizedCPUMetricsUpdate( " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is " "non-uniform, and still get the average resource utilization metric."}; } +#endif void AsynchronousMetrics::update(TimePoint update_time, bool force_update) { @@ -968,10 +970,6 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) new_values["CGroupMaxCPU"] = { max_cpu_cgroups, "The maximum number of CPU cores according to CGroups."}; } - int64_t hz = sysconf(_SC_CLK_TCK); - if (-1 == hz) - throw ErrnoException(ErrorCodes::CANNOT_SYSCONF, "Cannot call 'sysconf' to obtain system HZ"); - if (cgroupcpu_stat || cgroupcpuacct_stat) { try @@ -1006,10 +1004,14 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) if (!first_run) { - const ProcStatValuesCPU delta_values = current_values - proc_stat_values_all_cpus; - const auto cgroup_specific_divisor = cgroupcpu_stat ? 1e6 : hz; - const double multiplier = 1.0 / cgroup_specific_divisor + int64_t hz = sysconf(_SC_CLK_TCK); + if (-1 == hz) + throw ErrnoException(ErrorCodes::CANNOT_SYSCONF, "Cannot call 'sysconf' to obtain system HZ"); + const auto cgroup_version_specific_divisor = cgroupcpu_stat ? 
1e6 : hz; + const double multiplier = 1.0 / cgroup_version_specific_divisor + / (std::chrono::duration_cast(time_since_previous_update).count() / 1e9); + + const ProcStatValuesCPU delta_values = current_values - proc_stat_values_all_cpus; applyCPUMetricsUpdate(new_values, /*cpu_suffix=*/"", delta_values, multiplier); if (max_cpu_cgroups > 0) applyNormalizedCPUMetricsUpdate(new_values, max_cpu_cgroups, delta_values, multiplier); @@ -1031,6 +1033,10 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) { proc_stat->rewind(); + int64_t hz = sysconf(_SC_CLK_TCK); + if (-1 == hz) + throw ErrnoException(ErrorCodes::CANNOT_SYSCONF, "Cannot call 'sysconf' to obtain system HZ"); + double multiplier = 1.0 / hz / (std::chrono::duration_cast(time_since_previous_update).count() / 1e9); size_t num_cpus = 0; From 77e3ff7ff50b0e78235ab9a8ee88b258bdcaf510 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 28 Mar 2024 21:18:29 +0000 Subject: [PATCH 022/265] fix test --- tests/integration/test_async_metrics_in_cgroup/test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_async_metrics_in_cgroup/test.py b/tests/integration/test_async_metrics_in_cgroup/test.py index 1bba42cb980..e63d53e1485 100644 --- a/tests/integration/test_async_metrics_in_cgroup/test.py +++ b/tests/integration/test_async_metrics_in_cgroup/test.py @@ -17,7 +17,7 @@ def start_cluster(): cluster.shutdown() -def test_check_client_logs_level(start_cluster): +def test_user_cpu_accounting(start_cluster): # check that our metrics sources actually exist assert ( subprocess.Popen("test -f /sys/fs/cgroup/cpu.stat".split(" ")).wait() == 0 @@ -50,7 +50,7 @@ def test_check_client_logs_level(start_cluster): """ ).strip("\n") - assert float(metric) <= 2 + assert float(metric) < 2 proc.kill() @@ -74,4 +74,5 @@ def test_check_client_logs_level(start_cluster): """ ).strip("\n") - assert 4 <= float(metric) <= 12 + # this check is really weak, but CI is a tough place and we cannot guarantee that the test process will get much CPU time + assert float(metric) > 1 From bc6a82d9cd68a8a4af3ef92b9a91eaa3be0aa347 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 9 Apr 2024 18:35:11 +0000 Subject: [PATCH 023/265] fix test --- .../test_async_metrics_in_cgroup/test.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/tests/integration/test_async_metrics_in_cgroup/test.py b/tests/integration/test_async_metrics_in_cgroup/test.py index e63d53e1485..00951c95a0e 100644 --- a/tests/integration/test_async_metrics_in_cgroup/test.py +++ b/tests/integration/test_async_metrics_in_cgroup/test.py @@ -18,6 +18,9 @@ def start_cluster(): def test_user_cpu_accounting(start_cluster): + if node.is_built_with_sanitizer(): + pytest.skip("Disabled for sanitizers") + # check that our metrics sources actually exist assert ( subprocess.Popen("test -f /sys/fs/cgroup/cpu.stat".split(" ")).wait() == 0 @@ -38,14 +41,12 @@ def test_user_cpu_accounting(start_cluster): metric = node.query( """ - SYSTEM FLUSH LOGS; - SELECT max(value) FROM ( SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value - FROM system.asynchronous_metric_log - WHERE event_time >= now() - 60 AND metric = 'OSUserTime' - GROUP BY t + FROM system.asynchronous_metric_log + WHERE event_time >= now() - 60 AND metric = 'OSUserTime' + GROUP BY t ) """ ).strip("\n") @@ -56,20 +57,18 @@ def test_user_cpu_accounting(start_cluster): # then let's test that we will account cpu time spent by the
server itself node.query( - "SELECT cityHash64(*) FROM system.numbers_mt FORMAT Null SETTINGS max_execution_time=5, max_threads=8", + "SELECT cityHash64(*) FROM system.numbers_mt FORMAT Null SETTINGS max_execution_time=10", ignore_error=True, ) metric = node.query( """ - SYSTEM FLUSH LOGS; - SELECT max(value) FROM ( SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value - FROM system.asynchronous_metric_log - WHERE event_time >= now() - 60 AND metric = 'OSUserTime' - GROUP BY t + FROM system.asynchronous_metric_log + WHERE event_time >= now() - 60 AND metric = 'OSUserTime' + GROUP BY t ) """ ).strip("\n") From 05e823a1e9eff9d0df0b6473c19eddc03811d016 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 14 May 2024 15:37:20 +0000 Subject: [PATCH 024/265] add chunked wrapper to native protocol --- src/Client/Connection.cpp | 30 ++++-- src/Client/Connection.h | 8 +- src/Core/ProtocolDefines.h | 5 +- src/IO/ReadBufferFromPocoSocket.cpp | 54 +++++++--- src/IO/ReadBufferFromPocoSocket.h | 3 + src/IO/ReadBufferFromPocoSocketChunked.cpp | 114 +++++++++++++++++++++ src/IO/ReadBufferFromPocoSocketChunked.h | 32 ++++++ src/IO/WriteBufferFromPocoSocketChunked.h | 56 ++++++++++ src/Server/TCPHandler.cpp | 50 +++++++-- src/Server/TCPHandler.h | 6 +- 10 files changed, 322 insertions(+), 36 deletions(-) create mode 100644 src/IO/ReadBufferFromPocoSocketChunked.cpp create mode 100644 src/IO/ReadBufferFromPocoSocketChunked.h create mode 100644 src/IO/WriteBufferFromPocoSocketChunked.h diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 483201509c4..970768e515e 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -4,8 +4,6 @@ #include #include #include -#include -#include #include #include #include @@ -191,10 +189,10 @@ void Connection::connect(const ConnectionTimeouts & timeouts) , tcp_keep_alive_timeout_in_sec); } - in = std::make_shared(*socket); + in = std::make_shared(*socket); in->setAsyncCallback(async_callback); - out = std::make_shared(*socket); + out = std::make_shared(*socket); out->setAsyncCallback(async_callback); connected = true; setDescription(); @@ -205,6 +203,12 @@ void Connection::connect(const ConnectionTimeouts & timeouts) if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) sendAddendum(); + if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + in->enableChunked(); + out->enableChunked(); + } + LOG_TRACE(log_wrapper.get(), "Connected to {} server version {}.{}.{}.", server_name, server_version_major, server_version_minor, server_version_patch); } @@ -567,6 +571,7 @@ bool Connection::ping(const ConnectionTimeouts & timeouts) UInt64 pong = 0; writeVarUInt(Protocol::Client::Ping, *out); + out->finishPacket(); out->next(); if (in->eof()) @@ -611,6 +616,7 @@ TablesStatusResponse Connection::getTablesStatus(const ConnectionTimeouts & time writeVarUInt(Protocol::Client::TablesStatusRequest, *out); request.write(*out, server_revision); + out->finishPacket(); out->next(); UInt64 response_type = 0; @@ -762,6 +768,8 @@ void Connection::sendQuery( block_profile_events_in.reset(); block_out.reset(); + out->finishPacket(); + /// Send empty block which means end of data. 
if (!with_pending_data) { @@ -778,6 +786,7 @@ void Connection::sendCancel() return; writeVarUInt(Protocol::Client::Cancel, *out); + out->finishPacket(); out->next(); } @@ -804,6 +813,8 @@ void Connection::sendData(const Block & block, const String & name, bool scalar) block_out->write(block); maybe_compressed_out->next(); + if (!block) + out->finishPacket(); out->next(); if (throttler) @@ -814,6 +825,7 @@ void Connection::sendIgnoredPartUUIDs(const std::vector & uuids) { writeVarUInt(Protocol::Client::IgnoredPartUUIDs, *out); writeVectorBinary(uuids, *out); + out->finishPacket(); out->next(); } @@ -823,6 +835,7 @@ void Connection::sendReadTaskResponse(const String & response) writeVarUInt(Protocol::Client::ReadTaskResponse, *out); writeVarUInt(DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION, *out); writeStringBinary(response, *out); + out->finishPacket(); out->next(); } @@ -831,6 +844,7 @@ void Connection::sendMergeTreeReadTaskResponse(const ParallelReadResponse & resp { writeVarUInt(Protocol::Client::MergeTreeReadTaskResponse, *out); response.serialize(*out); + out->finishPacket(); out->next(); } @@ -848,6 +862,8 @@ void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String copyData(input, *out); else copyData(input, *out, size); + + out->finishPacket(); out->next(); } @@ -876,6 +892,8 @@ void Connection::sendScalarsData(Scalars & data) sendData(elem.second, elem.first, true /* scalar */); } + out->finishPacket(); + out_bytes = out->count() - out_bytes; maybe_compressed_out_bytes = maybe_compressed_out->count() - maybe_compressed_out_bytes; double elapsed = watch.elapsedSeconds(); @@ -1018,13 +1036,13 @@ std::optional Connection::getResolvedAddress() const bool Connection::poll(size_t timeout_microseconds) { - return static_cast(*in).poll(timeout_microseconds); + return in->poll(timeout_microseconds); } bool Connection::hasReadPendingData() const { - return last_input_packet_type.has_value() || static_cast(*in).hasPendingData(); + return last_input_packet_type.has_value() || in->hasPendingData(); } diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 9632eb9d948..e7a6d948204 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -8,8 +8,8 @@ #include -#include -#include +#include +#include #include #include @@ -207,8 +207,8 @@ private: String server_display_name; std::unique_ptr socket; - std::shared_ptr in; - std::shared_ptr out; + std::shared_ptr in; + std::shared_ptr out; std::optional last_input_packet_type; String query_id; diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index 159a4c28b6d..837801edcbb 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -79,6 +79,9 @@ static constexpr auto DBMS_MIN_REVISION_WITH_SSH_AUTHENTICATION = 54466; /// Send read-only flag for Replicated tables as well static constexpr auto DBMS_MIN_REVISION_WITH_TABLE_READ_ONLY_CHECK = 54467; +/// Packets size header +static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS = 54468; + /// Version of ClickHouse TCP protocol. /// /// Should be incremented manually on protocol changes. @@ -86,6 +89,6 @@ static constexpr auto DBMS_MIN_REVISION_WITH_TABLE_READ_ONLY_CHECK = 54467; /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, /// later is just a number for server version (one number instead of commit SHA) /// for simplicity (sometimes it may be more convenient in some use cases). 
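/// A minimal illustrative sketch of how a minimum-revision constant such as
/// DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS gates an optional feature
/// after the Hello exchange; the helper below and its peer_revision parameter
/// are hypothetical, not part of this header.
inline bool peerSupportsChunkedPackets(UInt64 peer_revision)
{
    /// Chunked framing may be used only once the peer has advertised a
    /// revision at least as new as the one that introduced the feature.
    return peer_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS;
}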
-static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54467; +static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54468; } diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index 26cdee4140c..5fb7ea0440c 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -32,25 +32,13 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -bool ReadBufferFromPocoSocket::nextImpl() +size_t ReadBufferFromPocoSocket::readSocket(Position begin, size_t size) { ssize_t bytes_read = 0; - Stopwatch watch; - - SCOPE_EXIT({ - /// NOTE: it is quite inaccurate on high loads since the thread could be replaced by another one - ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds()); - ProfileEvents::increment(ProfileEvents::NetworkReceiveBytes, bytes_read); - }); /// Add more details to exceptions. try { - CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkReceive); - - if (internal_buffer.size() > INT_MAX) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer overflow"); - /// If async_callback is specified, set socket to non-blocking mode /// and try to read data from it, if socket is not ready for reading, /// run async_callback and try again later. @@ -61,7 +49,7 @@ bool ReadBufferFromPocoSocket::nextImpl() socket.setBlocking(false); SCOPE_EXIT(socket.setBlocking(true)); bool secure = socket.secure(); - bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), static_cast(internal_buffer.size())); + bytes_read = socket.impl()->receiveBytes(begin, static_cast(size)); /// Check EAGAIN and ERR_SSL_WANT_READ/ERR_SSL_WANT_WRITE for secure socket (reading from secure socket can write too). while (bytes_read < 0 && (errno == EAGAIN || (secure && (checkSSLWantRead(bytes_read) || checkSSLWantWrite(bytes_read))))) @@ -73,12 +61,12 @@ bool ReadBufferFromPocoSocket::nextImpl() async_callback(socket.impl()->sockfd(), socket.getReceiveTimeout(), AsyncEventTimeoutType::RECEIVE, socket_description, AsyncTaskExecutor::Event::READ | AsyncTaskExecutor::Event::ERROR); /// Try to read again. 
- bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), static_cast(internal_buffer.size())); + bytes_read = socket.impl()->receiveBytes(begin, static_cast(size)); } } else { - bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), static_cast(internal_buffer.size())); + bytes_read = socket.impl()->receiveBytes(begin, static_cast(size)); } } catch (const Poco::Net::NetException & e) @@ -99,6 +87,40 @@ bool ReadBufferFromPocoSocket::nextImpl() if (bytes_read < 0) throw NetException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot read from socket (peer: {}, local: {})", peer_address.toString(), socket.address().toString()); + return bytes_read; +} + +bool ReadBufferFromPocoSocket::readSocketExact(Position begin, size_t size) +{ + for (size_t bytes_left = size; bytes_left > 0;) + { + size_t ret = readSocket(begin + size - bytes_left, bytes_left); + if (ret == 0) + return false; + bytes_left -= ret; + } + + return true; +} + +bool ReadBufferFromPocoSocket::nextImpl() +{ + ssize_t bytes_read = 0; + Stopwatch watch; + + SCOPE_EXIT({ + /// NOTE: it is quite inaccurate on high loads since the thread could be replaced by another one + ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::NetworkReceiveBytes, bytes_read); + }); + + CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkReceive); + + if (internal_buffer.size() > INT_MAX) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer overflow"); + + bytes_read = readSocket(internal_buffer.begin(), internal_buffer.size()); + if (read_event != ProfileEvents::end()) ProfileEvents::increment(read_event, bytes_read); diff --git a/src/IO/ReadBufferFromPocoSocket.h b/src/IO/ReadBufferFromPocoSocket.h index 76156612764..c40a54ed7ae 100644 --- a/src/IO/ReadBufferFromPocoSocket.h +++ b/src/IO/ReadBufferFromPocoSocket.h @@ -32,6 +32,9 @@ public: void setAsyncCallback(AsyncCallback async_callback_) { async_callback = std::move(async_callback_); } + size_t readSocket(Position begin, size_t size); + bool readSocketExact(Position begin, size_t size); + private: AsyncCallback async_callback; std::string socket_description; diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp new file mode 100644 index 00000000000..f0a157a7e1c --- /dev/null +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -0,0 +1,114 @@ +#include +#include + + +namespace DB::ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace DB +{ +ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size) + : ReadBufferFromPocoSocketChunked(socket_, ProfileEvents::end(), buf_size) +{} + +ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size) + : ReadBuffer(nullptr, 0), log(getLogger("Protocol")), buffer_socket(socket_, read_event_, buf_size) +{ + chassert(buf_size <= std::numeric_limits::max()); + + working_buffer = buffer_socket.buffer(); + pos = buffer_socket.position(); +} + +void ReadBufferFromPocoSocketChunked::enableChunked() +{ + chunked = true; +} + +bool ReadBufferFromPocoSocketChunked::poll(size_t timeout_microseconds) +{ + buffer_socket.position() = pos + skip_next; + return buffer_socket.poll(timeout_microseconds); +} + +void ReadBufferFromPocoSocketChunked::setAsyncCallback(AsyncCallback async_callback_) +{ + buffer_socket.setAsyncCallback(async_callback_); +} + 
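/// A minimal usage sketch of the readSocketExact() primitive defined above:
/// read a 4-byte chunk length, then exactly that many payload bytes. The
/// readWholeChunk helper is hypothetical and only illustrates the intended
/// calling pattern.
static bool readWholeChunk(ReadBufferFromPocoSocket & in, std::vector<char> & payload)
{
    UInt32 len = 0;
    if (!in.readSocketExact(reinterpret_cast<char *>(&len), sizeof(len)))
        return false; /// the peer closed the connection before a full header arrived
    payload.resize(len);
    return in.readSocketExact(payload.data(), len);
}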
+bool ReadBufferFromPocoSocketChunked::startChunk() +{ + do { + if (buffer_socket.read(reinterpret_cast(&chunk_left), sizeof(chunk_left)) == 0) + return false; + if (chunk_left == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: empty chunk received"); + } while (chunk_left == 0); + + return nextChunk(); +} + +bool ReadBufferFromPocoSocketChunked::nextChunk() +{ + static bool start = false; + + if (chunk_left == 0) { + start = true; + return startChunk(); + } + + if (buffer_socket.available() == 0) + if (!buffer_socket.next()) + return false; + if (start) + LOG_TEST(log, "Packet recieve started. Message {}, size {}", static_cast(*buffer_socket.position()), chunk_left); + else + LOG_TEST(log, "Packet recieve continued. Size {}", chunk_left); + + start = false; + + nextimpl_working_buffer_offset = buffer_socket.offset(); + + if (buffer_socket.available() < chunk_left) + { + working_buffer.resize(buffer_socket.offset() + buffer_socket.available()); + chunk_left -= buffer_socket.available(); + return true; + } + + working_buffer.resize(buffer_socket.offset() + chunk_left); + skip_next = std::min(static_cast(4), buffer_socket.available() - chunk_left); + + if (skip_next > 0) + std::memcpy(&chunk_left, buffer_socket.position() + chunk_left, skip_next); + if (4 > skip_next) + if (!buffer_socket.readSocketExact(reinterpret_cast(&chunk_left) + skip_next, 4 - skip_next)) + return false; + + if (chunk_left == 0) + LOG_TEST(log, "Packet recieve ended."); + + return true; +} + + +bool ReadBufferFromPocoSocketChunked::nextImpl() +{ + buffer_socket.position() = pos + skip_next; + skip_next = 0; + + if (chunked) + return nextChunk(); + + if (!buffer_socket.next()) + return false; + + pos = buffer_socket.position(); + working_buffer.resize(offset() + buffer_socket.available()); + + return true; +} + +} diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h new file mode 100644 index 00000000000..3d7d91ac93a --- /dev/null +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ReadBufferFromPocoSocketChunked: public ReadBuffer +{ +public: + explicit ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + explicit ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + + void enableChunked(); + bool poll(size_t timeout_microseconds); + void setAsyncCallback(AsyncCallback async_callback_); + +protected: + bool startChunk(); + bool nextChunk(); + bool nextImpl() override; + +private: + LoggerPtr log; + ReadBufferFromPocoSocket buffer_socket; + bool chunked = false; + UInt32 chunk_left = 0; // chunk left to read from socket + UInt8 skip_next = 0; // skip already processed bytes in buffer_socket +}; + +} diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h new file mode 100644 index 00000000000..b316393aab6 --- /dev/null +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -0,0 +1,56 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +class WriteBufferFromPocoSocketChunked: public WriteBufferFromPocoSocket +{ +public: + explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, buf_size), log(getLogger("Protocol")) {} + explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const 
ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, write_event_, buf_size), log(getLogger("Protocol")) {} + + void enableChunked() { chunked = true; } + void finishPacket() + { + if (!chunked) + return; + + next(); + + if (finished) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: attempt to send empty chunk"); + + LOG_TEST(log, "Packet send ended."); + finished = true; + + UInt32 s = 0; + socketSendBytes(reinterpret_cast(&s), sizeof(s)); + } +protected: + void nextImpl() override + { + if (chunked) + { + UInt32 s = static_cast(offset()); + if (finished) + LOG_TEST(log, "Packet send started. Message {}, size {}", static_cast(*buffer().begin()), s); + else + LOG_TEST(log, "Packet send continued. Size {}", s); + + finished = false; + socketSendBytes(reinterpret_cast(&s), sizeof(s)); + } + + WriteBufferFromPocoSocket::nextImpl(); + } +private: + LoggerPtr log; + bool chunked = false; + bool finished = true; +}; + +} diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index ae2f150c4a1..aa33988fdc4 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -19,8 +19,6 @@ #include #include #include -#include -#include #include #include #include @@ -253,8 +251,8 @@ void TCPHandler::runImpl() socket().setSendTimeout(send_timeout); socket().setNoDelay(true); - in = std::make_shared(socket(), read_event); - out = std::make_shared(socket(), write_event); + in = std::make_shared(socket(), read_event); + out = std::make_shared(socket(), write_event); /// Support for PROXY protocol if (parse_proxy_protocol && !receiveProxyHeader()) @@ -289,6 +287,12 @@ void TCPHandler::runImpl() if (!default_database.empty()) session->sessionContext()->setCurrentDatabase(default_database); } + + if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + in->enableChunked(); + out->enableChunked(); + } } catch (const Exception & e) /// Typical for an incorrect username, password, or address. { @@ -320,7 +324,7 @@ void TCPHandler::runImpl() { Stopwatch idle_time; UInt64 timeout_ms = std::min(poll_interval, idle_connection_timeout) * 1000000; - while (tcp_server.isOpen() && !server.isCancelled() && !static_cast(*in).poll(timeout_ms)) + while (tcp_server.isOpen() && !server.isCancelled() && !in->poll(timeout_ms)) { if (idle_time.elapsedSeconds() > idle_connection_timeout) { @@ -788,7 +792,7 @@ bool TCPHandler::readDataNext() /// We are waiting for a packet from the client. Thus, every `POLL_INTERVAL` seconds check whether we need to shut down. while (true) { - if (static_cast(*in).poll(timeout_us)) + if (in->poll(timeout_us)) { /// If client disconnected. 
if (in->eof()) @@ -1154,6 +1158,8 @@ void TCPHandler::processTablesStatusRequest() } response.write(*out, client_tcp_protocol_version); + + out->finishPacket(); } void TCPHandler::receiveUnexpectedTablesStatusRequest() @@ -1174,6 +1180,8 @@ void TCPHandler::sendPartUUIDs() writeVarUInt(Protocol::Server::PartUUIDs, *out); writeVectorBinary(uuids, *out); + + out->finishPacket(); out->next(); } } @@ -1182,6 +1190,8 @@ void TCPHandler::sendPartUUIDs() void TCPHandler::sendReadTaskRequestAssumeLocked() { writeVarUInt(Protocol::Server::ReadTaskRequest, *out); + + out->finishPacket(); out->next(); } @@ -1190,6 +1200,8 @@ void TCPHandler::sendMergeTreeAllRangesAnnouncementAssumeLocked(InitialAllRanges { writeVarUInt(Protocol::Server::MergeTreeAllRangesAnnouncement, *out); announcement.serialize(*out); + + out->finishPacket(); out->next(); } @@ -1198,6 +1210,8 @@ void TCPHandler::sendMergeTreeReadTaskRequestAssumeLocked(ParallelReadRequest re { writeVarUInt(Protocol::Server::MergeTreeReadTaskRequest, *out); request.serialize(*out); + + out->finishPacket(); out->next(); } @@ -1206,6 +1220,8 @@ void TCPHandler::sendProfileInfo(const ProfileInfo & info) { writeVarUInt(Protocol::Server::ProfileInfo, *out); info.write(*out); + + out->finishPacket(); out->next(); } @@ -1221,6 +1237,8 @@ void TCPHandler::sendTotals(const Block & totals) state.block_out->write(totals); state.maybe_compressed_out->next(); + + out->finishPacket(); out->next(); } } @@ -1237,6 +1255,8 @@ void TCPHandler::sendExtremes(const Block & extremes) state.block_out->write(extremes); state.maybe_compressed_out->next(); + + out->finishPacket(); out->next(); } } @@ -1254,6 +1274,8 @@ void TCPHandler::sendProfileEvents() writeStringBinary("", *out); state.profile_events_block_out->write(block); + + out->finishPacket(); out->next(); auto elapsed_milliseconds = stopwatch.elapsedMilliseconds(); @@ -1291,6 +1313,8 @@ void TCPHandler::sendTimezone() LOG_DEBUG(log, "TCPHandler::sendTimezone(): {}", tz); writeVarUInt(Protocol::Server::TimezoneUpdate, *out); writeStringBinary(tz, *out); + + out->finishPacket(); out->next(); } @@ -1636,6 +1660,7 @@ bool TCPHandler::receivePacket() case Protocol::Client::Ping: writeVarUInt(Protocol::Server::Pong, *out); + out->finishPacket(); out->next(); return false; @@ -2152,7 +2177,7 @@ QueryState::CancellationStatus TCPHandler::getQueryCancellationStatus() after_check_cancelled.restart(); /// During request execution the only packet that can come from the client is stopping the query. - if (static_cast(*in).poll(0)) + if (in->poll(0)) { if (in->eof()) { @@ -2216,6 +2241,8 @@ void TCPHandler::sendData(const Block & block) state.block_out->write(block); state.maybe_compressed_out->next(); + + out->finishPacket(); out->next(); } catch (...) 
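/// For reference, a minimal sketch of the ULEB128-style encoding behind the
/// writeVarUInt() calls that emit the packet type codes above: seven payload
/// bits per byte, with the high bit set while more bytes follow. The function
/// below is a hypothetical stand-in, not the real implementation.
inline void writeVarUIntSketch(UInt64 x, std::string & out)
{
    do
    {
        UInt8 byte = x & 0x7F;
        x >>= 7;
        if (x)
            byte |= 0x80; /// continuation bit: more bytes follow
        out.push_back(static_cast<char>(byte));
    } while (x);
}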
@@ -2251,6 +2278,8 @@ void TCPHandler::sendLogData(const Block & block) writeStringBinary("", *out); state.logs_block_out->write(block); + + out->finishPacket(); out->next(); } @@ -2262,6 +2291,7 @@ void TCPHandler::sendTableColumns(const ColumnsDescription & columns) writeStringBinary("", *out); writeStringBinary(columns.toString(), *out); + out->finishPacket(); out->next(); } @@ -2271,6 +2301,8 @@ void TCPHandler::sendException(const Exception & e, bool with_stack_trace) writeVarUInt(Protocol::Server::Exception, *out); writeException(e, *out, with_stack_trace); + + out->finishPacket(); out->next(); } @@ -2281,6 +2313,8 @@ void TCPHandler::sendEndOfStream() state.io.setAllDataSent(); writeVarUInt(Protocol::Server::EndOfStream, *out); + + out->finishPacket(); out->next(); } @@ -2299,6 +2333,8 @@ void TCPHandler::sendProgress() increment.elapsed_ns = current_elapsed_ns - state.prev_elapsed_ns; state.prev_elapsed_ns = current_elapsed_ns; increment.write(*out, client_tcp_protocol_version); + + out->finishPacket(); out->next(); } diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 191617f1905..67d77381167 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include "IServer.h" #include "Interpreters/AsynchronousInsertQueue.h" @@ -204,8 +206,8 @@ private: ClientInfo::QueryKind query_kind = ClientInfo::QueryKind::NO_QUERY; /// Streams for reading/writing from/to client connection socket. - std::shared_ptr in; - std::shared_ptr out; + std::shared_ptr in; + std::shared_ptr out; ProfileEvents::Event read_event; ProfileEvents::Event write_event; From daf8277e55058e42fddafc49416164d5cb0ab601 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 14 May 2024 16:00:58 +0000 Subject: [PATCH 025/265] fix --- src/IO/ReadBufferFromPocoSocketChunked.cpp | 17 ++++++++--------- src/IO/ReadBufferFromPocoSocketChunked.h | 1 + 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index f0a157a7e1c..33bed2a32c4 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -52,22 +52,21 @@ bool ReadBufferFromPocoSocketChunked::startChunk() bool ReadBufferFromPocoSocketChunked::nextChunk() { - static bool start = false; - - if (chunk_left == 0) { - start = true; + if (chunk_left == 0) + { + started = true; return startChunk(); } if (buffer_socket.available() == 0) if (!buffer_socket.next()) return false; - if (start) - LOG_TEST(log, "Packet recieve started. Message {}, size {}", static_cast(*buffer_socket.position()), chunk_left); + if (started) + LOG_TEST(log, "Packet receive started. Message {}, size {}", static_cast(*buffer_socket.position()), chunk_left); else - LOG_TEST(log, "Packet recieve continued. Size {}", chunk_left); + LOG_TEST(log, "Packet receive continued. 
Size {}", chunk_left); - start = false; + started = false; nextimpl_working_buffer_offset = buffer_socket.offset(); @@ -88,7 +87,7 @@ bool ReadBufferFromPocoSocketChunked::nextChunk() return false; if (chunk_left == 0) - LOG_TEST(log, "Packet recieve ended."); + LOG_TEST(log, "Packet receive ended."); return true; } diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index 3d7d91ac93a..5930285e18a 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -27,6 +27,7 @@ private: bool chunked = false; UInt32 chunk_left = 0; // chunk left to read from socket UInt8 skip_next = 0; // skip already processed bytes in buffer_socket + bool started = false; }; } From dfdf31f1b6efbbda847a693a22969c2187a949f7 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 14 May 2024 18:09:11 +0000 Subject: [PATCH 026/265] host-net conversion --- src/IO/NetUtils.h | 26 ++++++++++++++++++++++ src/IO/ReadBufferFromPocoSocketChunked.cpp | 16 ++++++++----- src/IO/WriteBufferFromPocoSocketChunked.h | 2 ++ 3 files changed, 38 insertions(+), 6 deletions(-) create mode 100644 src/IO/NetUtils.h diff --git a/src/IO/NetUtils.h b/src/IO/NetUtils.h new file mode 100644 index 00000000000..ac6b5eec9a7 --- /dev/null +++ b/src/IO/NetUtils.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +template +constexpr T netToHost(T value) noexcept +{ + if constexpr (std::endian::native != std::endian::big) + return std::byteswap(value); + return value; +} + +template +constexpr T hostToNet(T value) noexcept +{ + if constexpr (std::endian::native != std::endian::big) + return std::byteswap(value); + return value; +} + +} diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 33bed2a32c4..27903761934 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB::ErrorCodes @@ -9,6 +10,7 @@ namespace DB::ErrorCodes namespace DB { + ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size) : ReadBufferFromPocoSocketChunked(socket_, ProfileEvents::end(), buf_size) {} @@ -40,12 +42,12 @@ void ReadBufferFromPocoSocketChunked::setAsyncCallback(AsyncCallback async_callb bool ReadBufferFromPocoSocketChunked::startChunk() { - do { - if (buffer_socket.read(reinterpret_cast(&chunk_left), sizeof(chunk_left)) == 0) - return false; - if (chunk_left == 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: empty chunk received"); - } while (chunk_left == 0); + if (buffer_socket.read(reinterpret_cast(&chunk_left), sizeof(chunk_left)) == 0) + return false; + if (chunk_left == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: empty chunk received"); + + chunk_left = netToHost(chunk_left); return nextChunk(); } @@ -86,6 +88,8 @@ bool ReadBufferFromPocoSocketChunked::nextChunk() if (!buffer_socket.readSocketExact(reinterpret_cast(&chunk_left) + skip_next, 4 - skip_next)) return false; + chunk_left = netToHost(chunk_left); + if (chunk_left == 0) LOG_TEST(log, "Packet receive ended."); diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index b316393aab6..4481dfdedfc 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -42,6 +43,7 @@ protected: LOG_TEST(log, 
"Packet send continued. Size {}", s); finished = false; + s = hostToNet(s); socketSendBytes(reinterpret_cast(&s), sizeof(s)); } From 88a833335f7e7e9fae85e74d250677f415905292 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 14 May 2024 20:25:26 +0000 Subject: [PATCH 027/265] fix --- src/IO/WriteBufferFromPocoSocketChunked.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 4481dfdedfc..39cdd93501b 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -8,6 +8,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + class WriteBufferFromPocoSocketChunked: public WriteBufferFromPocoSocket { public: From ad204887a2516e5053035c709735bf6c99ddba21 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 15 May 2024 20:47:54 +0000 Subject: [PATCH 028/265] bugs fixed, switch chunk length to little endian --- src/Client/Connection.cpp | 2 +- src/IO/NetUtils.h | 32 +++++++++++++++++ src/IO/ReadBufferFromPocoSocketChunked.cpp | 42 +++++++++++++++------- src/IO/ReadBufferFromPocoSocketChunked.h | 3 +- src/IO/WriteBufferFromPocoSocketChunked.h | 2 +- 5 files changed, 65 insertions(+), 16 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 970768e515e..3a0f3771e7a 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -1042,7 +1042,7 @@ bool Connection::poll(size_t timeout_microseconds) bool Connection::hasReadPendingData() const { - return last_input_packet_type.has_value() || in->hasPendingData(); + return last_input_packet_type.has_value() || in->hasBufferedData(); } diff --git a/src/IO/NetUtils.h b/src/IO/NetUtils.h index ac6b5eec9a7..12f09524ae7 100644 --- a/src/IO/NetUtils.h +++ b/src/IO/NetUtils.h @@ -23,4 +23,36 @@ constexpr T hostToNet(T value) noexcept return value; } +template +constexpr T toLittleEndian(T value) noexcept +{ + if constexpr (std::endian::native == std::endian::big) + return std::byteswap(value); + return value; +} + +template +constexpr T toBigEndian(T value) noexcept +{ + if constexpr (std::endian::native != std::endian::big) + return std::byteswap(value); + return value; +} + +template +constexpr T fromLittleEndian(T value) noexcept +{ + if constexpr (std::endian::native == std::endian::big) + return std::byteswap(value); + return value; +} + +template +constexpr T fromBigEndian(T value) noexcept +{ + if constexpr (std::endian::native != std::endian::big) + return std::byteswap(value); + return value; +} + } diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 27903761934..247d8c8ec6a 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -27,11 +27,14 @@ ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Sock void ReadBufferFromPocoSocketChunked::enableChunked() { chunked = true; + buffer_socket.position() = pos; } bool ReadBufferFromPocoSocketChunked::poll(size_t timeout_microseconds) { - buffer_socket.position() = pos + skip_next; + if (!chunked) + buffer_socket.position() = pos; + return buffer_socket.poll(timeout_microseconds); } @@ -42,12 +45,12 @@ void ReadBufferFromPocoSocketChunked::setAsyncCallback(AsyncCallback async_callb bool ReadBufferFromPocoSocketChunked::startChunk() { - if (buffer_socket.read(reinterpret_cast(&chunk_left), sizeof(chunk_left)) == 0) + if 
(buffer_socket.read(reinterpret_cast(&chunk_left), sizeof(chunk_left)) < sizeof(chunk_left)) return false; if (chunk_left == 0) throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: empty chunk received"); - chunk_left = netToHost(chunk_left); + chunk_left = fromLittleEndian(chunk_left); return nextChunk(); } @@ -76,19 +79,23 @@ bool ReadBufferFromPocoSocketChunked::nextChunk() { working_buffer.resize(buffer_socket.offset() + buffer_socket.available()); chunk_left -= buffer_socket.available(); + buffer_socket.position() += buffer_socket.available(); return true; } working_buffer.resize(buffer_socket.offset() + chunk_left); - skip_next = std::min(static_cast(4), buffer_socket.available() - chunk_left); + UInt8 buffered = std::min(static_cast(4), buffer_socket.available() - chunk_left); - if (skip_next > 0) - std::memcpy(&chunk_left, buffer_socket.position() + chunk_left, skip_next); - if (4 > skip_next) - if (!buffer_socket.readSocketExact(reinterpret_cast(&chunk_left) + skip_next, 4 - skip_next)) + buffer_socket.position() += chunk_left; + if (buffered > 0) + std::memcpy(&chunk_left, buffer_socket.position(), buffered); + buffer_socket.position() += buffered; + + if (4 > buffered) + if (!buffer_socket.readSocketExact(reinterpret_cast(&chunk_left) + buffered, 4 - buffered)) return false; - chunk_left = netToHost(chunk_left); + chunk_left = fromLittleEndian(chunk_left); if (chunk_left == 0) LOG_TEST(log, "Packet receive ended."); @@ -99,14 +106,23 @@ bool ReadBufferFromPocoSocketChunked::nextChunk() bool ReadBufferFromPocoSocketChunked::nextImpl() { - buffer_socket.position() = pos + skip_next; - skip_next = 0; - if (chunked) - return nextChunk(); + { + if (!nextChunk()) + { + pos = buffer_socket.position(); + return false; + } + return true; + } + + buffer_socket.position() = pos; if (!buffer_socket.next()) + { + pos = buffer_socket.position(); return false; + } pos = buffer_socket.position(); working_buffer.resize(offset() + buffer_socket.available()); diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index 5930285e18a..6f99db4489a 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -16,6 +16,8 @@ public: bool poll(size_t timeout_microseconds); void setAsyncCallback(AsyncCallback async_callback_); + bool hasBufferedData() const { return hasPendingData() || buffer_socket.hasPendingData(); } + protected: bool startChunk(); bool nextChunk(); @@ -26,7 +28,6 @@ private: ReadBufferFromPocoSocket buffer_socket; bool chunked = false; UInt32 chunk_left = 0; // chunk left to read from socket - UInt8 skip_next = 0; // skip already processed bytes in buffer_socket bool started = false; }; diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 39cdd93501b..070e87feff2 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -48,7 +48,7 @@ protected: LOG_TEST(log, "Packet send continued. 
Size {}", s); finished = false; - s = hostToNet(s); + s = toLittleEndian(s); socketSendBytes(reinterpret_cast(&s), sizeof(s)); } From 6378184c7f004e211d86c3fd7a4f482e45b01a59 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 20 May 2024 14:15:47 +0000 Subject: [PATCH 029/265] fix, add some introspection functionality --- src/IO/ReadBufferFromPocoSocketChunked.cpp | 3 ++- src/IO/ReadBufferFromPocoSocketChunked.h | 5 +++++ src/IO/WriteBufferFromPocoSocketChunked.h | 10 ++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 247d8c8ec6a..4d40d8b4f14 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -16,7 +16,7 @@ ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Sock {} ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size) - : ReadBuffer(nullptr, 0), log(getLogger("Protocol")), buffer_socket(socket_, read_event_, buf_size) + : ReadBuffer(nullptr, 0), log(getLogger("Protocol")), peer_address(socket_.peerAddress()), our_address(socket_.address()), buffer_socket(socket_, read_event_, buf_size) { chassert(buf_size <= std::numeric_limits::max()); @@ -28,6 +28,7 @@ void ReadBufferFromPocoSocketChunked::enableChunked() { chunked = true; buffer_socket.position() = pos; + working_buffer.resize(offset()); } bool ReadBufferFromPocoSocketChunked::poll(size_t timeout_microseconds) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index 6f99db4489a..c70363cf7d8 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -18,6 +18,9 @@ public: bool hasBufferedData() const { return hasPendingData() || buffer_socket.hasPendingData(); } + Poco::Net::SocketAddress peerAddress() { return peer_address; } + Poco::Net::SocketAddress ourAddress() { return our_address; } + protected: bool startChunk(); bool nextChunk(); @@ -25,6 +28,8 @@ protected: private: LoggerPtr log; + Poco::Net::SocketAddress peer_address; + Poco::Net::SocketAddress our_address; ReadBufferFromPocoSocket buffer_socket; bool chunked = false; UInt32 chunk_left = 0; // chunk left to read from socket diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 070e87feff2..6c35db62c0c 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -54,6 +54,16 @@ protected: WriteBufferFromPocoSocket::nextImpl(); } + + Poco::Net::SocketAddress peerAddress() + { + return peer_address; + } + + Poco::Net::SocketAddress ourAddress() + { + return our_address; + } private: LoggerPtr log; bool chunked = false; From 5308256c67c5781916018c321273f04fd21c4545 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 20 May 2024 16:25:19 +0000 Subject: [PATCH 030/265] enable chunked before processing defaul database --- src/Server/TCPHandler.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index b3dbd118d8b..070cd0e3247 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -277,6 +277,12 @@ void TCPHandler::runImpl() if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) receiveAddendum(); + if (client_tcp_protocol_version >= 
DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + in->enableChunked(); + out->enableChunked(); + } + if (!is_interserver_mode) { /// If session created, then settings in session context has been updated. @@ -287,12 +293,6 @@ void TCPHandler::runImpl() if (!default_database.empty()) session->sessionContext()->setCurrentDatabase(default_database); } - - if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) - { - in->enableChunked(); - out->enableChunked(); - } } catch (const Exception & e) /// Typical for an incorrect username, password, or address. { From 9e747cd45312302935cbf15ea518808d4ac9c8c8 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 22 May 2024 01:20:00 +0000 Subject: [PATCH 031/265] fix bug with profile stats in WriteBufferFromPocoSocket --- src/IO/WriteBufferFromPocoSocket.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/IO/WriteBufferFromPocoSocket.cpp b/src/IO/WriteBufferFromPocoSocket.cpp index 10d9fd131cd..e29b3b2cddd 100644 --- a/src/IO/WriteBufferFromPocoSocket.cpp +++ b/src/IO/WriteBufferFromPocoSocket.cpp @@ -183,6 +183,7 @@ WriteBufferFromPocoSocket::WriteBufferFromPocoSocket(Poco::Net::Socket & socket_ , socket(socket_) , peer_address(socket.peerAddress()) , our_address(socket.address()) + , write_event(ProfileEvents::end()) , socket_description("socket (" + peer_address.toString() + ")") { } From 34702b30bcfe3401991fe7c792c02a80185acdf2 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 22 May 2024 03:21:10 +0000 Subject: [PATCH 032/265] fix test --- .../0_stateless/02532_send_logs_level_test.reference | 3 --- tests/queries/0_stateless/02532_send_logs_level_test.sh | 8 ++++++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.reference b/tests/queries/0_stateless/02532_send_logs_level_test.reference index 7e51b888d9c..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02532_send_logs_level_test.reference +++ b/tests/queries/0_stateless/02532_send_logs_level_test.reference @@ -1,3 +0,0 @@ - MergeTreeMarksLoader: Loading marks from path data.cmrk3 - MergeTreeRangeReader: First reader returned: num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), requested columns: key - MergeTreeRangeReader: read() returned num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), sample block key diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.sh b/tests/queries/0_stateless/02532_send_logs_level_test.sh index 4afc6d4496b..f2940e9c005 100755 --- a/tests/queries/0_stateless/02532_send_logs_level_test.sh +++ b/tests/queries/0_stateless/02532_send_logs_level_test.sh @@ -17,6 +17,10 @@ $CLICKHOUSE_CLIENT -nm -q " # instead of "last" value, hence you cannot simply append another # --send_logs_level here. CLICKHOUSE_CLIENT_CLEAN=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=test/g') -$CLICKHOUSE_CLIENT_CLEAN -q "select * from data SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;" |& grep -o -e '.*' -e '.*' -$CLICKHOUSE_CLIENT -q "drop table data" +set -e + +trap "$CLICKHOUSE_CLIENT -q 'drop table data'" EXIT + +$CLICKHOUSE_CLIENT_CLEAN -q "select * from data SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;" |& (! 
grep -q -o -e '.*') +$CLICKHOUSE_CLIENT_CLEAN -q "select * from data SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;" |& grep -q -o -e '.*' From 6c3556dfda92ea9d04ff5db8427a58aa7ab35750 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 22 May 2024 04:07:52 +0000 Subject: [PATCH 033/265] fix test --- tests/queries/0_stateless/02532_send_logs_level_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.sh b/tests/queries/0_stateless/02532_send_logs_level_test.sh index f2940e9c005..b74fcf78ad1 100755 --- a/tests/queries/0_stateless/02532_send_logs_level_test.sh +++ b/tests/queries/0_stateless/02532_send_logs_level_test.sh @@ -20,7 +20,7 @@ CLICKHOUSE_CLIENT_CLEAN=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level set -e -trap "$CLICKHOUSE_CLIENT -q 'drop table data'" EXIT +trap '$CLICKHOUSE_CLIENT -q "drop table data"' EXIT $CLICKHOUSE_CLIENT_CLEAN -q "select * from data SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;" |& (! grep -q -o -e '.*') $CLICKHOUSE_CLIENT_CLEAN -q "select * from data SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;" |& grep -q -o -e '.*' From 69cd5ae549cf7acc4de756a70c9b632d139e50fe Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 22 May 2024 16:39:25 +0000 Subject: [PATCH 034/265] process possibly remaining message after network error --- src/Client/ClientBase.cpp | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index b6f821794f1..f3e53efd994 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -951,6 +951,8 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) } catch (Exception & e) { + if (server_exception) + server_exception->rethrow(); if (!is_interactive) e.addMessage("(in query: {})", full_query); throw; @@ -1069,19 +1071,28 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa QueryInterruptHandler::start(signals_before_stop); SCOPE_EXIT({ QueryInterruptHandler::stop(); }); - connection->sendQuery( - connection_parameters.timeouts, - query, - query_parameters, - global_context->getCurrentQueryId(), - query_processing_stage, - &global_context->getSettingsRef(), - &global_context->getClientInfo(), - true, - [&](const Progress & progress) { onProgress(progress); }); + try { + connection->sendQuery( + connection_parameters.timeouts, + query, + query_parameters, + global_context->getCurrentQueryId(), + query_processing_stage, + &global_context->getSettingsRef(), + &global_context->getClientInfo(), + true, + [&](const Progress & progress) { onProgress(progress); }); + + if (send_external_tables) + sendExternalTables(parsed_query); + } + catch (const NetException &) + { + // We still want to attempt to process whatever we already recieved or can recieve (socket receive buffer can be not empty) + receiveResult(parsed_query, signals_before_stop, settings.partial_result_on_first_cancel); + throw; + } - if (send_external_tables) - sendExternalTables(parsed_query); receiveResult(parsed_query, signals_before_stop, settings.partial_result_on_first_cancel); break; From 99bd796011aee169f3c4de25b07b330094c4a41a Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 22 May 2024 16:58:50 +0000 Subject: [PATCH 035/265] fix spelling --- 
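Note: the comment being respelled below documents a recovery pattern from the
previous commit that is worth spelling out: on a network error during
sendQuery the client first drains whatever the server already delivered (the
socket receive buffer may be non-empty) and only then rethrows. A minimal
sketch, with the send/drain callables as illustrative stand-ins:

    void sendThenDrainOnError(const std::function<void()> & send, const std::function<void()> & drain)
    {
        try
        {
            send();
        }
        catch (const NetException &)
        {
            drain(); /// consume packets already buffered on the socket
            throw;   /// then surface the network error to the caller
        }
    }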
src/Client/ClientBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index f3e53efd994..1b8fe83eb51 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1088,7 +1088,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa } catch (const NetException &) { - // We still want to attempt to process whatever we already recieved or can recieve (socket receive buffer can be not empty) + // We still want to attempt to process whatever we already received or can receive (socket receive buffer can be not empty) receiveResult(parsed_query, signals_before_stop, settings.partial_result_on_first_cancel); throw; } From 94bc0a1e966d95b8a2180f9504ed93592d2026ed Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 23 May 2024 22:01:32 +0000 Subject: [PATCH 036/265] add config parameters and client arguments, make default notchunked_optional --- programs/benchmark/Benchmark.cpp | 52 +++++++++++++++++- src/Client/ClientBase.cpp | 39 ++++++++++++++ src/Client/Connection.cpp | 54 ++++++++++++++++++- src/Client/Connection.h | 5 ++ src/Client/ConnectionParameters.cpp | 3 ++ src/Client/ConnectionParameters.h | 2 + src/Client/ConnectionPool.cpp | 6 ++- src/Client/ConnectionPool.h | 15 +++++- .../ClickHouseDictionarySource.cpp | 8 ++- src/Dictionaries/ClickHouseDictionarySource.h | 2 + src/Interpreters/Cluster.cpp | 11 +++- src/Interpreters/Cluster.h | 2 + src/Server/TCPHandler.cpp | 44 ++++++++++++++- src/Server/TCPHandler.h | 2 + .../DistributedAsyncInsertDirectoryQueue.cpp | 2 + src/Storages/StorageReplicatedMergeTree.cpp | 3 +- 16 files changed, 240 insertions(+), 10 deletions(-) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 48dca82eb2b..251761e0bad 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -75,6 +75,8 @@ public: const String & default_database_, const String & user_, const String & password_, + const String & proto_send_chunked_, + const String & proto_recv_chunked_, const String & quota_key_, const String & stage, bool randomize_, @@ -128,7 +130,9 @@ public: connections.emplace_back(std::make_unique( concurrency, cur_host, cur_port, - default_database_, user_, password_, quota_key_, + default_database_, user_, password_, + proto_send_chunked_, proto_recv_chunked_, + quota_key_, /* cluster_= */ "", /* cluster_secret_= */ "", /* client_name_= */ std::string(DEFAULT_CLIENT_NAME), @@ -662,6 +666,50 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) Strings hosts = options.count("host") ? 
options["host"].as() : Strings({"localhost"}); + String proto_send_chunked {"notchunked_optional"}; + String proto_recv_chunked {"notchunked_optional"}; + + if (options.count("proto_caps")) + { + std::string proto_caps_str = options["proto_caps"].as(); + + std::vector proto_caps; + splitInto<','>(proto_caps, proto_caps_str); + + for (auto cap_str : proto_caps) + { + std::string direction; + + if (cap_str.starts_with("send_")) + { + direction = "send"; + cap_str = cap_str.substr(std::string_view("send_").size()); + } + else if (cap_str.starts_with("recv_")) + { + direction = "recv"; + cap_str = cap_str.substr(std::string_view("recv_").size()); + } + + if (cap_str != "chunked" && cap_str != "notchunked" && cap_str != "chunked_optional" && cap_str != "notchunked_optional") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "proto_caps option is incorrect ({})", proto_caps_str); + + if (direction.empty()) + { + proto_send_chunked = cap_str; + proto_recv_chunked = cap_str; + } + else + { + if (direction == "send") + proto_send_chunked = cap_str; + else + proto_recv_chunked = cap_str; + } + } + } + + Benchmark benchmark( options["concurrency"].as(), options["delay"].as(), @@ -673,6 +721,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) options["database"].as(), options["user"].as(), options["password"].as(), + proto_send_chunked, + proto_recv_chunked, options["quota_key"].as(), options["stage"].as(), options.count("randomize"), diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 1b8fe83eb51..0bceee6ea4d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -75,9 +75,11 @@ #include #include #include +#include #include #include +#include #include "config.h" namespace fs = std::filesystem; @@ -2993,6 +2995,8 @@ void ClientBase::init(int argc, char ** argv) ("config-file,C", po::value(), "config-file path") + ("proto_caps", po::value(), "enable/disable chunked protocol: chunked_optional, notchunked, notchunked_optional, send_chunked, send_chunked_optional, send_notchunked, send_notchunked_optional, recv_chunked, recv_chunked_optional, recv_notchunked, recv_notchunked_optional") + ("query,q", po::value>()->multitoken(), R"(query; can be specified multiple times (--query "SELECT 1" --query "SELECT 2"...))") ("queries-file", po::value>()->multitoken(), "file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)") ("multiquery,n", "If specified, multiple queries separated by semicolons can be listed after --query. 
For convenience, it is also possible to omit --query and pass the queries directly after --multiquery.") @@ -3162,6 +3166,41 @@ void ClientBase::init(int argc, char ** argv) if (options.count("server_logs_file")) server_logs_file = options["server_logs_file"].as(); + if (options.count("proto_caps")) + { + std::string proto_caps_str = options["proto_caps"].as(); + + std::vector proto_caps; + splitInto<','>(proto_caps, proto_caps_str); + + for (auto cap_str : proto_caps) + { + std::string direction; + + if (cap_str.starts_with("send_")) + { + direction = "send"; + cap_str = cap_str.substr(std::string_view("send_").size()); + } + else if (cap_str.starts_with("recv_")) + { + direction = "recv"; + cap_str = cap_str.substr(std::string_view("recv_").size()); + } + + if (cap_str != "chunked" && cap_str != "notchunked" && cap_str != "chunked_optional" && cap_str != "notchunked_optional") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "proto_caps option is incorrect ({})", proto_caps_str); + + if (direction.empty()) + { + config().setString("proto_caps.send", std::string(cap_str)); + config().setString("proto_caps.recv", std::string(cap_str)); + } + else + config().setString("proto_caps." + direction, std::string(cap_str)); + } + } + query_processing_stage = QueryProcessingStage::fromString(options["stage"].as()); query_kind = parseQueryKind(options["query_kind"].as()); profile_events.print = options.count("print-profile-events"); diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 082fe8d5098..9327b694d29 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -71,6 +71,7 @@ Connection::~Connection() = default; Connection::Connection(const String & host_, UInt16 port_, const String & default_database_, const String & user_, const String & password_, + const String & proto_send_chunked_, const String & proto_recv_chunked_, [[maybe_unused]] const SSHKey & ssh_private_key_, const String & quota_key_, const String & cluster_, @@ -80,6 +81,7 @@ Connection::Connection(const String & host_, UInt16 port_, Protocol::Secure secure_) : host(host_), port(port_), default_database(default_database_) , user(user_), password(password_) + , proto_send_chunked(proto_send_chunked_), proto_recv_chunked(proto_recv_chunked_) #if USE_SSH , ssh_private_key(ssh_private_key_) #endif @@ -206,13 +208,46 @@ void Connection::connect(const ConnectionTimeouts & timeouts) sendHello(); receiveHello(timeouts.handshake_timeout); + bool out_chunked = false; + bool in_chunked = false; + + if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + auto is_chunked = [](const String & chunked_srv_str, const String & chunked_cl_str, const String & direction) + { + bool chunked_srv = chunked_srv_str.starts_with("chunked"); + bool optional_srv = chunked_srv_str.ends_with("_optional"); + bool chunked_cl = chunked_cl_str.starts_with("chunked"); + bool optional_cl = chunked_cl_str.ends_with("_optional"); + + if (optional_srv) + return chunked_cl; + if (optional_cl) + return chunked_srv; + if (chunked_cl != chunked_srv) + throw NetException( + ErrorCodes::NETWORK_ERROR, + "Incompatible protocol: {} set to {}, server requires {}", + direction, + chunked_cl ? "chunked" : "notchunked", + chunked_srv ? 
"chunked" : "notchunked"); + + return chunked_srv; + }; + + out_chunked = is_chunked(proto_recv_chunked_srv, proto_send_chunked, "send"); + in_chunked = is_chunked(proto_send_chunked_srv, proto_recv_chunked, "recv"); + } + if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) sendAddendum(); if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) { - in->enableChunked(); - out->enableChunked(); + if (out_chunked) + out->enableChunked(); + if (in_chunked) + in->enableChunked(); } LOG_TRACE(log_wrapper.get(), "Connected to {} server version {}.{}.{}.", @@ -359,6 +394,13 @@ void Connection::sendAddendum() { if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_QUOTA_KEY) writeStringBinary(quota_key, *out); + + if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + writeStringBinary(proto_send_chunked, *out); + writeStringBinary(proto_recv_chunked, *out); + } + out->next(); } @@ -438,6 +480,12 @@ void Connection::receiveHello(const Poco::Timespan & handshake_timeout) else server_version_patch = server_revision; + if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + readStringBinary(proto_send_chunked_srv, *in); + readStringBinary(proto_recv_chunked_srv, *in); + } + if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES) { UInt64 rules_size; @@ -1327,6 +1375,8 @@ ServerConnectionPtr Connection::createConnection(const ConnectionParameters & pa parameters.default_database, parameters.user, parameters.password, + parameters.proto_send_chunked, + parameters.proto_recv_chunked, parameters.ssh_private_key, parameters.quota_key, "", /* cluster */ diff --git a/src/Client/Connection.h b/src/Client/Connection.h index e7a6d948204..a04ccd44627 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -52,6 +52,7 @@ public: Connection(const String & host_, UInt16 port_, const String & default_database_, const String & user_, const String & password_, + const String & proto_send_chunked_, const String & proto_recv_chunked_, const SSHKey & ssh_private_key_, const String & quota_key_, const String & cluster_, @@ -169,6 +170,10 @@ private: String default_database; String user; String password; + String proto_send_chunked; + String proto_recv_chunked; + String proto_send_chunked_srv; + String proto_recv_chunked_srv; #if USE_SSH SSHKey ssh_private_key; #endif diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 774f3375f63..430c462084a 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -103,6 +103,9 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati #endif } + proto_send_chunked = config.getString("proto_caps.send", "notchunked_optional"); + proto_recv_chunked = config.getString("proto_caps.recv", "notchunked_optional"); + quota_key = config.getString("quota_key", ""); /// By default compression is disabled if address looks like localhost. 
diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index f23522d48b3..85174924016 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -20,6 +20,8 @@ struct ConnectionParameters std::string default_database; std::string user; std::string password; + std::string proto_send_chunked; + std::string proto_recv_chunked; std::string quota_key; SSHKey ssh_private_key; Protocol::Secure security = Protocol::Secure::Disable; diff --git a/src/Client/ConnectionPool.cpp b/src/Client/ConnectionPool.cpp index 5cabb1465d1..05cb97cadc7 100644 --- a/src/Client/ConnectionPool.cpp +++ b/src/Client/ConnectionPool.cpp @@ -12,6 +12,8 @@ ConnectionPoolPtr ConnectionPoolFactory::get( String default_database, String user, String password, + String proto_send_chunked, + String proto_recv_chunked, String quota_key, String cluster, String cluster_secret, @@ -21,7 +23,7 @@ ConnectionPoolPtr ConnectionPoolFactory::get( Priority priority) { Key key{ - max_connections, host, port, default_database, user, password, quota_key, cluster, cluster_secret, client_name, compression, secure, priority}; + max_connections, host, port, default_database, user, password, proto_send_chunked, proto_recv_chunked, quota_key, cluster, cluster_secret, client_name, compression, secure, priority}; std::lock_guard lock(mutex); auto [it, inserted] = pools.emplace(key, ConnectionPoolPtr{}); @@ -38,6 +40,8 @@ ConnectionPoolPtr ConnectionPoolFactory::get( default_database, user, password, + proto_send_chunked, + proto_recv_chunked, quota_key, cluster, cluster_secret, diff --git a/src/Client/ConnectionPool.h b/src/Client/ConnectionPool.h index d35c2552461..2df97dfb454 100644 --- a/src/Client/ConnectionPool.h +++ b/src/Client/ConnectionPool.h @@ -72,6 +72,8 @@ public: const String & default_database_, const String & user_, const String & password_, + const String & proto_send_chunked_, + const String & proto_recv_chunked_, const String & quota_key_, const String & cluster_, const String & cluster_secret_, @@ -84,6 +86,8 @@ public: , default_database(default_database_) , user(user_) , password(password_) + , proto_send_chunked(proto_send_chunked_) + , proto_recv_chunked(proto_recv_chunked_) , quota_key(quota_key_) , cluster(cluster_) , cluster_secret(cluster_secret_) @@ -123,7 +127,9 @@ protected: { return std::make_shared( host, port, - default_database, user, password, SSHKey(), quota_key, + default_database, user, password, + proto_send_chunked, proto_recv_chunked, + SSHKey(), quota_key, cluster, cluster_secret, client_name, compression, secure); } @@ -132,6 +138,8 @@ private: String default_database; String user; String password; + String proto_send_chunked; + String proto_recv_chunked; String quota_key; /// For inter-server authorization @@ -157,6 +165,8 @@ public: String default_database; String user; String password; + String proto_send_chunked; + String proto_recv_chunked; String quota_key; String cluster; String cluster_secret; @@ -180,6 +190,8 @@ public: String default_database, String user, String password, + String proto_send_chunked, + String proto_recv_chunked, String quota_key, String cluster, String cluster_secret, @@ -197,6 +209,7 @@ inline bool operator==(const ConnectionPoolFactory::Key & lhs, const ConnectionP { return lhs.max_connections == rhs.max_connections && lhs.host == rhs.host && lhs.port == rhs.port && lhs.default_database == rhs.default_database && lhs.user == rhs.user && lhs.password == rhs.password + && lhs.proto_send_chunked == rhs.proto_send_chunked 
&& lhs.proto_recv_chunked == rhs.proto_recv_chunked && lhs.quota_key == rhs.quota_key && lhs.cluster == rhs.cluster && lhs.cluster_secret == rhs.cluster_secret && lhs.client_name == rhs.client_name && lhs.compression == rhs.compression && lhs.secure == rhs.secure && lhs.priority == rhs.priority; diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index bf16f315ddf..3b096da92c6 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -51,6 +51,8 @@ namespace configuration.db, configuration.user, configuration.password, + configuration.proto_send_chunked, + configuration.proto_recv_chunked, configuration.quota_key, "", /* cluster */ "", /* cluster_secret */ @@ -222,7 +224,7 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) { validateNamedCollection( *named_collection, {}, ValidateKeysMultiset{ - "secure", "host", "hostname", "port", "user", "username", "password", "quota_key", "name", + "secure", "host", "hostname", "port", "user", "username", "password", "proto_send_chunked", "proto_recv_chunked", "quota_key", "name", "db", "database", "table","query", "where", "invalidate_query", "update_field", "update_lag"}); const auto secure = named_collection->getOrDefault("secure", false); @@ -234,6 +236,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) .host = host, .user = named_collection->getAnyOrDefault({"user", "username"}, "default"), .password = named_collection->getOrDefault("password", ""), + .proto_send_chunked = named_collection->getOrDefault("proto_send_chunked", "notchunked_optional"), + .proto_recv_chunked = named_collection->getOrDefault("proto_recv_chunked", "notchunked_optional"), .quota_key = named_collection->getOrDefault("quota_key", ""), .db = named_collection->getAnyOrDefault({"db", "database"}, default_database), .table = named_collection->getOrDefault("table", ""), @@ -258,6 +262,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) .host = host, .user = config.getString(settings_config_prefix + ".user", "default"), .password = config.getString(settings_config_prefix + ".password", ""), + .proto_send_chunked = config.getString(settings_config_prefix + ".proto_caps.send", "notchunked_optional"), + .proto_recv_chunked = config.getString(settings_config_prefix + ".proto_caps.recv", "notchunked_optional"), .quota_key = config.getString(settings_config_prefix + ".quota_key", ""), .db = config.getString(settings_config_prefix + ".db", default_database), .table = config.getString(settings_config_prefix + ".table", ""), diff --git a/src/Dictionaries/ClickHouseDictionarySource.h b/src/Dictionaries/ClickHouseDictionarySource.h index 3357514eab2..faf9e5f8009 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.h +++ b/src/Dictionaries/ClickHouseDictionarySource.h @@ -23,6 +23,8 @@ public: const std::string host; const std::string user; const std::string password; + const std::string proto_send_chunked; + const std::string proto_recv_chunked; const std::string quota_key; const std::string db; const std::string table; diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 59c98491c14..1d7ccd484d0 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -113,6 +113,9 @@ Cluster::Address::Address( secure = ConfigHelper::getBool(config, config_prefix + ".secure", false, /* empty_as */true) ? 
Protocol::Secure::Enable : Protocol::Secure::Disable; priority = Priority{config.getInt(config_prefix + ".priority", 1)}; + proto_send_chunked = config.getString(config_prefix + ".proto_caps.send", "notchunked_optional"); + proto_recv_chunked = config.getString(config_prefix + ".proto_caps.recv", "notchunked_optional"); + const char * port_type = secure == Protocol::Secure::Enable ? "tcp_port_secure" : "tcp_port"; auto default_port = config.getInt(port_type, 0); @@ -425,7 +428,9 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, auto pool = ConnectionPoolFactory::instance().get( static_cast(settings.distributed_connections_pool_size), address.host_name, address.port, - address.default_database, address.user, address.password, address.quota_key, + address.default_database, address.user, address.password, + address.proto_send_chunked, address.proto_recv_chunked, + address.quota_key, address.cluster, address.cluster_secret, "server", address.compression, address.secure, address.priority); @@ -589,6 +594,8 @@ void Cluster::addShard( replica.default_database, replica.user, replica.password, + replica.proto_send_chunked, + replica.proto_recv_chunked, replica.quota_key, replica.cluster, replica.cluster_secret, @@ -744,6 +751,8 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti address.default_database, address.user, address.password, + address.proto_send_chunked, + address.proto_recv_chunked, address.quota_key, address.cluster, address.cluster_secret, diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index dc5790ac339..c993af5fc5e 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -114,6 +114,8 @@ public: UInt16 port{0}; String user; String password; + String proto_send_chunked; + String proto_recv_chunked; String quota_key; /// For inter-server authorization diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 2071eac3a68..c7db25c4c3a 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1,6 +1,7 @@ #include "Interpreters/AsynchronousInsertQueue.h" #include "Interpreters/SquashingTransform.h" #include "Parsers/ASTInsertQuery.h" +#include #include #include #include @@ -99,6 +100,7 @@ namespace DB::ErrorCodes extern const int SUPPORT_IS_DISABLED; extern const int UNSUPPORTED_METHOD; extern const int USER_EXPIRED; + extern const int NETWORK_ERROR; } namespace @@ -279,8 +281,35 @@ void TCPHandler::runImpl() if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) { - in->enableChunked(); - out->enableChunked(); + auto is_chunked = [](const String & chunked_srv_str, const String & chunked_cl_str, const String & direction) + { + bool chunked_srv = chunked_srv_str.starts_with("chunked"); + bool optional_srv = chunked_srv_str.ends_with("_optional"); + bool chunked_cl = chunked_cl_str.starts_with("chunked"); + bool optional_cl = chunked_cl_str.ends_with("_optional"); + + if (optional_srv) + return chunked_cl; + if (optional_cl) + return chunked_srv; + if (chunked_cl != chunked_srv) + throw NetException( + ErrorCodes::NETWORK_ERROR, + "Incompatible protocol: {} is {}, client requested {}", + direction, + chunked_srv ? "chunked" : "notchunked", + chunked_cl ? 
"chunked" : "notchunked"); + + return chunked_srv; + }; + + bool out_chunked = is_chunked(server.config().getString("proto_caps.send", "notchunked_optional"), proto_recv_chunked_cl, "send"); + bool in_chunked = is_chunked(server.config().getString("proto_caps.recv", "notchunked_optional"), proto_send_chunked_cl, "recv"); + + if (out_chunked) + out->enableChunked(); + if (in_chunked) + in->enableChunked(); } if (!is_interserver_mode) @@ -1575,6 +1604,12 @@ void TCPHandler::receiveAddendum() if (!is_interserver_mode) session->setQuotaClientKey(quota_key); + + if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + readStringBinary(proto_send_chunked_cl, *in); + readStringBinary(proto_recv_chunked_cl, *in); + } } @@ -1608,6 +1643,11 @@ void TCPHandler::sendHello() writeStringBinary(server_display_name, *out); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_VERSION_PATCH) writeVarUInt(VERSION_PATCH, *out); + if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) + { + writeStringBinary(server.config().getString("proto_caps.send", "notchunked_optional"), *out); + writeStringBinary(server.config().getString("proto_caps.recv", "notchunked_optional"), *out); + } if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES) { auto rules = server.context()->getAccessControl().getPasswordComplexityRules(); diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 67d77381167..baef92b9fa0 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -188,6 +188,8 @@ private: UInt64 client_version_minor = 0; UInt64 client_version_patch = 0; UInt32 client_tcp_protocol_version = 0; + String proto_send_chunked_cl; + String proto_recv_chunked_cl; String quota_key; /// Connection settings, which are extracted from a context. 
diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index d471c67553d..dd318f34148 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -273,6 +273,8 @@ ConnectionPoolWithFailoverPtr DistributedAsyncInsertDirectoryQueue::createPool(c address.default_database, address.user, address.password, + address.proto_send_chunked, + address.proto_recv_chunked, address.quota_key, address.cluster, address.cluster_secret, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 378b81c6d18..4475e265395 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5664,7 +5664,8 @@ std::optional StorageReplicatedMergeTree::distributedWriteFromClu { auto connection = std::make_shared( node.host_name, node.port, query_context->getGlobalContext()->getCurrentDatabase(), - node.user, node.password, SSHKey(), node.quota_key, node.cluster, node.cluster_secret, + node.user, node.password, node.proto_send_chunked, node.proto_recv_chunked, + SSHKey(), node.quota_key, node.cluster, node.cluster_secret, "ParallelInsertSelectInititiator", node.compression, node.secure From 147ad42df09f374df971d6bed36ccf67c97d87a9 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 24 May 2024 03:36:29 +0000 Subject: [PATCH 037/265] fix notchunked mode in ReadBufferFromPocoSocketChunked --- src/IO/ReadBufferFromPocoSocketChunked.cpp | 7 +++++++ src/IO/ReadBufferFromPocoSocketChunked.h | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 4d40d8b4f14..a67a5bb41a9 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -44,6 +44,13 @@ void ReadBufferFromPocoSocketChunked::setAsyncCallback(AsyncCallback async_callb buffer_socket.setAsyncCallback(async_callback_); } +bool ReadBufferFromPocoSocketChunked::hasBufferedData() const +{ + if (chunked) + return hasPendingData() || buffer_socket.hasPendingData(); + return hasPendingData(); +} + bool ReadBufferFromPocoSocketChunked::startChunk() { if (buffer_socket.read(reinterpret_cast(&chunk_left), sizeof(chunk_left)) < sizeof(chunk_left)) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index c70363cf7d8..b0f5dd7dc5f 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -16,7 +16,7 @@ public: bool poll(size_t timeout_microseconds); void setAsyncCallback(AsyncCallback async_callback_); - bool hasBufferedData() const { return hasPendingData() || buffer_socket.hasPendingData(); } + bool hasBufferedData() const; Poco::Net::SocketAddress peerAddress() { return peer_address; } Poco::Net::SocketAddress ourAddress() { return our_address; } From 89205d78a68879399129b64f78cd27f7602bf373 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 31 May 2024 04:18:36 +0000 Subject: [PATCH 038/265] major refactoring --- src/Client/Connection.cpp | 2 +- src/IO/ReadBufferFromPocoSocket.cpp | 51 ++---- src/IO/ReadBufferFromPocoSocket.h | 20 ++- src/IO/ReadBufferFromPocoSocketChunked.cpp | 183 ++++++++++++--------- src/IO/ReadBufferFromPocoSocketChunked.h | 98 +++++++++-- 5 files changed, 222 insertions(+), 132 deletions(-) diff --git 
a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 9327b694d29..c221124932a 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -1101,7 +1101,7 @@ bool Connection::poll(size_t timeout_microseconds) bool Connection::hasReadPendingData() const { - return last_input_packet_type.has_value() || in->hasBufferedData(); + return last_input_packet_type.has_value() || in->hasPendingData(); } diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index 5fb7ea0440c..5c338ef18bc 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -32,9 +32,16 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -size_t ReadBufferFromPocoSocket::readSocket(Position begin, size_t size) +ssize_t ReadBufferFromPocoSocketBase::socketReceiveBytesImpl(char * ptr, size_t size) { ssize_t bytes_read = 0; + Stopwatch watch; + + SCOPE_EXIT({ + /// NOTE: it is quite inaccurate on high loads since the thread could be replaced by another one + ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::NetworkReceiveBytes, bytes_read); + }); /// Add more details to exceptions. try @@ -49,7 +56,7 @@ size_t ReadBufferFromPocoSocket::readSocket(Position begin, size_t size) socket.setBlocking(false); SCOPE_EXIT(socket.setBlocking(true)); bool secure = socket.secure(); - bytes_read = socket.impl()->receiveBytes(begin, static_cast(size)); + bytes_read = socket.impl()->receiveBytes(ptr, static_cast(size)); /// Check EAGAIN and ERR_SSL_WANT_READ/ERR_SSL_WANT_WRITE for secure socket (reading from secure socket can write too). while (bytes_read < 0 && (errno == EAGAIN || (secure && (checkSSLWantRead(bytes_read) || checkSSLWantWrite(bytes_read))))) @@ -61,12 +68,12 @@ size_t ReadBufferFromPocoSocket::readSocket(Position begin, size_t size) async_callback(socket.impl()->sockfd(), socket.getReceiveTimeout(), AsyncEventTimeoutType::RECEIVE, socket_description, AsyncTaskExecutor::Event::READ | AsyncTaskExecutor::Event::ERROR); /// Try to read again. 
- bytes_read = socket.impl()->receiveBytes(begin, static_cast(size)); + bytes_read = socket.impl()->receiveBytes(ptr, static_cast(size)); } } else { - bytes_read = socket.impl()->receiveBytes(begin, static_cast(size)); + bytes_read = socket.impl()->receiveBytes(ptr, static_cast(size)); } } catch (const Poco::Net::NetException & e) @@ -90,36 +97,12 @@ size_t ReadBufferFromPocoSocket::readSocket(Position begin, size_t size) return bytes_read; } -bool ReadBufferFromPocoSocket::readSocketExact(Position begin, size_t size) +bool ReadBufferFromPocoSocketBase::nextImpl() { - for (size_t bytes_left = size; bytes_left > 0;) - { - size_t ret = readSocket(begin + size - bytes_left, bytes_left); - if (ret == 0) - return false; - bytes_left -= ret; - } - - return true; -} - -bool ReadBufferFromPocoSocket::nextImpl() -{ - ssize_t bytes_read = 0; - Stopwatch watch; - - SCOPE_EXIT({ - /// NOTE: it is quite inaccurate on high loads since the thread could be replaced by another one - ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds()); - ProfileEvents::increment(ProfileEvents::NetworkReceiveBytes, bytes_read); - }); - - CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkReceive); - if (internal_buffer.size() > INT_MAX) throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer overflow"); - bytes_read = readSocket(internal_buffer.begin(), internal_buffer.size()); + ssize_t bytes_read = socketReceiveBytesImpl(internal_buffer.begin(), internal_buffer.size()); if (read_event != ProfileEvents::end()) ProfileEvents::increment(read_event, bytes_read); @@ -132,7 +115,7 @@ bool ReadBufferFromPocoSocket::nextImpl() return true; } -ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size) +ReadBufferFromPocoSocketBase::ReadBufferFromPocoSocketBase(Poco::Net::Socket & socket_, size_t buf_size) : BufferWithOwnMemory(buf_size) , socket(socket_) , peer_address(socket.peerAddress()) @@ -141,13 +124,13 @@ ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, { } -ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size) - : ReadBufferFromPocoSocket(socket_, buf_size) +ReadBufferFromPocoSocketBase::ReadBufferFromPocoSocketBase(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size) + : ReadBufferFromPocoSocketBase(socket_, buf_size) { read_event = read_event_; } -bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds) const +bool ReadBufferFromPocoSocketBase::poll(size_t timeout_microseconds) const { if (available()) return true; diff --git a/src/IO/ReadBufferFromPocoSocket.h b/src/IO/ReadBufferFromPocoSocket.h index c40a54ed7ae..a36bea6d679 100644 --- a/src/IO/ReadBufferFromPocoSocket.h +++ b/src/IO/ReadBufferFromPocoSocket.h @@ -9,7 +9,7 @@ namespace DB { /// Works with the ready Poco::Net::Socket. Blocking operations. 
-class ReadBufferFromPocoSocket : public BufferWithOwnMemory +class ReadBufferFromPocoSocketBase : public BufferWithOwnMemory { protected: Poco::Net::Socket & socket; @@ -25,19 +25,29 @@ protected: bool nextImpl() override; public: - explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); - explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + explicit ReadBufferFromPocoSocketBase(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + explicit ReadBufferFromPocoSocketBase(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); bool poll(size_t timeout_microseconds) const; void setAsyncCallback(AsyncCallback async_callback_) { async_callback = std::move(async_callback_); } - size_t readSocket(Position begin, size_t size); - bool readSocketExact(Position begin, size_t size); + ssize_t socketReceiveBytesImpl(char * ptr, size_t size); private: AsyncCallback async_callback; std::string socket_description; }; +class ReadBufferFromPocoSocket : public ReadBufferFromPocoSocketBase +{ +public: + explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) + : ReadBufferFromPocoSocketBase(socket_, buf_size = DBMS_DEFAULT_BUFFER_SIZE) + {} + explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) + : ReadBufferFromPocoSocketBase(socket_, read_event_, buf_size) + {} +}; + } diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index a67a5bb41a9..3cc8710407e 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -16,126 +16,149 @@ ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Sock {} ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size) - : ReadBuffer(nullptr, 0), log(getLogger("Protocol")), peer_address(socket_.peerAddress()), our_address(socket_.address()), buffer_socket(socket_, read_event_, buf_size) + : ReadBufferFromPocoSocketBase(socket_, read_event_, buf_size), our_address(socket_.address()), log(getLogger("Protocol")) + { chassert(buf_size <= std::numeric_limits::max()); - - working_buffer = buffer_socket.buffer(); - pos = buffer_socket.position(); } void ReadBufferFromPocoSocketChunked::enableChunked() { - chunked = true; - buffer_socket.position() = pos; + if (chunked) + return; + chunked = 1; + data_end = buffer().end(); working_buffer.resize(offset()); + chunk_left = 0; + next_chunk = 0; } -bool ReadBufferFromPocoSocketChunked::poll(size_t timeout_microseconds) -{ - if (!chunked) - buffer_socket.position() = pos; - - return buffer_socket.poll(timeout_microseconds); -} - -void ReadBufferFromPocoSocketChunked::setAsyncCallback(AsyncCallback async_callback_) -{ - buffer_socket.setAsyncCallback(async_callback_); -} - -bool ReadBufferFromPocoSocketChunked::hasBufferedData() const +bool ReadBufferFromPocoSocketChunked::hasPendingData() const { if (chunked) - return hasPendingData() || buffer_socket.hasPendingData(); - return hasPendingData(); + return available() || static_cast(data_end - working_buffer.end()) > sizeof(next_chunk); + + return ReadBufferFromPocoSocketBase::hasPendingData(); } -bool 
ReadBufferFromPocoSocketChunked::startChunk() +bool ReadBufferFromPocoSocketChunked::poll(size_t timeout_microseconds) const { - if (buffer_socket.read(reinterpret_cast(&chunk_left), sizeof(chunk_left)) < sizeof(chunk_left)) - return false; - if (chunk_left == 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: empty chunk received"); + if (chunked) + if (available() || static_cast(data_end - working_buffer.end()) > sizeof(next_chunk)) + return true; - chunk_left = fromLittleEndian(chunk_left); - - return nextChunk(); + return ReadBufferFromPocoSocketBase::poll(timeout_microseconds); } -bool ReadBufferFromPocoSocketChunked::nextChunk() -{ - if (chunk_left == 0) - { - started = true; - return startChunk(); - } - if (buffer_socket.available() == 0) - if (!buffer_socket.next()) +bool ReadBufferFromPocoSocketChunked::load_next_chunk(Position c_pos, bool cont) +{ + auto buffered = std::min(static_cast(data_end - c_pos), sizeof(next_chunk)); + + if (buffered) + std::memcpy(&next_chunk, c_pos, buffered); + if (buffered < sizeof(next_chunk)) + if (socketReceiveBytesImpl(reinterpret_cast(&next_chunk) + buffered, sizeof(next_chunk) - buffered) < static_cast(sizeof(next_chunk) - buffered)) return false; - if (started) - LOG_TEST(log, "Packet receive started. Message {}, size {}", static_cast(*buffer_socket.position()), chunk_left); - else - LOG_TEST(log, "Packet receive continued. Size {}", chunk_left); + next_chunk = fromLittleEndian(next_chunk); - started = false; - - nextimpl_working_buffer_offset = buffer_socket.offset(); - - if (buffer_socket.available() < chunk_left) + if (next_chunk) { - working_buffer.resize(buffer_socket.offset() + buffer_socket.available()); - chunk_left -= buffer_socket.available(); - buffer_socket.position() += buffer_socket.available(); + if (cont) + LOG_TEST(log, "Packet receive continued. 
Size {}", next_chunk); + } + else + LOG_TEST(log, "Packet receive ended."); + + return true; +} + +bool ReadBufferFromPocoSocketChunked::process_chunk_left(Position c_pos) +{ + if (data_end - c_pos < chunk_left) + { + working_buffer.resize(data_end - buffer().begin()); + nextimpl_working_buffer_offset = c_pos - buffer().begin(); + chunk_left -= (data_end - c_pos); return true; } - working_buffer.resize(buffer_socket.offset() + chunk_left); - UInt8 buffered = std::min(static_cast(4), buffer_socket.available() - chunk_left); + nextimpl_working_buffer_offset = c_pos - buffer().begin(); + working_buffer.resize(nextimpl_working_buffer_offset + chunk_left); - buffer_socket.position() += chunk_left; - if (buffered > 0) - std::memcpy(&chunk_left, buffer_socket.position(), buffered); - buffer_socket.position() += buffered; + c_pos += chunk_left; - if (4 > buffered) - if (!buffer_socket.readSocketExact(reinterpret_cast(&chunk_left) + buffered, 4 - buffered)) - return false; - - chunk_left = fromLittleEndian(chunk_left); - - if (chunk_left == 0) - LOG_TEST(log, "Packet receive ended."); + if (!load_next_chunk(c_pos, true)) + return false; + chunk_left = 0; return true; } bool ReadBufferFromPocoSocketChunked::nextImpl() { - if (chunked) + if (!chunked) + return ReadBufferFromPocoSocketBase::nextImpl(); + + auto c_pos = pos; + + if (chunk_left == 0) { - if (!nextChunk()) + if (next_chunk == 0) { - pos = buffer_socket.position(); - return false; + if (chunked == 1) + chunked = 2; // first chunked block - no end marker + else + c_pos = pos + sizeof(next_chunk); // bypass chunk end marker + + if (c_pos > data_end) + c_pos = data_end; + + if (!load_next_chunk(c_pos)) + return false; + + chunk_left = next_chunk; + next_chunk = 0; + + c_pos += sizeof(next_chunk); + + if (c_pos >= data_end) + { + if (!ReadBufferFromPocoSocketBase::nextImpl()) + return false; + data_end = buffer().end(); + c_pos = buffer().begin(); + } + + LOG_TEST(log, "Packet receive started. 
Message {}, size {}", static_cast(*c_pos), chunk_left); + } + else + { + c_pos += sizeof(next_chunk); + if (c_pos >= data_end) + { + if (!ReadBufferFromPocoSocketBase::nextImpl()) + return false; + data_end = buffer().end(); + c_pos = buffer().begin(); + } + + chunk_left = next_chunk; + next_chunk = 0; } - return true; } - - buffer_socket.position() = pos; - - if (!buffer_socket.next()) + else { - pos = buffer_socket.position(); - return false; + chassert(c_pos == data_end); + + if (!ReadBufferFromPocoSocketBase::nextImpl()) + return false; + data_end = buffer().end(); + c_pos = buffer().begin(); } - pos = buffer_socket.position(); - working_buffer.resize(offset() + buffer_socket.available()); - - return true; + return process_chunk_left(c_pos); } } diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index b0f5dd7dc5f..851a90042ac 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -3,37 +3,111 @@ #include #include +/* + +Handshake +============= + | 'Hello' type + | handshake exchange + | chunked protocol negotiation + +============= + + +Basic chunk: + +============= +Chunk begins | 0x12345678 chunk size, 4 bytes little endian + +------------- + | Packet type always follows beginning of the chunk + | packet data + +------------- +Chunk ends | 0x00000000 4 zero bytes + +============= + + + + +Datastream chunk: + +============= +Chunk begins | 0x12345678 + +------------- + | Packet type + | packet data + +------------- + | Packet type + | packet data + +------------- +...arbitrary number ..... +of packets... ..... + +------------- + | Packet type + | packet data + +------------- +Chunk ends | 0x00000000 + +============= + + + +Multipart chunk: + +============= +Chunk begins | 0x12345678 chunk part size, 4 bytes little endian + +------------- + | Packet type + | packet data + +------------- + | Packet type + | (partial) packet data + +============= +Chunk continues | 0x12345678 chunk next part size, 4 bytes little endian + +============= + | possibly previous packet's data + +------------- + | Packet type + | packet data + +------------- +...arbitrary number ..... +of chunk parts... ..... 
+ +------------- | Packet type + | packet data + +------------- Chunk ends | 0x00000000 + +============= + +*/ + namespace DB { -class ReadBufferFromPocoSocketChunked: public ReadBuffer +class ReadBufferFromPocoSocketChunked: public ReadBufferFromPocoSocketBase { public: + using ReadBufferFromPocoSocketBase::setAsyncCallback; + explicit ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); explicit ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); void enableChunked(); - bool poll(size_t timeout_microseconds); - void setAsyncCallback(AsyncCallback async_callback_); - bool hasBufferedData() const; + bool hasPendingData() const; + + bool poll(size_t timeout_microseconds) const; Poco::Net::SocketAddress peerAddress() { return peer_address; } Poco::Net::SocketAddress ourAddress() { return our_address; } protected: - bool startChunk(); - bool nextChunk(); + bool load_next_chunk(Position c_pos, bool cont = false); + bool process_chunk_left(Position c_pos); bool nextImpl() override; +protected: + Poco::Net::SocketAddress our_address; + private: LoggerPtr log; - Poco::Net::SocketAddress peer_address; - Poco::Net::SocketAddress our_address; - ReadBufferFromPocoSocket buffer_socket; - bool chunked = false; - UInt32 chunk_left = 0; // chunk left to read from socket - bool started = false; + Position data_end = nullptr; // end position of data in the internal_buffer + UInt32 chunk_left = 0; // chunk left to read from socket + UInt32 next_chunk = 0; // size of the next chunk + UInt8 chunked = 0; // 0 - disabled; 1 - started; 2 - enabled; }; } From 4545f3af52d8046cd2a1b54fc22fd0d592a48a31 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 31 May 2024 04:35:01 +0000 Subject: [PATCH 039/265] fix --- src/IO/ReadBufferFromPocoSocketChunked.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 3cc8710407e..59c56b9d008 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -121,6 +121,9 @@ bool ReadBufferFromPocoSocketChunked::nextImpl() chunk_left = next_chunk; next_chunk = 0; + if (chunk_left == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: empty chunk received"); + c_pos += sizeof(next_chunk); if (c_pos >= data_end) From d1bc58f23254ca781b6645bafb9c7cdf00326a04 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 31 May 2024 05:05:18 +0000 Subject: [PATCH 040/265] fix --- src/IO/ReadBufferFromPocoSocket.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index 5c338ef18bc..af58efc7e10 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -43,6 +43,8 @@ ssize_t ReadBufferFromPocoSocketBase::socketReceiveBytesImpl(char * ptr, size_t ProfileEvents::increment(ProfileEvents::NetworkReceiveBytes, bytes_read); }); + CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkReceive); + /// Add more details to exceptions.
try { From 1dc381dbc1f0b7b53d8707b9515a0d3f6ad3f442 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 31 May 2024 05:07:40 +0000 Subject: [PATCH 041/265] fix --- src/IO/ReadBufferFromPocoSocketChunked.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index 851a90042ac..749ee042a7c 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -23,8 +23,6 @@ Chunk ends | 0x00000000 4 zero bytes +============= - - Datastream chunk: +============= Chunk begins | 0x12345678 @@ -45,7 +43,6 @@ Chunk ends | 0x00000000 +============= - Multipart chunk: +============= Chunk begins | 0x12345678 chunk part size, 4 bytes little endian From fdccba97a3c7d1097034bc6b0994b7f37bc5721e Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 31 May 2024 06:35:04 +0000 Subject: [PATCH 042/265] set chunked for testing --- src/Client/ConnectionParameters.cpp | 4 ++-- src/Server/TCPHandler.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 430c462084a..b6ed242acd4 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -103,8 +103,8 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati #endif } - proto_send_chunked = config.getString("proto_caps.send", "notchunked_optional"); - proto_recv_chunked = config.getString("proto_caps.recv", "notchunked_optional"); + proto_send_chunked = config.getString("proto_caps.send", "chunked"); + proto_recv_chunked = config.getString("proto_caps.recv", "chunked"); quota_key = config.getString("quota_key", ""); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index c7db25c4c3a..47e5f982a93 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -303,8 +303,8 @@ void TCPHandler::runImpl() return chunked_srv; }; - bool out_chunked = is_chunked(server.config().getString("proto_caps.send", "notchunked_optional"), proto_recv_chunked_cl, "send"); - bool in_chunked = is_chunked(server.config().getString("proto_caps.recv", "notchunked_optional"), proto_send_chunked_cl, "recv"); + bool out_chunked = is_chunked(server.config().getString("proto_caps.send", "chunked"), proto_recv_chunked_cl, "send"); + bool in_chunked = is_chunked(server.config().getString("proto_caps.recv", "chunked"), proto_send_chunked_cl, "recv"); if (out_chunked) out->enableChunked(); @@ -1645,8 +1645,8 @@ void TCPHandler::sendHello() writeVarUInt(VERSION_PATCH, *out); if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) { - writeStringBinary(server.config().getString("proto_caps.send", "notchunked_optional"), *out); - writeStringBinary(server.config().getString("proto_caps.recv", "notchunked_optional"), *out); + writeStringBinary(server.config().getString("proto_caps.send", "chunked"), *out); + writeStringBinary(server.config().getString("proto_caps.recv", "chunked"), *out); } if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES) { From e3d57ab117391c3b99a8937783320a8c59e0b196 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 31 May 2024 16:05:42 +0000 Subject: [PATCH 043/265] set default protocol to notchunked_optional for cluster clients --- src/Interpreters/Cluster.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h 
index c993af5fc5e..f3146ac0134 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -114,8 +114,8 @@ public: UInt16 port{0}; String user; String password; - String proto_send_chunked; - String proto_recv_chunked; + String proto_send_chunked = "notchunked_optional"; + String proto_recv_chunked = "notchunked_optional"; String quota_key; /// For inter-server authorization From f11f41491087099c63ee9f98b6bf8a27a8e87ed9 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 2 Jun 2024 07:25:48 +0000 Subject: [PATCH 044/265] fix special case of testing feature for chunked protocol --- src/Server/TCPHandler.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 47e5f982a93..da276e1c404 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -2268,16 +2268,26 @@ void TCPHandler::sendData(const Block & block) } writeVarUInt(Protocol::Server::Data, *out); - /// Send external table name (empty name is the main table) - writeStringBinary("", *out); /// For testing hedged requests if (block.rows() > 0 && query_context->getSettingsRef().sleep_in_send_data_ms.totalMilliseconds()) { + /// This strange sequence is needed in case of chunked protocol is enabled, in order for client not to + /// hang on recieving of at least packet type - chunk will not be processed unless either chunk footer + /// or chunk continuation header is recieved - first 'next' is sending starting chunk containing packet type + /// and second 'next' is sending chunk continuation header. + out->next(); + /// Send external table name (empty name is the main table) + writeStringBinary("", *out); out->next(); std::chrono::milliseconds ms(query_context->getSettingsRef().sleep_in_send_data_ms.totalMilliseconds()); std::this_thread::sleep_for(ms); } + else + { + /// Send external table name (empty name is the main table) + writeStringBinary("", *out); + } state.block_out->write(block); state.maybe_compressed_out->next(); From eaeabd8d374e2e28a6208fb9ea1ea7835676c7e5 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 2 Jun 2024 13:03:48 +0000 Subject: [PATCH 045/265] fix typos --- src/Server/TCPHandler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index da276e1c404..1a64ec1dd10 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -2273,8 +2273,8 @@ void TCPHandler::sendData(const Block & block) if (block.rows() > 0 && query_context->getSettingsRef().sleep_in_send_data_ms.totalMilliseconds()) { /// This strange sequence is needed in case of chunked protocol is enabled, in order for client not to - /// hang on recieving of at least packet type - chunk will not be processed unless either chunk footer - /// or chunk continuation header is recieved - first 'next' is sending starting chunk containing packet type + /// hang on receiving of at least packet type - chunk will not be processed unless either chunk footer + /// or chunk continuation header is received - first 'next' is sending starting chunk containing packet type /// and second 'next' is sending chunk continuation header. 
out->next(); /// Send external table name (empty name is the main table) From e0be652f4de803198b406dcbda5b1f1ac6938a9c Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 3 Jun 2024 07:24:28 +0000 Subject: [PATCH 046/265] fix test, better log, fix defaults for client --- src/Client/ConnectionParameters.cpp | 4 ++-- src/Client/ConnectionParameters.h | 4 ++-- src/IO/ReadBufferFromPocoSocketChunked.cpp | 6 +++--- src/IO/WriteBufferFromPocoSocketChunked.h | 6 +++--- tests/integration/test_hedged_requests/test.py | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index b6ed242acd4..430c462084a 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -103,8 +103,8 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati #endif } - proto_send_chunked = config.getString("proto_caps.send", "chunked"); - proto_recv_chunked = config.getString("proto_caps.recv", "chunked"); + proto_send_chunked = config.getString("proto_caps.send", "notchunked_optional"); + proto_recv_chunked = config.getString("proto_caps.recv", "notchunked_optional"); quota_key = config.getString("quota_key", ""); diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index 85174924016..52fe7bd9b2b 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -20,8 +20,8 @@ struct ConnectionParameters std::string default_database; std::string user; std::string password; - std::string proto_send_chunked; - std::string proto_recv_chunked; + std::string proto_send_chunked = "notchunked_optional"; + std::string proto_recv_chunked = "notchunked_optional"; std::string quota_key; SSHKey ssh_private_key; Protocol::Secure security = Protocol::Secure::Disable; diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 59c56b9d008..328b70bdb9b 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -65,10 +65,10 @@ bool ReadBufferFromPocoSocketChunked::load_next_chunk(Position c_pos, bool cont) if (next_chunk) { if (cont) - LOG_TEST(log, "Packet receive continued. Size {}", next_chunk); + LOG_TEST(log, "{} <- {} Chunk receive continued. Size {}", ourAddress().toString(), peerAddress().toString(), next_chunk); } else - LOG_TEST(log, "Packet receive ended."); + LOG_TEST(log, "{} <- {} Chunk receive ended.", ourAddress().toString(), peerAddress().toString()); return true; } @@ -134,7 +134,7 @@ bool ReadBufferFromPocoSocketChunked::nextImpl() c_pos = buffer().begin(); } - LOG_TEST(log, "Packet receive started. Message {}, size {}", static_cast(*c_pos), chunk_left); + LOG_TEST(log, "{} <- {} Chunk receive started. 
Message {}, size {}", ourAddress().toString(), peerAddress().toString(), static_cast(*c_pos), chunk_left); } else { diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 6c35db62c0c..7c6ab53dc91 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -30,7 +30,7 @@ public: if (finished) throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: attempt to send empty chunk"); - LOG_TEST(log, "Packet send ended."); + LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); finished = true; UInt32 s = 0; @@ -43,9 +43,9 @@ protected: { UInt32 s = static_cast(offset()); if (finished) - LOG_TEST(log, "Packet send started. Message {}, size {}", static_cast(*buffer().begin()), s); + LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}", ourAddress().toString(), peerAddress().toString(), static_cast(*buffer().begin()), s); else - LOG_TEST(log, "Packet send continued. Size {}", s); + LOG_TEST(log, "{} -> {} Chunk send continued. Size {}", ourAddress().toString(), peerAddress().toString(), s); finished = false; s = toLittleEndian(s); diff --git a/tests/integration/test_hedged_requests/test.py b/tests/integration/test_hedged_requests/test.py index 02ecf3c1367..0d72f7c45b1 100644 --- a/tests/integration/test_hedged_requests/test.py +++ b/tests/integration/test_hedged_requests/test.py @@ -333,7 +333,7 @@ def test_receive_timeout2(started_cluster): # in packet receiving but there are replicas in process of # connection establishing. update_configs( - node_1_sleep_in_send_data=4000, + node_1_sleep_in_send_data=5000, node_2_sleep_in_send_tables_status=2000, node_3_sleep_in_send_tables_status=2000, ) From 66e387562659e9712088e09427d4c050e9f22c1f Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 3 Jun 2024 09:55:51 +0000 Subject: [PATCH 047/265] fix tidy build --- src/IO/ReadBufferFromPocoSocket.h | 2 +- src/IO/ReadBufferFromPocoSocketChunked.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/IO/ReadBufferFromPocoSocket.h b/src/IO/ReadBufferFromPocoSocket.h index a36bea6d679..912388adaac 100644 --- a/src/IO/ReadBufferFromPocoSocket.h +++ b/src/IO/ReadBufferFromPocoSocket.h @@ -43,7 +43,7 @@ class ReadBufferFromPocoSocket : public ReadBufferFromPocoSocketBase { public: explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) - : ReadBufferFromPocoSocketBase(socket_, buf_size = DBMS_DEFAULT_BUFFER_SIZE) + : ReadBufferFromPocoSocketBase(socket_, buf_size) {} explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : ReadBufferFromPocoSocketBase(socket_, read_event_, buf_size) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index 749ee042a7c..acf0edafe0a 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -96,7 +96,6 @@ protected: bool process_chunk_left(Position c_pos); bool nextImpl() override; -protected: Poco::Net::SocketAddress our_address; private: From 1cda4596adfc9ca384a28da80a91159641952e36 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 3 Jun 2024 11:51:01 +0000 Subject: [PATCH 048/265] fix tidy build --- src/IO/ReadBufferFromPocoSocketChunked.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp 
b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 328b70bdb9b..6ed6b63289c 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -101,7 +101,7 @@ bool ReadBufferFromPocoSocketChunked::nextImpl() if (!chunked) return ReadBufferFromPocoSocketBase::nextImpl(); - auto c_pos = pos; + auto * c_pos = pos; if (chunk_left == 0) { From a562118d2a5b66955f44d393949eccb0e8c3b8b7 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 7 Jun 2024 01:45:56 +0000 Subject: [PATCH 049/265] major refactoring of chunked write buffer - more buffering, some bugs fixed --- src/Client/Connection.cpp | 23 +++-- src/IO/ReadBufferFromPocoSocketChunked.cpp | 2 - src/IO/WriteBuffer.h | 8 +- src/IO/WriteBufferFromPocoSocketChunked.h | 114 +++++++++++++++++---- src/Server/TCPHandler.cpp | 38 +++---- 5 files changed, 134 insertions(+), 51 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index c221124932a..9f727b974ee 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -625,7 +625,7 @@ bool Connection::ping(const ConnectionTimeouts & timeouts) UInt64 pong = 0; writeVarUInt(Protocol::Client::Ping, *out); - out->finishPacket(); + out->finishChunk(); out->next(); if (in->eof()) @@ -675,7 +675,7 @@ TablesStatusResponse Connection::getTablesStatus(const ConnectionTimeouts & time writeVarUInt(Protocol::Client::TablesStatusRequest, *out); request.write(*out, server_revision); - out->finishPacket(); + out->finishChunk(); out->next(); UInt64 response_type = 0; @@ -827,7 +827,7 @@ void Connection::sendQuery( block_profile_events_in.reset(); block_out.reset(); - out->finishPacket(); + out->finishChunk(); /// Send empty block which means end of data. if (!with_pending_data) @@ -845,7 +845,7 @@ void Connection::sendCancel() return; writeVarUInt(Protocol::Client::Cancel, *out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -871,9 +871,10 @@ void Connection::sendData(const Block & block, const String & name, bool scalar) size_t prev_bytes = out->count(); block_out->write(block); - maybe_compressed_out->next(); + if (maybe_compressed_out != out) + maybe_compressed_out->next(); if (!block) - out->finishPacket(); + out->finishChunk(); out->next(); if (throttler) @@ -884,7 +885,7 @@ void Connection::sendIgnoredPartUUIDs(const std::vector & uuids) { writeVarUInt(Protocol::Client::IgnoredPartUUIDs, *out); writeVectorBinary(uuids, *out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -894,7 +895,7 @@ void Connection::sendReadTaskResponse(const String & response) writeVarUInt(Protocol::Client::ReadTaskResponse, *out); writeVarUInt(DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION, *out); writeStringBinary(response, *out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -903,7 +904,7 @@ void Connection::sendMergeTreeReadTaskResponse(const ParallelReadResponse & resp { writeVarUInt(Protocol::Client::MergeTreeReadTaskResponse, *out); response.serialize(*out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -922,7 +923,7 @@ void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String else copyData(input, *out, size); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -951,7 +952,7 @@ void Connection::sendScalarsData(Scalars & data) sendData(elem.second, elem.first, true /* scalar */); } - out->finishPacket(); + out->finishChunk(); out_bytes = out->count() - out_bytes; maybe_compressed_out_bytes = maybe_compressed_out->count() - maybe_compressed_out_bytes; 
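For orientation before the write-buffer refactoring that follows: per the wire format documented in ReadBufferFromPocoSocketChunked.h earlier in this series, each finishChunk() boundary ends up on the wire as size-prefixed chunk parts (4-byte little-endian size, then data) terminated by four zero bytes. A sketch of the resulting bytes for a hypothetical one-byte payload, assuming a little-endian host; the helper and the sample payload are illustrative, not code from this series:

    #include <cstdint>
    #include <cstring>
    #include <vector>

    /// Frame a payload as one complete chunk:
    /// [size, 4 bytes little endian][payload][0x00000000 end-of-chunk marker]
    static std::vector<uint8_t> frame_chunk(const std::vector<uint8_t> & payload)
    {
        std::vector<uint8_t> wire(sizeof(uint32_t));
        const uint32_t size = static_cast<uint32_t>(payload.size());
        std::memcpy(wire.data(), &size, sizeof(size)); // little-endian host assumed
        wire.insert(wire.end(), payload.begin(), payload.end());
        wire.insert(wire.end(), 4, 0); // end-of-chunk marker
        return wire;
    }

    /// frame_chunk({0x04}) yields: 01 00 00 00 04 00 00 00 00

A multipart chunk simply repeats size-prefixed parts before the single zero marker, which is why the reader above tracks the next part's size across buffer refills.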
diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 6ed6b63289c..798be547e99 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -153,8 +153,6 @@ bool ReadBufferFromPocoSocketChunked::nextImpl() } else { - chassert(c_pos == data_end); - if (!ReadBufferFromPocoSocketBase::nextImpl()) return false; data_end = buffer().end(); diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h index 1ceb938e454..bb3200d2e54 100644 --- a/src/IO/WriteBuffer.h +++ b/src/IO/WriteBuffer.h @@ -63,7 +63,8 @@ public: } bytes += bytes_in_buffer; - pos = working_buffer.begin(); + pos = working_buffer.begin() + nextimpl_working_buffer_offset; + nextimpl_working_buffer_offset = 0; } /// Calling finalize() in the destructor of derived classes is a bad practice. @@ -152,6 +153,11 @@ protected: bool finalized = false; + /// The number of bytes to preserve from the initial position of `working_buffer` + /// buffer. Apparently this is an additional out-parameter for nextImpl(), + /// not a real field. + size_t nextimpl_working_buffer_offset = 0; + private: /** Write the data in the buffer (from the beginning of the buffer to the current position). * Throw an exception if something is wrong. diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 7c6ab53dc91..3fe39487923 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -19,40 +19,114 @@ public: explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, buf_size), log(getLogger("Protocol")) {} explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, write_event_, buf_size), log(getLogger("Protocol")) {} - void enableChunked() { chunked = true; } - void finishPacket() + void enableChunked() + { + chunked = true; + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(pos); + pos += std::min(available(), sizeof(*chunk_size_ptr)); + } + + void finishChunk() { if (!chunked) return; - next(); - - if (finished) + if (pos <= reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: attempt to send empty chunk"); - LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); - finished = true; + /// Fill up current chunk size + *chunk_size_ptr = toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr))); - UInt32 s = 0; - socketSendBytes(reinterpret_cast(&s), sizeof(s)); + if (!chunk_started) + LOG_TEST(log, "{} -> {} Chunk send started. 
Message {}, size {}", + ourAddress().toString(), peerAddress().toString(), + static_cast(*(reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr))), + *chunk_size_ptr); + else + chunk_started = false; + + LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); + + if (available() < sizeof(*chunk_size_ptr)) + { + finishing = available(); + pos += available(); + chunk_size_ptr = reinterpret_cast(pos); + return; + } + + /// Buffer end-of-chunk + *reinterpret_cast(pos) = 0; + pos += sizeof(*chunk_size_ptr); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(pos); + pos += std::min(available(), sizeof(*chunk_size_ptr)); } + protected: void nextImpl() override { - if (chunked) - { - UInt32 s = static_cast(offset()); - if (finished) - LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}", ourAddress().toString(), peerAddress().toString(), static_cast(*buffer().begin()), s); - else - LOG_TEST(log, "{} -> {} Chunk send continued. Size {}", ourAddress().toString(), peerAddress().toString(), s); + if (!chunked) + return WriteBufferFromPocoSocket::nextImpl(); - finished = false; - s = toLittleEndian(s); + if (finishing < sizeof(*chunk_size_ptr)) + { + pos -= finishing; + /// Send current chunk + WriteBufferFromPocoSocket::nextImpl(); + /// Send end-of-chunk directly + UInt32 s = 0; socketSendBytes(reinterpret_cast(&s), sizeof(s)); + + finishing = sizeof(*chunk_size_ptr); + + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + return; } + if (offset() == sizeof(*chunk_size_ptr)) // prevent sending empty chunk + { + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + return; + } + + if (working_buffer.end() - reinterpret_cast(chunk_size_ptr) <= static_cast(sizeof(*chunk_size_ptr))) + { + pos = reinterpret_cast(chunk_size_ptr); + /// Send current chunk + WriteBufferFromPocoSocket::nextImpl(); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + return; + } + + if (pos - reinterpret_cast(chunk_size_ptr) == sizeof(*chunk_size_ptr)) + pos -= sizeof(*chunk_size_ptr); + else /// Fill up current chunk size + { + *chunk_size_ptr = toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr))); + if (!chunk_started) + { + chunk_started = true; + LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}", + ourAddress().toString(), peerAddress().toString(), + static_cast(*(reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr))), + *chunk_size_ptr); + } + else + LOG_TEST(log, "{} -> {} Chunk send continued. 
Size {}", ourAddress().toString(), peerAddress().toString(), *chunk_size_ptr); + } + /// Send current chunk WriteBufferFromPocoSocket::nextImpl(); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); } Poco::Net::SocketAddress peerAddress() @@ -67,7 +141,9 @@ protected: private: LoggerPtr log; bool chunked = false; - bool finished = true; + bool chunk_started = false; // chunk started flag + UInt32 * chunk_size_ptr = nullptr; // pointer to the chunk size holder in the buffer + size_t finishing = sizeof(*chunk_size_ptr); // indicates not enough buffer for end-of-chunk marker }; } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 1a64ec1dd10..89ad8e856d5 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1188,7 +1188,7 @@ void TCPHandler::processTablesStatusRequest() response.write(*out, client_tcp_protocol_version); - out->finishPacket(); + out->finishChunk(); } void TCPHandler::receiveUnexpectedTablesStatusRequest() @@ -1210,7 +1210,7 @@ void TCPHandler::sendPartUUIDs() writeVarUInt(Protocol::Server::PartUUIDs, *out); writeVectorBinary(uuids, *out); - out->finishPacket(); + out->finishChunk(); out->next(); } } @@ -1220,7 +1220,7 @@ void TCPHandler::sendReadTaskRequestAssumeLocked() { writeVarUInt(Protocol::Server::ReadTaskRequest, *out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -1230,7 +1230,7 @@ void TCPHandler::sendMergeTreeAllRangesAnnouncementAssumeLocked(InitialAllRanges writeVarUInt(Protocol::Server::MergeTreeAllRangesAnnouncement, *out); announcement.serialize(*out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -1240,7 +1240,7 @@ void TCPHandler::sendMergeTreeReadTaskRequestAssumeLocked(ParallelReadRequest re writeVarUInt(Protocol::Server::MergeTreeReadTaskRequest, *out); request.serialize(*out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -1250,7 +1250,7 @@ void TCPHandler::sendProfileInfo(const ProfileInfo & info) writeVarUInt(Protocol::Server::ProfileInfo, *out); info.write(*out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -1267,7 +1267,7 @@ void TCPHandler::sendTotals(const Block & totals) state.block_out->write(totals); state.maybe_compressed_out->next(); - out->finishPacket(); + out->finishChunk(); out->next(); } } @@ -1285,7 +1285,7 @@ void TCPHandler::sendExtremes(const Block & extremes) state.block_out->write(extremes); state.maybe_compressed_out->next(); - out->finishPacket(); + out->finishChunk(); out->next(); } } @@ -1304,7 +1304,7 @@ void TCPHandler::sendProfileEvents() state.profile_events_block_out->write(block); - out->finishPacket(); + out->finishChunk(); out->next(); auto elapsed_milliseconds = stopwatch.elapsedMilliseconds(); @@ -1343,7 +1343,7 @@ void TCPHandler::sendTimezone() writeVarUInt(Protocol::Server::TimezoneUpdate, *out); writeStringBinary(tz, *out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -1700,7 +1700,7 @@ bool TCPHandler::receivePacket() case Protocol::Client::Ping: writeVarUInt(Protocol::Server::Pong, *out); - out->finishPacket(); + out->finishChunk(); out->next(); return false; @@ -2290,9 +2290,11 @@ void TCPHandler::sendData(const Block & block) } state.block_out->write(block); - state.maybe_compressed_out->next(); - out->finishPacket(); + if (state.maybe_compressed_out != out) + state.maybe_compressed_out->next(); + + out->finishChunk(); out->next(); } catch (...) 
@@ -2329,7 +2331,7 @@ void TCPHandler::sendLogData(const Block & block) state.logs_block_out->write(block); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -2341,7 +2343,7 @@ void TCPHandler::sendTableColumns(const ColumnsDescription & columns) writeStringBinary("", *out); writeStringBinary(columns.toString(), *out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -2352,7 +2354,7 @@ void TCPHandler::sendException(const Exception & e, bool with_stack_trace) writeVarUInt(Protocol::Server::Exception, *out); writeException(e, *out, with_stack_trace); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -2364,7 +2366,7 @@ void TCPHandler::sendEndOfStream() writeVarUInt(Protocol::Server::EndOfStream, *out); - out->finishPacket(); + out->finishChunk(); out->next(); } @@ -2384,7 +2386,7 @@ void TCPHandler::sendProgress() state.prev_elapsed_ns = current_elapsed_ns; increment.write(*out, client_tcp_protocol_version); - out->finishPacket(); + out->finishChunk(); out->next(); } From 390a2a2488bdd20a87400ec3f5851dfde0f1bac0 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 7 Jun 2024 02:06:26 +0000 Subject: [PATCH 050/265] fix style --- src/IO/WriteBufferFromPocoSocketChunked.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 3fe39487923..9a9d53a1f30 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -142,7 +142,7 @@ private: LoggerPtr log; bool chunked = false; bool chunk_started = false; // chunk started flag - UInt32 * chunk_size_ptr = nullptr; // pointer to the chunk size holder in the buffer + UInt32 * chunk_size_ptr = nullptr; // pointer to the chunk size holder in the buffer size_t finishing = sizeof(*chunk_size_ptr); // indicates not enough buffer for end-of-chunk marker }; From 11d9f7d51b2cd658c495adb11c3b32f6fc5a8cc6 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 7 Jun 2024 12:07:35 +0000 Subject: [PATCH 051/265] allow to set end-of-chunk marker on sent chunk, ignore duplicate finish chunk --- src/IO/WriteBufferFromPocoSocketChunked.h | 51 +++++++++++++++++++++-- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 9a9d53a1f30..40a89416f84 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -1,5 +1,6 @@ #pragma once +#include "base/defines.h" #include #include #include @@ -33,7 +34,26 @@ public: return; if (pos <= reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: attempt to send empty chunk"); + { + if (chunk_size_ptr == last_finish_chunk) // prevent duplicate finish chunk + return; + + /// If current chunk is empty it means we are finishing a chunk previously sent by next(), + /// we want to convert current chunk header into end-of-chunk marker and initialize next chunk. + /// We don't need to wary about if it's the end of the buffer because next() always sends the whole buffer + /// so it should be a beginning of the buffer. 
+ + chassert(reinterpret_cast(chunk_size_ptr) == working_buffer.begin()); + + *chunk_size_ptr = 0; + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(pos); + pos += std::min(available(), sizeof(*chunk_size_ptr)); + + last_finish_chunk = chunk_size_ptr; + + return; + } /// Fill up current chunk size *chunk_size_ptr = toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr))); @@ -62,6 +82,8 @@ public: /// Initialize next chunk chunk_size_ptr = reinterpret_cast(pos); pos += std::min(available(), sizeof(*chunk_size_ptr)); + + last_finish_chunk = chunk_size_ptr; } protected: @@ -70,6 +92,7 @@ protected: if (!chunked) return WriteBufferFromPocoSocket::nextImpl(); + /// next() after finishChunk ar the end of the buffer if (finishing < sizeof(*chunk_size_ptr)) { pos -= finishing; @@ -85,15 +108,34 @@ protected: chunk_size_ptr = reinterpret_cast(working_buffer.begin()); nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + last_finish_chunk = chunk_size_ptr; + return; } - if (offset() == sizeof(*chunk_size_ptr)) // prevent sending empty chunk + /// Send end-of-chunk buffered by finishChunk + if (offset() == 2 * sizeof(*chunk_size_ptr)) + { + pos -= sizeof(*chunk_size_ptr); + /// Send end-of-chunk + WriteBufferFromPocoSocket::nextImpl(); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + last_finish_chunk = chunk_size_ptr; + + return; + } + + /// Prevent sending empty chunk + if (offset() == sizeof(*chunk_size_ptr)) { nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); return; } + /// Finish chunk at the end of the buffer if (working_buffer.end() - reinterpret_cast(chunk_size_ptr) <= static_cast(sizeof(*chunk_size_ptr))) { pos = reinterpret_cast(chunk_size_ptr); @@ -106,9 +148,9 @@ protected: return; } - if (pos - reinterpret_cast(chunk_size_ptr) == sizeof(*chunk_size_ptr)) + if (pos - reinterpret_cast(chunk_size_ptr) == sizeof(*chunk_size_ptr)) // next() after finishChunk pos -= sizeof(*chunk_size_ptr); - else /// Fill up current chunk size + else // fill up current chunk size { *chunk_size_ptr = toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr))); if (!chunk_started) @@ -141,6 +183,7 @@ protected: private: LoggerPtr log; bool chunked = false; + UInt32 * last_finish_chunk = nullptr; // pointer to the last chunk header created by finishChunk bool chunk_started = false; // chunk started flag UInt32 * chunk_size_ptr = nullptr; // pointer to the chunk size holder in the buffer size_t finishing = sizeof(*chunk_size_ptr); // indicates not enough buffer for end-of-chunk marker From d2dd640beb3ff917352135477e349fd1d379f38e Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 7 Jun 2024 12:25:46 +0000 Subject: [PATCH 052/265] fix style --- src/IO/WriteBufferFromPocoSocketChunked.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 40a89416f84..d1ba492738e 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -9,11 +9,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - class WriteBufferFromPocoSocketChunked: public WriteBufferFromPocoSocket { public: @@ -37,7 +32,7 @@ public: { if (chunk_size_ptr == last_finish_chunk) // prevent duplicate finish chunk return; - + /// If current chunk is empty it means we are 
finishing a chunk previously sent by next(), /// we want to convert current chunk header into end-of-chunk marker and initialize next chunk. /// We don't need to wary about if it's the end of the buffer because next() always sends the whole buffer From 740501b36e58c08d3a6a52348c9b0411d0f5dd90 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 7 Jun 2024 18:23:37 +0000 Subject: [PATCH 053/265] some potential bug fixes --- src/IO/WriteBufferFromPocoSocketChunked.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index d1ba492738e..689389ba2ea 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -109,7 +109,7 @@ protected: } /// Send end-of-chunk buffered by finishChunk - if (offset() == 2 * sizeof(*chunk_size_ptr)) + if (offset() == 2 * sizeof(*chunk_size_ptr) && last_finish_chunk == chunk_size_ptr) { pos -= sizeof(*chunk_size_ptr); /// Send end-of-chunk @@ -140,6 +140,8 @@ protected: chunk_size_ptr = reinterpret_cast(working_buffer.begin()); nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + last_finish_chunk = nullptr; + return; } @@ -164,6 +166,8 @@ protected: /// Initialize next chunk chunk_size_ptr = reinterpret_cast(working_buffer.begin()); nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + last_finish_chunk = nullptr; } Poco::Net::SocketAddress peerAddress() From 90b5ad3613ea7e3b4dea202975407569d0aaee84 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 9 Jun 2024 19:31:20 +0000 Subject: [PATCH 054/265] fix tidy build --- src/IO/WriteBufferFromPocoSocketChunked.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 689389ba2ea..ecc33180140 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -85,7 +85,10 @@ protected: void nextImpl() override { if (!chunked) - return WriteBufferFromPocoSocket::nextImpl(); + { + WriteBufferFromPocoSocket::nextImpl(); + return; + } /// next() after finishChunk ar the end of the buffer if (finishing < sizeof(*chunk_size_ptr)) From fb49cf503e4159549348c76ebf9c3ca686b9f02f Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 11 Jun 2024 16:47:05 +0000 Subject: [PATCH 055/265] some fixes --- src/IO/WriteBufferFromPocoSocketChunked.h | 31 ++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index ecc33180140..4325ab2bd4b 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -35,7 +35,7 @@ public: /// If current chunk is empty it means we are finishing a chunk previously sent by next(), /// we want to convert current chunk header into end-of-chunk marker and initialize next chunk. - /// We don't need to wary about if it's the end of the buffer because next() always sends the whole buffer + /// We don't need to worry about if it's the end of the buffer because next() always sends the whole buffer /// so it should be a beginning of the buffer. 
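 /// Editorial illustration (not in the patch) of the buffer state this branch handles,
 /// right after next() flushed the buffer and reserved a fresh header:
 ///
 ///     working_buffer: [ hh hh hh hh | ............ free ............ ]
 ///                       ^ chunk_size_ptr == working_buffer.begin(), pos just past it
 ///
 /// finishChunk() then overwrites hh hh hh hh with 0x00000000 - the end-of-chunk
 /// marker for the chunk that next() already sent - and reserves the next header.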
chassert(reinterpret_cast(chunk_size_ptr) == working_buffer.begin()); @@ -50,6 +50,13 @@ public: return; } + /// Previously finished chunk wasn't sent yet + if (last_finish_chunk == chunk_size_ptr) + { + chunk_started = false; + LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); + } + /// Fill up current chunk size *chunk_size_ptr = toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr))); @@ -59,7 +66,10 @@ public: static_cast(*(reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr))), *chunk_size_ptr); else + { chunk_started = false; + LOG_TEST(log, "{} -> {} Chunk send continued. Size {}", ourAddress().toString(), peerAddress().toString(), *chunk_size_ptr); + } LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); @@ -81,6 +91,18 @@ public: last_finish_chunk = chunk_size_ptr; } + ~WriteBufferFromPocoSocketChunked() override + { + try + { + finalize(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + protected: void nextImpl() override { @@ -173,6 +195,13 @@ protected: last_finish_chunk = nullptr; } + void finalizeImpl() override + { + if (offset() == sizeof(*chunk_size_ptr)) + pos -= sizeof(*chunk_size_ptr); + WriteBufferFromPocoSocket::finalizeImpl(); + } + Poco::Net::SocketAddress peerAddress() { return peer_address; From ba76a06f5677e7de556781a4c06cc947f392e0c5 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 14 Jun 2024 01:35:08 +0000 Subject: [PATCH 056/265] potentially very serious bug is fixed for secure socket --- src/IO/ReadBufferFromPocoSocket.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index af58efc7e10..6361fed01dd 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -134,11 +134,14 @@ ReadBufferFromPocoSocketBase::ReadBufferFromPocoSocketBase(Poco::Net::Socket & s bool ReadBufferFromPocoSocketBase::poll(size_t timeout_microseconds) const { - if (available()) + /// For secure socket it is important to check if any remaining data available in underlying decryption buffer - + /// read always retrives the whole encrypted frame from the wire and puts it into underlying buffer while returning only requested size - + /// further poll() can block though there is still data to read in the underlying decryption buffer. 
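+    /// Editorial illustration of the failure mode (assuming Poco's secure socket,
+    /// whose impl()->available() reports plaintext still buffered in the SSL layer):
+    ///   1. the peer sends one TLS record carrying 100 bytes of plaintext;
+    ///   2. read(buf, 10) pulls the whole record off the wire, decrypts it,
+    ///      returns 10 bytes and keeps 90 buffered in the decryption buffer;
+    ///   3. a poll() on the raw fd then sees no pending bytes and blocks until
+    ///      timeout, although 90 bytes could be served immediately.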
+    if (available() || socket.impl()->available())
         return true;
 
     Stopwatch watch;
-    bool res = socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR);
+    bool res = socket.impl()->poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR);
     ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds());
     return res;
 }

From 97aea863767a58fd65274777913865201ea906e3 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Fri, 14 Jun 2024 01:56:05 +0000
Subject: [PATCH 057/265] fix style

---
 src/IO/ReadBufferFromPocoSocket.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp
index 6361fed01dd..bbf9f96404f 100644
--- a/src/IO/ReadBufferFromPocoSocket.cpp
+++ b/src/IO/ReadBufferFromPocoSocket.cpp
@@ -135,7 +135,7 @@ ReadBufferFromPocoSocketBase::ReadBufferFromPocoSocketBase(Poco::Net::Socket & s
 bool ReadBufferFromPocoSocketBase::poll(size_t timeout_microseconds) const
 {
     /// For secure socket it is important to check if any remaining data available in underlying decryption buffer -
-    /// read always retrives the whole encrypted frame from the wire and puts it into underlying buffer while returning only requested size -
+    /// read always retrieves the whole encrypted frame from the wire and puts it into underlying buffer while returning only requested size -
     /// further poll() can block though there is still data to read in the underlying decryption buffer.
     if (available() || socket.impl()->available())
         return true;

From 14a13d54c0ff56b0e6326ac75bb7136e44d814d1 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Fri, 14 Jun 2024 15:56:14 +0000
Subject: [PATCH 058/265] fix UB misaligned address

---
 src/IO/WriteBufferFromPocoSocketChunked.h | 18 +++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h
index 4325ab2bd4b..c668ea2c505 100644
--- a/src/IO/WriteBufferFromPocoSocketChunked.h
+++ b/src/IO/WriteBufferFromPocoSocketChunked.h
@@ -6,6 +6,18 @@
 #include
 
+namespace
+{
+
+template <typename T>
+const T & setValue(T * typed_ptr, std::type_identity_t<T> val)
+{
+    memcpy(typed_ptr, &val, sizeof(T));
+    return *typed_ptr;
+}
+
+}
+
 namespace DB
 {
 
@@ -40,7 +52,7 @@ public:
 
         chassert(reinterpret_cast<char *>(chunk_size_ptr) == working_buffer.begin());
 
-        *chunk_size_ptr = 0;
+        setValue(chunk_size_ptr, 0);
         /// Initialize next chunk
         chunk_size_ptr = reinterpret_cast<UInt32 *>(pos);
         pos += std::min(available(), sizeof(*chunk_size_ptr));
@@ -58,7 +70,7 @@ public:
         }
 
         /// Fill up current chunk size
-        *chunk_size_ptr = toLittleEndian(static_cast<UInt32>(pos - reinterpret_cast<const char *>(chunk_size_ptr) - sizeof(*chunk_size_ptr)));
+        setValue(chunk_size_ptr, toLittleEndian(static_cast<UInt32>(pos - reinterpret_cast<const char *>(chunk_size_ptr) - sizeof(*chunk_size_ptr))));
 
         if (!chunk_started)
             LOG_TEST(log, "{} -> {} Chunk send started.
Message {}, size {}", @@ -174,7 +186,7 @@ protected: pos -= sizeof(*chunk_size_ptr); else // fill up current chunk size { - *chunk_size_ptr = toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr))); + setValue(chunk_size_ptr, toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr)))); if (!chunk_started) { chunk_started = true; From 5b082051451356b2c1d3152489e5d51cd75d2d6a Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 15 Jun 2024 00:22:51 +0000 Subject: [PATCH 059/265] some refactoring --- src/IO/WriteBufferFromPocoSocketChunked.cpp | 207 ++++++++++++++++++ src/IO/WriteBufferFromPocoSocketChunked.h | 220 +------------------- 2 files changed, 217 insertions(+), 210 deletions(-) create mode 100644 src/IO/WriteBufferFromPocoSocketChunked.cpp diff --git a/src/IO/WriteBufferFromPocoSocketChunked.cpp b/src/IO/WriteBufferFromPocoSocketChunked.cpp new file mode 100644 index 00000000000..324f8ae3a02 --- /dev/null +++ b/src/IO/WriteBufferFromPocoSocketChunked.cpp @@ -0,0 +1,207 @@ +#include +#include +#include + + +namespace +{ + +template +const T & setValue(T * typed_ptr, std::type_identity_t val) +{ + memcpy(typed_ptr, &val, sizeof(T)); + return *typed_ptr; +} + +} + +namespace DB +{ + +void WriteBufferFromPocoSocketChunked::enableChunked() +{ + chunked = true; + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(pos); + pos += std::min(available(), sizeof(*chunk_size_ptr)); +} + +void WriteBufferFromPocoSocketChunked::finishChunk() +{ + if (!chunked) + return; + + if (pos <= reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr)) + { + if (chunk_size_ptr == last_finish_chunk) // prevent duplicate finish chunk + return; + + /// If current chunk is empty it means we are finishing a chunk previously sent by next(), + /// we want to convert current chunk header into end-of-chunk marker and initialize next chunk. + /// We don't need to worry about if it's the end of the buffer because next() always sends the whole buffer + /// so it should be a beginning of the buffer. + + chassert(reinterpret_cast(chunk_size_ptr) == working_buffer.begin()); + + setValue(chunk_size_ptr, 0); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(pos); + pos += std::min(available(), sizeof(*chunk_size_ptr)); + + last_finish_chunk = chunk_size_ptr; + + return; + } + + /// Previously finished chunk wasn't sent yet + if (last_finish_chunk == chunk_size_ptr) + { + chunk_started = false; + LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); + } + + /// Fill up current chunk size + setValue(chunk_size_ptr, toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr)))); + + if (!chunk_started) + LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}", + ourAddress().toString(), peerAddress().toString(), + static_cast(*(reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr))), + *chunk_size_ptr); + else + { + chunk_started = false; + LOG_TEST(log, "{} -> {} Chunk send continued. 
Size {}", ourAddress().toString(), peerAddress().toString(), *chunk_size_ptr); + } + + LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); + + if (available() < sizeof(*chunk_size_ptr)) + { + finishing = available(); + pos += available(); + chunk_size_ptr = reinterpret_cast(pos); + return; + } + + /// Buffer end-of-chunk + *reinterpret_cast(pos) = 0; + pos += sizeof(*chunk_size_ptr); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(pos); + pos += std::min(available(), sizeof(*chunk_size_ptr)); + + last_finish_chunk = chunk_size_ptr; +} + +WriteBufferFromPocoSocketChunked::~WriteBufferFromPocoSocketChunked() +{ + try + { + finalize(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + +void WriteBufferFromPocoSocketChunked::nextImpl() +{ + if (!chunked) + { + WriteBufferFromPocoSocket::nextImpl(); + return; + } + + /// next() after finishChunk ar the end of the buffer + if (finishing < sizeof(*chunk_size_ptr)) + { + pos -= finishing; + /// Send current chunk + WriteBufferFromPocoSocket::nextImpl(); + /// Send end-of-chunk directly + UInt32 s = 0; + socketSendBytes(reinterpret_cast(&s), sizeof(s)); + + finishing = sizeof(*chunk_size_ptr); + + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + last_finish_chunk = chunk_size_ptr; + + return; + } + + /// Send end-of-chunk buffered by finishChunk + if (offset() == 2 * sizeof(*chunk_size_ptr) && last_finish_chunk == chunk_size_ptr) + { + pos -= sizeof(*chunk_size_ptr); + /// Send end-of-chunk + WriteBufferFromPocoSocket::nextImpl(); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + last_finish_chunk = chunk_size_ptr; + + return; + } + + /// Prevent sending empty chunk + if (offset() == sizeof(*chunk_size_ptr)) + { + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + return; + } + + /// Finish chunk at the end of the buffer + if (working_buffer.end() - reinterpret_cast(chunk_size_ptr) <= static_cast(sizeof(*chunk_size_ptr))) + { + pos = reinterpret_cast(chunk_size_ptr); + /// Send current chunk + WriteBufferFromPocoSocket::nextImpl(); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + last_finish_chunk = nullptr; + + return; + } + + if (pos - reinterpret_cast(chunk_size_ptr) == sizeof(*chunk_size_ptr)) // next() after finishChunk + pos -= sizeof(*chunk_size_ptr); + else // fill up current chunk size + { + setValue(chunk_size_ptr, toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr)))); + if (!chunk_started) + { + chunk_started = true; + LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}", + ourAddress().toString(), peerAddress().toString(), + static_cast(*(reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr))), + *chunk_size_ptr); + } + else + LOG_TEST(log, "{} -> {} Chunk send continued. 
Size {}", ourAddress().toString(), peerAddress().toString(), *chunk_size_ptr); + } + /// Send current chunk + WriteBufferFromPocoSocket::nextImpl(); + /// Initialize next chunk + chunk_size_ptr = reinterpret_cast(working_buffer.begin()); + nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); + + last_finish_chunk = nullptr; +} + +void WriteBufferFromPocoSocketChunked::finalizeImpl() +{ + if (offset() == sizeof(*chunk_size_ptr)) + pos -= sizeof(*chunk_size_ptr); + WriteBufferFromPocoSocket::finalizeImpl(); +} + +} diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index c668ea2c505..269c6d66dda 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -1,23 +1,9 @@ #pragma once -#include "base/defines.h" #include #include -#include -namespace -{ - -template -const T & setValue(T * typed_ptr, std::type_identity_t val) -{ - memcpy(typed_ptr, &val, sizeof(T)); - return *typed_ptr; -} - -} - namespace DB { @@ -27,208 +13,22 @@ public: explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, buf_size), log(getLogger("Protocol")) {} explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, write_event_, buf_size), log(getLogger("Protocol")) {} - void enableChunked() - { - chunked = true; - /// Initialize next chunk - chunk_size_ptr = reinterpret_cast(pos); - pos += std::min(available(), sizeof(*chunk_size_ptr)); - } - - void finishChunk() - { - if (!chunked) - return; - - if (pos <= reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr)) - { - if (chunk_size_ptr == last_finish_chunk) // prevent duplicate finish chunk - return; - - /// If current chunk is empty it means we are finishing a chunk previously sent by next(), - /// we want to convert current chunk header into end-of-chunk marker and initialize next chunk. - /// We don't need to worry about if it's the end of the buffer because next() always sends the whole buffer - /// so it should be a beginning of the buffer. - - chassert(reinterpret_cast(chunk_size_ptr) == working_buffer.begin()); - - setValue(chunk_size_ptr, 0); - /// Initialize next chunk - chunk_size_ptr = reinterpret_cast(pos); - pos += std::min(available(), sizeof(*chunk_size_ptr)); - - last_finish_chunk = chunk_size_ptr; - - return; - } - - /// Previously finished chunk wasn't sent yet - if (last_finish_chunk == chunk_size_ptr) - { - chunk_started = false; - LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); - } - - /// Fill up current chunk size - setValue(chunk_size_ptr, toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr)))); - - if (!chunk_started) - LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}", - ourAddress().toString(), peerAddress().toString(), - static_cast(*(reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr))), - *chunk_size_ptr); - else - { - chunk_started = false; - LOG_TEST(log, "{} -> {} Chunk send continued. 
Size {}", ourAddress().toString(), peerAddress().toString(), *chunk_size_ptr); - } - - LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString()); - - if (available() < sizeof(*chunk_size_ptr)) - { - finishing = available(); - pos += available(); - chunk_size_ptr = reinterpret_cast(pos); - return; - } - - /// Buffer end-of-chunk - *reinterpret_cast(pos) = 0; - pos += sizeof(*chunk_size_ptr); - /// Initialize next chunk - chunk_size_ptr = reinterpret_cast(pos); - pos += std::min(available(), sizeof(*chunk_size_ptr)); - - last_finish_chunk = chunk_size_ptr; - } - - ~WriteBufferFromPocoSocketChunked() override - { - try - { - finalize(); - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } + void enableChunked(); + void finishChunk(); + ~WriteBufferFromPocoSocketChunked() override; protected: - void nextImpl() override - { - if (!chunked) - { - WriteBufferFromPocoSocket::nextImpl(); - return; - } + void nextImpl() override; + void finalizeImpl() override; + Poco::Net::SocketAddress peerAddress() const { return peer_address; } + Poco::Net::SocketAddress ourAddress() const { return our_address; } - /// next() after finishChunk ar the end of the buffer - if (finishing < sizeof(*chunk_size_ptr)) - { - pos -= finishing; - /// Send current chunk - WriteBufferFromPocoSocket::nextImpl(); - /// Send end-of-chunk directly - UInt32 s = 0; - socketSendBytes(reinterpret_cast(&s), sizeof(s)); - - finishing = sizeof(*chunk_size_ptr); - - /// Initialize next chunk - chunk_size_ptr = reinterpret_cast(working_buffer.begin()); - nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); - - last_finish_chunk = chunk_size_ptr; - - return; - } - - /// Send end-of-chunk buffered by finishChunk - if (offset() == 2 * sizeof(*chunk_size_ptr) && last_finish_chunk == chunk_size_ptr) - { - pos -= sizeof(*chunk_size_ptr); - /// Send end-of-chunk - WriteBufferFromPocoSocket::nextImpl(); - /// Initialize next chunk - chunk_size_ptr = reinterpret_cast(working_buffer.begin()); - nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); - - last_finish_chunk = chunk_size_ptr; - - return; - } - - /// Prevent sending empty chunk - if (offset() == sizeof(*chunk_size_ptr)) - { - nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); - return; - } - - /// Finish chunk at the end of the buffer - if (working_buffer.end() - reinterpret_cast(chunk_size_ptr) <= static_cast(sizeof(*chunk_size_ptr))) - { - pos = reinterpret_cast(chunk_size_ptr); - /// Send current chunk - WriteBufferFromPocoSocket::nextImpl(); - /// Initialize next chunk - chunk_size_ptr = reinterpret_cast(working_buffer.begin()); - nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); - - last_finish_chunk = nullptr; - - return; - } - - if (pos - reinterpret_cast(chunk_size_ptr) == sizeof(*chunk_size_ptr)) // next() after finishChunk - pos -= sizeof(*chunk_size_ptr); - else // fill up current chunk size - { - setValue(chunk_size_ptr, toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr)))); - if (!chunk_started) - { - chunk_started = true; - LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}", - ourAddress().toString(), peerAddress().toString(), - static_cast(*(reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr))), - *chunk_size_ptr); - } - else - LOG_TEST(log, "{} -> {} Chunk send continued. 
Size {}", ourAddress().toString(), peerAddress().toString(), *chunk_size_ptr); - } - /// Send current chunk - WriteBufferFromPocoSocket::nextImpl(); - /// Initialize next chunk - chunk_size_ptr = reinterpret_cast(working_buffer.begin()); - nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); - - last_finish_chunk = nullptr; - } - - void finalizeImpl() override - { - if (offset() == sizeof(*chunk_size_ptr)) - pos -= sizeof(*chunk_size_ptr); - WriteBufferFromPocoSocket::finalizeImpl(); - } - - Poco::Net::SocketAddress peerAddress() - { - return peer_address; - } - - Poco::Net::SocketAddress ourAddress() - { - return our_address; - } private: LoggerPtr log; bool chunked = false; - UInt32 * last_finish_chunk = nullptr; // pointer to the last chunk header created by finishChunk - bool chunk_started = false; // chunk started flag - UInt32 * chunk_size_ptr = nullptr; // pointer to the chunk size holder in the buffer + UInt32 * last_finish_chunk = nullptr; // pointer to the last chunk header created by finishChunk + bool chunk_started = false; // chunk started flag + UInt32 * chunk_size_ptr = nullptr; // pointer to the chunk size holder in the buffer size_t finishing = sizeof(*chunk_size_ptr); // indicates not enough buffer for end-of-chunk marker }; From aadf1536a40bd53c6a1b6359cf652854f134599b Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 25 Jun 2024 22:28:01 +0000 Subject: [PATCH 060/265] fix protocol --- src/IO/WriteBufferFromPocoSocketChunked.cpp | 30 ++++++++------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.cpp b/src/IO/WriteBufferFromPocoSocketChunked.cpp index 324f8ae3a02..a83b976ae09 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.cpp +++ b/src/IO/WriteBufferFromPocoSocketChunked.cpp @@ -24,6 +24,8 @@ void WriteBufferFromPocoSocketChunked::enableChunked() /// Initialize next chunk chunk_size_ptr = reinterpret_cast(pos); pos += std::min(available(), sizeof(*chunk_size_ptr)); + /// Pretend finishChunk() was just called to prevent sending empty chunk if finishChunk() called immediately + last_finish_chunk = chunk_size_ptr; } void WriteBufferFromPocoSocketChunked::finishChunk() @@ -33,7 +35,8 @@ void WriteBufferFromPocoSocketChunked::finishChunk() if (pos <= reinterpret_cast(chunk_size_ptr) + sizeof(*chunk_size_ptr)) { - if (chunk_size_ptr == last_finish_chunk) // prevent duplicate finish chunk + /// Prevent duplicate finish chunk (and finish chunk right after enableChunked()) + if (chunk_size_ptr == last_finish_chunk) return; /// If current chunk is empty it means we are finishing a chunk previously sent by next(), @@ -85,7 +88,7 @@ void WriteBufferFromPocoSocketChunked::finishChunk() } /// Buffer end-of-chunk - *reinterpret_cast(pos) = 0; + setValue(reinterpret_cast(pos), 0); pos += sizeof(*chunk_size_ptr); /// Initialize next chunk chunk_size_ptr = reinterpret_cast(pos); @@ -114,7 +117,7 @@ void WriteBufferFromPocoSocketChunked::nextImpl() return; } - /// next() after finishChunk ar the end of the buffer + /// next() after finishChunk at the end of the buffer if (finishing < sizeof(*chunk_size_ptr)) { pos -= finishing; @@ -135,21 +138,6 @@ void WriteBufferFromPocoSocketChunked::nextImpl() return; } - /// Send end-of-chunk buffered by finishChunk - if (offset() == 2 * sizeof(*chunk_size_ptr) && last_finish_chunk == chunk_size_ptr) - { - pos -= sizeof(*chunk_size_ptr); - /// Send end-of-chunk - WriteBufferFromPocoSocket::nextImpl(); - /// Initialize next chunk - chunk_size_ptr = 
reinterpret_cast(working_buffer.begin()); - nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); - - last_finish_chunk = chunk_size_ptr; - - return; - } - /// Prevent sending empty chunk if (offset() == sizeof(*chunk_size_ptr)) { @@ -172,8 +160,12 @@ void WriteBufferFromPocoSocketChunked::nextImpl() return; } + bool initialize_last_finish_chunk = false; if (pos - reinterpret_cast(chunk_size_ptr) == sizeof(*chunk_size_ptr)) // next() after finishChunk + { pos -= sizeof(*chunk_size_ptr); + initialize_last_finish_chunk = true; + } else // fill up current chunk size { setValue(chunk_size_ptr, toLittleEndian(static_cast(pos - reinterpret_cast(chunk_size_ptr) - sizeof(*chunk_size_ptr)))); @@ -194,7 +186,7 @@ void WriteBufferFromPocoSocketChunked::nextImpl() chunk_size_ptr = reinterpret_cast(working_buffer.begin()); nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr); - last_finish_chunk = nullptr; + last_finish_chunk = initialize_last_finish_chunk ? chunk_size_ptr : nullptr; } void WriteBufferFromPocoSocketChunked::finalizeImpl() From 9eec8344279082a3d02583c092f3c90b85a76fa3 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 26 Jun 2024 03:19:16 +0000 Subject: [PATCH 061/265] better chunked protocol negotiation, comments, review suggestions --- src/Client/Connection.cpp | 37 +++++++++++++++------- src/IO/ReadBufferFromPocoSocketChunked.cpp | 11 ++++--- src/IO/ReadBufferFromPocoSocketChunked.h | 4 +-- src/IO/WriteBufferFromPocoSocketChunked.h | 10 ++++-- src/Server/TCPHandler.cpp | 17 +++++++--- src/Server/TCPHandler.h | 4 +-- 6 files changed, 56 insertions(+), 27 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 9f727b974ee..c41229c7226 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -208,11 +208,20 @@ void Connection::connect(const ConnectionTimeouts & timeouts) sendHello(); receiveHello(timeouts.handshake_timeout); - bool out_chunked = false; - bool in_chunked = false; - if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) { + /// Client side of chunked protocol negotiation. + /// Server advertises its protocol capabilities (separate for send and recieve channels) by sending + /// in its 'Hello' response one of four types - chunked, notchunked, chunked_optional, notchunked_optional. + /// Not optional types are strict meaning that server only supports this type, optional means that + /// server prefer this type but capable to work in opposite. + /// Client selects which type it is going to communicate based on the settings from config or arguments, + /// and sends either "chunked" or "notchunked" protocol request in addendum section of handshake. + /// Client can detect if server's protocol capabilities are not compatible with client's settings (for example + /// server strictly requires chunked protocol but client's settings only allowes notchunked protocol) - in such case + /// client should interrup this connection. However if client continues with incompatible protocol type request, server + /// will send appropriate exception and disconnect client. 
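+    /// Editorial summary (derived from is_chunked() below) - the negotiated outcome
+    /// per server capability (rows, from 'Hello') x client setting (columns):
+    ///
+    ///     server \ client      chunked    chunked_optional  notchunked  notchunked_optional
+    ///     chunked              chunked    chunked           exception   chunked
+    ///     chunked_optional     chunked    chunked           notchunked  notchunked
+    ///     notchunked           exception  notchunked        notchunked  notchunked
+    ///     notchunked_optional  chunked    chunked           notchunked  notchunked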
+ auto is_chunked = [](const String & chunked_srv_str, const String & chunked_cl_str, const String & direction) { bool chunked_srv = chunked_srv_str.starts_with("chunked"); @@ -235,20 +244,24 @@ void Connection::connect(const ConnectionTimeouts & timeouts) return chunked_srv; }; - out_chunked = is_chunked(proto_recv_chunked_srv, proto_send_chunked, "send"); - in_chunked = is_chunked(proto_send_chunked_srv, proto_recv_chunked, "recv"); + proto_send_chunked = is_chunked(proto_recv_chunked_srv, proto_send_chunked, "send") ? "chunked" : "notchunked"; + proto_recv_chunked = is_chunked(proto_send_chunked_srv, proto_recv_chunked, "recv") ? "chunked" : "notchunked"; + } + else + { + if (proto_send_chunked == "chunked" || proto_recv_chunked == "chunked") + throw NetException( + ErrorCodes::NETWORK_ERROR, + "Incompatible protocol: server's version is too old and doesn't support chunked protocol while client settings require it."); } if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) sendAddendum(); - if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) - { - if (out_chunked) - out->enableChunked(); - if (in_chunked) - in->enableChunked(); - } + if (proto_send_chunked == "chunked") + out->enableChunked(); + if (proto_recv_chunked == "chunked") + in->enableChunked(); LOG_TRACE(log_wrapper.get(), "Connected to {} server version {}.{}.{}.", server_name, server_version_major, server_version_minor, server_version_patch); diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 798be547e99..07598f2adf4 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -28,6 +28,7 @@ void ReadBufferFromPocoSocketChunked::enableChunked() return; chunked = 1; data_end = buffer().end(); + /// Resize working buffer so any next read will call nextImpl working_buffer.resize(offset()); chunk_left = 0; next_chunk = 0; @@ -51,7 +52,7 @@ bool ReadBufferFromPocoSocketChunked::poll(size_t timeout_microseconds) const } -bool ReadBufferFromPocoSocketChunked::load_next_chunk(Position c_pos, bool cont) +bool ReadBufferFromPocoSocketChunked::loadNextChunk(Position c_pos, bool cont) { auto buffered = std::min(static_cast(data_end - c_pos), sizeof(next_chunk)); @@ -73,7 +74,7 @@ bool ReadBufferFromPocoSocketChunked::load_next_chunk(Position c_pos, bool cont) return true; } -bool ReadBufferFromPocoSocketChunked::process_chunk_left(Position c_pos) +bool ReadBufferFromPocoSocketChunked::processChunkLeft(Position c_pos) { if (data_end - c_pos < chunk_left) { @@ -88,7 +89,7 @@ bool ReadBufferFromPocoSocketChunked::process_chunk_left(Position c_pos) c_pos += chunk_left; - if (!load_next_chunk(c_pos, true)) + if (!loadNextChunk(c_pos, true)) return false; chunk_left = 0; @@ -115,7 +116,7 @@ bool ReadBufferFromPocoSocketChunked::nextImpl() if (c_pos > data_end) c_pos = data_end; - if (!load_next_chunk(c_pos)) + if (!loadNextChunk(c_pos)) return false; chunk_left = next_chunk; @@ -159,7 +160,7 @@ bool ReadBufferFromPocoSocketChunked::nextImpl() c_pos = buffer().begin(); } - return process_chunk_left(c_pos); + return processChunkLeft(c_pos); } } diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index acf0edafe0a..943a50f5d08 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -92,8 +92,8 @@ public: Poco::Net::SocketAddress ourAddress() { return our_address; } protected: - bool load_next_chunk(Position c_pos, bool cont 
= false); - bool process_chunk_left(Position c_pos); + bool loadNextChunk(Position c_pos, bool cont = false); + bool processChunkLeft(Position c_pos); bool nextImpl() override; Poco::Net::SocketAddress our_address; diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h index 269c6d66dda..8270ca445c9 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.h +++ b/src/IO/WriteBufferFromPocoSocketChunked.h @@ -10,8 +10,14 @@ namespace DB class WriteBufferFromPocoSocketChunked: public WriteBufferFromPocoSocket { public: - explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, buf_size), log(getLogger("Protocol")) {} - explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, write_event_, buf_size), log(getLogger("Protocol")) {} + explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, buf_size), log(getLogger("Protocol")) + { + chassert(buf_size <= std::numeric_limits>::max() && buf_size > sizeof(*chunk_size_ptr)); + } + explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, write_event_, buf_size), log(getLogger("Protocol")) + { + chassert(buf_size <= std::numeric_limits>::max() && buf_size > sizeof(*chunk_size_ptr)); + } void enableChunked(); void finishChunk(); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 960860a3c13..3093c508c22 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -277,19 +277,28 @@ void TCPHandler::runImpl() if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) receiveAddendum(); - if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) { + /// Server side of chunked protocol negotiation. + /// Server advertises its protocol capabilities (separate for send and recieve channels) by sending + /// in its 'Hello' response one of four types - chunked, notchunked, chunked_optional, notchunked_optional. + /// Not optional types are strict meaning that server only supports this type, optional means that + /// server prefer this type but capable to work in opposite. + /// Client selects which type it is going to communicate based on the settings from config or arguments, + /// and sends either "chunked" or "notchunked" protocol request in addendum section of handshake. + /// Client can detect if server's protocol capabilities are not compatible with client's settings (for example + /// server strictly requires chunked protocol but client's settings only allowes notchunked protocol) - in such case + /// client should interrup this connection. However if client continues with incompatible protocol type request, server + /// will send appropriate exception and disconnect client. 
+ auto is_chunked = [](const String & chunked_srv_str, const String & chunked_cl_str, const String & direction) { bool chunked_srv = chunked_srv_str.starts_with("chunked"); bool optional_srv = chunked_srv_str.ends_with("_optional"); bool chunked_cl = chunked_cl_str.starts_with("chunked"); - bool optional_cl = chunked_cl_str.ends_with("_optional"); if (optional_srv) return chunked_cl; - if (optional_cl) - return chunked_srv; + if (chunked_cl != chunked_srv) throw NetException( ErrorCodes::NETWORK_ERROR, diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 88c8fc6d52c..f6400161041 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -187,8 +187,8 @@ private: UInt64 client_version_minor = 0; UInt64 client_version_patch = 0; UInt32 client_tcp_protocol_version = 0; - String proto_send_chunked_cl; - String proto_recv_chunked_cl; + String proto_send_chunked_cl = "notchunked"; + String proto_recv_chunked_cl = "notchunked"; String quota_key; /// Connection settings, which are extracted from a context. From 6112ef710c2d949c3c8824fcf0e7c148f5deaea4 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 26 Jun 2024 03:43:28 +0000 Subject: [PATCH 062/265] fix style --- src/Client/Connection.cpp | 8 ++++---- src/Server/TCPHandler.cpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index c41229c7226..14ffff10081 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -211,17 +211,17 @@ void Connection::connect(const ConnectionTimeouts & timeouts) if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) { /// Client side of chunked protocol negotiation. - /// Server advertises its protocol capabilities (separate for send and recieve channels) by sending + /// Server advertises its protocol capabilities (separate for send and receive channels) by sending /// in its 'Hello' response one of four types - chunked, notchunked, chunked_optional, notchunked_optional. /// Not optional types are strict meaning that server only supports this type, optional means that /// server prefer this type but capable to work in opposite. /// Client selects which type it is going to communicate based on the settings from config or arguments, /// and sends either "chunked" or "notchunked" protocol request in addendum section of handshake. /// Client can detect if server's protocol capabilities are not compatible with client's settings (for example - /// server strictly requires chunked protocol but client's settings only allowes notchunked protocol) - in such case - /// client should interrup this connection. However if client continues with incompatible protocol type request, server + /// server strictly requires chunked protocol but client's settings only allows notchunked protocol) - in such case + /// client should interrupt this connection. However if client continues with incompatible protocol type request, server /// will send appropriate exception and disconnect client. - + auto is_chunked = [](const String & chunked_srv_str, const String & chunked_cl_str, const String & direction) { bool chunked_srv = chunked_srv_str.starts_with("chunked"); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 3093c508c22..d5afb624e77 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -279,15 +279,15 @@ void TCPHandler::runImpl() { /// Server side of chunked protocol negotiation. 
- /// Server advertises its protocol capabilities (separate for send and recieve channels) by sending + /// Server advertises its protocol capabilities (separate for send and receive channels) by sending /// in its 'Hello' response one of four types - chunked, notchunked, chunked_optional, notchunked_optional. /// Not optional types are strict meaning that server only supports this type, optional means that /// server prefer this type but capable to work in opposite. /// Client selects which type it is going to communicate based on the settings from config or arguments, /// and sends either "chunked" or "notchunked" protocol request in addendum section of handshake. /// Client can detect if server's protocol capabilities are not compatible with client's settings (for example - /// server strictly requires chunked protocol but client's settings only allowes notchunked protocol) - in such case - /// client should interrup this connection. However if client continues with incompatible protocol type request, server + /// server strictly requires chunked protocol but client's settings only allows notchunked protocol) - in such case + /// client should interrupt this connection. However if client continues with incompatible protocol type request, server /// will send appropriate exception and disconnect client. auto is_chunked = [](const String & chunked_srv_str, const String & chunked_cl_str, const String & direction) From 3f3305a63a1218dc944ac7b3a8540f084a57a039 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 26 Jun 2024 04:33:52 +0000 Subject: [PATCH 063/265] fix server settings --- src/Server/TCPHandler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index d5afb624e77..40fd3848455 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -310,8 +310,8 @@ void TCPHandler::runImpl() return chunked_srv; }; - bool out_chunked = is_chunked(server.config().getString("proto_caps.send", "chunked"), proto_recv_chunked_cl, "send"); - bool in_chunked = is_chunked(server.config().getString("proto_caps.recv", "chunked"), proto_send_chunked_cl, "recv"); + bool out_chunked = is_chunked(server.config().getString("proto_caps.send", "chunked_optional"), proto_recv_chunked_cl, "send"); + bool in_chunked = is_chunked(server.config().getString("proto_caps.recv", "chunked_optional"), proto_send_chunked_cl, "recv"); if (out_chunked) out->enableChunked(); From 32e6bed4ee8aecf97ddd289ca869f8da096d58af Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 26 Jun 2024 14:04:33 +0000 Subject: [PATCH 064/265] bug fix, ubsan paranoia fix --- src/IO/WriteBufferFromPocoSocketChunked.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.cpp b/src/IO/WriteBufferFromPocoSocketChunked.cpp index a83b976ae09..b6d9efda815 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.cpp +++ b/src/IO/WriteBufferFromPocoSocketChunked.cpp @@ -7,10 +7,9 @@ namespace { template -const T & setValue(T * typed_ptr, std::type_identity_t val) +void setValue(T * typed_ptr, std::type_identity_t val) { - memcpy(typed_ptr, &val, sizeof(T)); - return *typed_ptr; + memcpy(static_cast(typed_ptr), &val, sizeof(T)); } } @@ -84,6 +83,7 @@ void WriteBufferFromPocoSocketChunked::finishChunk() finishing = available(); pos += available(); chunk_size_ptr = reinterpret_cast(pos); + last_finish_chunk = chunk_size_ptr; return; } From 30a9c38c9596b40555c8ec041257b53cd10b9abc Mon Sep 17 00:00:00 
2001
From: Yakov Olkhovskiy
Date: Wed, 26 Jun 2024 20:43:13 +0000
Subject: [PATCH 065/265] fix buffer size check

---
 src/IO/ReadBufferFromPocoSocketChunked.cpp  | 10 +++++-----
 src/IO/WriteBufferFromPocoSocketChunked.cpp | 11 +++++++++++
 src/IO/WriteBufferFromPocoSocketChunked.h   | 11 +++--------
 3 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp
index 07598f2adf4..93afeadba60 100644
--- a/src/IO/ReadBufferFromPocoSocketChunked.cpp
+++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp
@@ -16,11 +16,11 @@ ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Sock
 {}
 
 ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size)
-    : ReadBufferFromPocoSocketBase(socket_, read_event_, buf_size), our_address(socket_.address()), log(getLogger("Protocol"))
-
-{
-    chassert(buf_size <= std::numeric_limits<UInt32>::max());
-}
+    : ReadBufferFromPocoSocketBase(
+        socket_, read_event_,
+        std::min(buf_size, static_cast<size_t>(std::numeric_limits<UInt32>::max()))),
+        our_address(socket_.address()), log(getLogger("Protocol"))
+{}
 
 void ReadBufferFromPocoSocketChunked::enableChunked()
 {
diff --git a/src/IO/WriteBufferFromPocoSocketChunked.cpp b/src/IO/WriteBufferFromPocoSocketChunked.cpp
index b6d9efda815..98c5126c24b 100644
--- a/src/IO/WriteBufferFromPocoSocketChunked.cpp
+++ b/src/IO/WriteBufferFromPocoSocketChunked.cpp
@@ -17,6 +17,17 @@ void setValue(T * typed_ptr, std::type_identity_t<T> val)
 namespace DB
 {
 
+WriteBufferFromPocoSocketChunked::WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size)
+    : WriteBufferFromPocoSocketChunked(socket_, ProfileEvents::end(), buf_size)
+{}
+
+WriteBufferFromPocoSocketChunked::WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size)
+    : WriteBufferFromPocoSocket(
+        socket_, write_event_,
+        std::clamp(buf_size, sizeof(*chunk_size_ptr) + 1, static_cast<size_t>(std::numeric_limits<std::remove_reference_t<decltype(*chunk_size_ptr)>>::max()))),
+        log(getLogger("Protocol"))
+{}
+
 void WriteBufferFromPocoSocketChunked::enableChunked()
 {
diff --git a/src/IO/WriteBufferFromPocoSocketChunked.h b/src/IO/WriteBufferFromPocoSocketChunked.h
index 8270ca445c9..13a277e3bfb 100644
--- a/src/IO/WriteBufferFromPocoSocketChunked.h
+++ b/src/IO/WriteBufferFromPocoSocketChunked.h
@@ -2,6 +2,7 @@
 
 #include
 #include
+#include
 
 namespace DB
 
@@ -10,14 +11,8 @@ namespace DB
 class WriteBufferFromPocoSocketChunked: public WriteBufferFromPocoSocket
 {
 public:
-    explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, buf_size), log(getLogger("Protocol"))
-    {
-        chassert(buf_size <= std::numeric_limits<std::remove_reference_t<decltype(*chunk_size_ptr)>>::max() && buf_size > sizeof(*chunk_size_ptr));
-    }
-    explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) : WriteBufferFromPocoSocket(socket_, write_event_, buf_size), log(getLogger("Protocol"))
-    {
-        chassert(buf_size <= std::numeric_limits<std::remove_reference_t<decltype(*chunk_size_ptr)>>::max() && buf_size > sizeof(*chunk_size_ptr));
-    }
+    explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
+    explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
 
     void
enableChunked(); void finishChunk(); From 04b8b1e76c467ae527202a75141ac8981a1c4ac5 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 2 Jul 2024 14:01:19 +0000 Subject: [PATCH 066/265] initial commit for Hive-style partitioning --- src/Core/Settings.h | 5 + src/Core/SettingsChangesHistory.h | 5 + .../ObjectStorage/StorageObjectStorage.cpp | 32 ++- .../StorageObjectStorageSource.cpp | 14 +- src/Storages/StorageFile.cpp | 39 +++- src/Storages/StorageURL.cpp | 16 +- src/Storages/VirtualColumnUtils.cpp | 52 ++++- src/Storages/VirtualColumnUtils.h | 7 +- .../__init__.py | 0 .../configs/cluster_azure.xml | 39 ++++ .../configs/cluster_hdfs.xml | 33 +++ .../configs/disable_profilers_azure.xml | 9 + .../configs/macro_hdfs.xml | 5 + .../configs/named_collections_azure.xml | 14 ++ .../configs/schema_cache_azure.xml | 3 + .../configs/schema_cache_hdfs.xml | 3 + .../configs/users_azure.xml | 9 + .../test_azure.py | 204 ++++++++++++++++++ .../test_hdfs.py | 81 +++++++ .../03203_hive_style_partitioning.reference | 96 +++++++++ .../03203_hive_style_partitioning.sh | 93 ++++++++ .../column1=Gordon/sample.parquet | Bin 0 -> 1308 bytes .../column1=Schmidt/sample.parquet | Bin 0 -> 1308 bytes .../column0=Elizabeth/sample.parquet | Bin 0 -> 1308 bytes .../sample.parquet | Bin 0 -> 1308 bytes .../column1=Gordon/sample.parquet | Bin 0 -> 1308 bytes .../column1=Schmidt/sample.parquet | Bin 0 -> 1308 bytes .../coumn0=Elizabeth/sample.parquet | Bin 0 -> 1308 bytes .../sample.parquet | Bin 0 -> 1308 bytes 29 files changed, 749 insertions(+), 10 deletions(-) create mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/__init__.py create mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/configs/cluster_azure.xml create mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/configs/cluster_hdfs.xml create mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/configs/disable_profilers_azure.xml create mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/configs/macro_hdfs.xml create mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/configs/named_collections_azure.xml create mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/configs/schema_cache_azure.xml create mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/configs/schema_cache_hdfs.xml create mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/configs/users_azure.xml create mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/test_azure.py create mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/test_hdfs.py create mode 100644 tests/queries/0_stateless/03203_hive_style_partitioning.reference create mode 100755 tests/queries/0_stateless/03203_hive_style_partitioning.sh create mode 100644 tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet create mode 100644 tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet create mode 100644 tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/sample.parquet create mode 100644 tests/queries/0_stateless/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet create mode 100644 tests/queries/0_stateless/data_minio/hive_partitioning/coumn0=Elizabeth/column1=Gordon/sample.parquet create mode 100644 tests/queries/0_stateless/data_minio/hive_partitioning/coumn0=Elizabeth/column1=Schmidt/sample.parquet create mode 100644 
tests/queries/0_stateless/data_minio/hive_partitioning/coumn0=Elizabeth/sample.parquet create mode 100644 tests/queries/0_stateless/data_minio/hive_partitioning/non_existing_column=Elizabeth/sample.parquet diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 14fe0924b40..738c0129d2d 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1106,6 +1106,11 @@ class IColumn; M(Bool, input_format_tsv_skip_trailing_empty_lines, false, "Skip trailing empty lines in TSV format", 0) \ M(Bool, input_format_custom_skip_trailing_empty_lines, false, "Skip trailing empty lines in CustomSeparated format", 0) \ M(Bool, input_format_tsv_crlf_end_of_line, false, "If it is set true, file function will read TSV format with \\r\\n instead of \\n.", 0) \ + M(Bool, file_hive_partitioning, false, "Allows to use hive partitioning for file format", 0)\ + M(Bool, url_hive_partitioning, false, "Allows to use hive partitioning for url format", 0)\ + M(Bool, s3_hive_partitioning, false, "Allows to use hive partitioning for s3 format", 0)\ + M(Bool, azure_blob_storage_hive_partitioning, false, "Allows to use hive partitioning for AzureBlobStorage format", 0)\ + M(Bool, hdfs_hive_partitioning, false, "Allows to use hive partitioning for hdfs format", 0)\ \ M(Bool, input_format_native_allow_types_conversion, true, "Allow data types conversion in Native input format", 0) \ \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 4ac25a649b7..dd778149674 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -118,6 +118,11 @@ static const std::map +#include #include #include @@ -32,6 +33,19 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } + +bool checkIfHiveSettingEnabled(const ContextPtr & context, const std::string & storage_type_name) +{ + if (storage_type_name == "s3") + return context->getSettings().s3_hive_partitioning; + else if (storage_type_name == "hdfs") + return context->getSettings().hdfs_hive_partitioning; + else if (storage_type_name == "azure") + return context->getSettings().azure_blob_storage_hive_partitioning; + else + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported storage type: {}", storage_type_name); +} + StorageObjectStorage::StorageObjectStorage( ConfigurationPtr configuration_, ObjectStoragePtr object_storage_, @@ -60,7 +74,23 @@ StorageObjectStorage::StorageObjectStorage( metadata.setConstraints(constraints_); metadata.setComment(comment); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); + auto file_iterator = StorageObjectStorageSource::createFileIterator( + configuration, + object_storage, + distributed_processing_, + context, + {}, // predicate + metadata.getColumns().getAll(), // virtual_columns + nullptr, // read_keys + {} // file_progress_callback + ); + + Strings paths; + + if (checkIfHiveSettingEnabled(context, configuration->getTypeName())) + if (auto file = file_iterator->next(0)) + paths = {file->getPath()}; + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), paths)); setInMemoryMetadata(metadata); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index aef783fc3c4..2741cfecf6b 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace fs = std::filesystem; @@ -195,13 +196,24 @@ Chunk 
StorageObjectStorageSource::generate() const auto & object_info = reader.getObjectInfo(); const auto & filename = object_info->getFileName(); chassert(object_info->metadata); + + auto hive_map = VirtualColumnUtils::parsePartitionMapFromPath(object_info->getPath()); + bool contains_virtual_column = std::any_of(hive_map.begin(), hive_map.end(), + [&](const auto& pair) { + return read_from_format_info.requested_virtual_columns.contains(pair.first); + }); + + if (!contains_virtual_column) + hive_map.clear(); // If we cannot find any virual column in requested, we don't add any of them to chunk + VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( chunk, read_from_format_info.requested_virtual_columns, { .path = getUniqueStoragePathIdentifier(*configuration, *object_info, false), .size = object_info->metadata->size_bytes, .filename = &filename, - .last_modified = object_info->metadata->last_modified + .last_modified = object_info->metadata->last_modified, + .hive_partitioning_map = hive_map }); return chunk; } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 7f39ff615f0..0c32f29cb34 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -52,6 +52,7 @@ #include #include #include +#include #include #include @@ -1095,7 +1096,11 @@ void StorageFile::setStorageMetadata(CommonArguments args) storage_metadata.setConstraints(args.constraints); storage_metadata.setComment(args.comment); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + + Strings paths_for_virtuals; + if (args.getContext()->getSettingsRef().file_hive_partitioning) + paths_for_virtuals = paths; + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), paths_for_virtuals)); } @@ -1437,6 +1442,15 @@ Chunk StorageFileSource::generate() chunk_size = input_format->getApproxBytesReadForChunk(); progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); + std::map hive_map; + if (getContext()->getSettingsRef().file_hive_partitioning) + { + hive_map = VirtualColumnUtils::parsePartitionMapFromPath(current_path); + + for (const auto& item : hive_map) + requested_virtual_columns.push_back(NameAndTypePair(item.first, std::make_shared())); + } + /// Enrich with virtual columns. VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( chunk, requested_virtual_columns, @@ -1444,7 +1458,8 @@ Chunk StorageFileSource::generate() .path = current_path, .size = current_file_size, .filename = (filename_override.has_value() ? 
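Both generate() call sites above pass the parsed hive_partitioning_map into addRequestedFileLikeStorageVirtualsToChunk, which expands each matched entry into a constant-valued column of chunk height. A rough sketch of that expansion against ClickHouse's column interfaces (the helper name and sample value are invented; this is not the project's exact code path):

    #include <DataTypes/DataTypeString.h>
    #include <Processors/Chunk.h>

    // Hypothetical helper: materialize one partition value as a full column
    // and append it to the chunk, mirroring what the virtual-column code does.
    void addPartitionValueColumn(DB::Chunk & chunk, const std::string & value)
    {
        auto type = std::make_shared<DB::DataTypeString>();
        chunk.addColumn(type->createColumnConst(chunk.getNumRows(), value)
                            ->convertToFullColumnIfConst());
    }
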
&filename_override.value() : nullptr), - .last_modified = current_file_last_modified + .last_modified = current_file_last_modified, + .hive_partitioning_map = hive_map }); return chunk; @@ -1621,6 +1636,16 @@ void ReadFromFile::createIterator(const ActionsDAG::Node * predicate) storage->distributed_processing); } +void addPartitionColumnsToInfoHeader(Strings paths, ReadFromFormatInfo & info) +{ + for (const auto& path : paths) + { + auto map = VirtualColumnUtils::parsePartitionMapFromPath(path); + for (const auto& item : map) + info.source_header.insertUnique(ColumnWithTypeAndName(std::make_shared(), item.first)); + } +} + void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { createIterator(nullptr); @@ -1628,10 +1653,20 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui size_t num_streams = max_num_streams; size_t files_to_read = 0; + Strings paths; if (storage->archive_info) + { files_to_read = storage->archive_info->paths_to_archives.size(); + paths = storage->archive_info->paths_to_archives; + } else + { files_to_read = storage->paths.size(); + paths = storage->paths; + } + + if (getContext()->getSettingsRef().file_hive_partitioning) + addPartitionColumnsToInfoHeader(paths, info); if (max_num_streams > files_to_read) num_streams = files_to_read; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 895da028fc2..f6374701fc2 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -151,7 +152,11 @@ IStorageURLBase::IStorageURLBase( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + + Strings uri_for_partitioning; + if (context_->getSettingsRef().url_hive_partitioning) + uri_for_partitioning = {uri}; + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), uri_for_partitioning)); } @@ -410,12 +415,17 @@ Chunk StorageURLSource::generate() size_t chunk_size = 0; if (input_format) chunk_size = input_format->getApproxBytesReadForChunk(); + std::map hive_map; + if (getContext()->getSettingsRef().url_hive_partitioning) + hive_map = VirtualColumnUtils::parsePartitionMapFromPath(curr_uri.getPath()); + progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( chunk, requested_virtual_columns, { .path = curr_uri.getPath(), - .size = current_file_size + .size = current_file_size, + .hive_partitioning_map = hive_map }); return chunk; } @@ -1170,6 +1180,7 @@ void ReadFromURL::createIterator(const ActionsDAG::Node * predicate) void ReadFromURL::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { createIterator(nullptr); + const auto & settings = context->getSettingsRef(); if (is_empty_glob) { @@ -1180,7 +1191,6 @@ void ReadFromURL::initializePipeline(QueryPipelineBuilder & pipeline, const Buil Pipes pipes; pipes.reserve(num_streams); - const auto & settings = context->getSettingsRef(); const size_t max_parsing_threads = num_streams >= settings.max_parsing_threads ? 
1 : (settings.max_parsing_threads / num_streams); for (size_t i = 0; i < num_streams; ++i) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 778c9e13adb..0b79e3b7a16 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -37,6 +36,7 @@ #include #include +#include #include #include "Functions/FunctionsLogical.h" #include "Functions/IFunction.h" @@ -115,7 +115,22 @@ NameSet getVirtualNamesForFileLikeStorage() return {"_path", "_file", "_size", "_time"}; } -VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns) +Strings parseVirtualColumnNameFromPath(const std::string & path) +{ + std::string pattern = "/([^/]+)=([^/]+)"; + // Map to store the key-value pairs + std::map key_values; + + re2::StringPiece input_piece(path); + std::string key; + Strings result; + while (RE2::FindAndConsume(&input_piece, pattern, &key)) + result.push_back(key); + + return result; +} + +VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, Strings paths) { VirtualColumnsDescription desc; @@ -132,6 +147,13 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription add_virtual("_size", makeNullable(std::make_shared())); add_virtual("_time", makeNullable(std::make_shared())); + for (const auto& path : paths) + { + auto names = parseVirtualColumnNameFromPath(path); + for (const auto& name : names) + add_virtual("_" + name, std::make_shared(std::make_shared())); + } + return desc; } @@ -178,6 +200,8 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const { if (column.name == "_file" || column.name == "_path") block.insert({column.type->createColumn(), column.type, column.name}); + if (!getVirtualNamesForFileLikeStorage().contains(column.name)) + block.insert({column.type->createColumn(), column.type, column.name}); } block.insert({ColumnUInt64::create(), std::make_shared(), "_idx"}); @@ -189,6 +213,21 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const return block.getByName("_idx").column; } +std::map parsePartitionMapFromPath(const std::string & path) +{ + std::string pattern = "/([^/]+)=([^/]+)"; // Regex to capture key=value pairs + // Map to store the key-value pairs + std::map key_values; + + re2::StringPiece input_piece(path); + std::string key; + std::string value; + while (RE2::FindAndConsume(&input_piece, pattern, &key, &value)) + key_values["_" + key] = value; + + return key_values; +} + void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, VirtualsForFileLikeStorage virtual_values) @@ -226,6 +265,15 @@ void addRequestedFileLikeStorageVirtualsToChunk( else chunk.addColumn(virtual_column.type->createColumnConstWithDefaultValue(chunk.getNumRows())->convertToFullColumnIfConst()); } + else + { + auto it = virtual_values.hive_partitioning_map.find(virtual_column.getNameInStorage()); + if (it != virtual_values.hive_partitioning_map.end()) + { + chunk.addColumn(virtual_column.getTypeInStorage()->createColumnConst(chunk.getNumRows(), it->second)->convertToFullColumnIfConst()); + virtual_values.hive_partitioning_map.erase(it); + } + } } } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index fbfbdd6c6cc..a03d4c7447f 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -6,6 +6,8 @@ #include 
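parsePartitionMapFromPath above is the core of the feature: every "/key=value" segment of a path becomes a "_key" -> "value" entry that later surfaces as a virtual column. A self-contained sketch of the same RE2 loop (the function name and sample path are invented for illustration):

    #include <map>
    #include <string>
    #include <re2/re2.h>

    std::map<std::string, std::string> parseHivePath(const std::string & path)
    {
        static const RE2 pattern("/([^/]+)=([^/]+)");
        std::map<std::string, std::string> key_values;
        re2::StringPiece input(path);
        std::string key;
        std::string value;
        // FindAndConsume advances `input` past each match, so the loop walks
        // the key=value segments left to right.
        while (RE2::FindAndConsume(&input, pattern, &key, &value))
            key_values["_" + key] = value;
        return key_values;
    }

    // parseHivePath("/data/column0=Elizabeth/column1=Gordon/sample.parquet")
    // returns {{"_column0", "Elizabeth"}, {"_column1", "Gordon"}}.
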
#include +#include +#include #include @@ -47,7 +49,7 @@ auto extractSingleValueFromBlock(const Block & block, const String & name) } NameSet getVirtualNamesForFileLikeStorage(); -VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns); +VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, Strings paths = {}); ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns); @@ -74,9 +76,12 @@ struct VirtualsForFileLikeStorage std::optional size { std::nullopt }; const String * filename { nullptr }; std::optional last_modified { std::nullopt }; + std::map hive_partitioning_map; }; +std::map parsePartitionMapFromPath(const std::string & path); + void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, VirtualsForFileLikeStorage virtual_values); diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/__init__.py b/tests/integration/test_hive_style_partitioning_hdfs_azure/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/cluster_azure.xml b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/cluster_azure.xml new file mode 100644 index 00000000000..ffa4673c9ee --- /dev/null +++ b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/cluster_azure.xml @@ -0,0 +1,39 @@ + + + + + + node_0 + 9000 + + + node_1 + 9000 + + + node_2 + 9000 + + + + + + + + node_0 + 9000 + + + + + node_1 + 19000 + + + + + + + simple_cluster + + \ No newline at end of file diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/cluster_hdfs.xml b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/cluster_hdfs.xml new file mode 100644 index 00000000000..b99b21ea40b --- /dev/null +++ b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/cluster_hdfs.xml @@ -0,0 +1,33 @@ + + + + + + node1 + 9000 + + + + + node1 + 19000 + + + + + + + + 127.0.0.1 + 9000 + + + + + 127.0.0.2 + 9000 + + + + + diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/disable_profilers_azure.xml b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/disable_profilers_azure.xml new file mode 100644 index 00000000000..a39badbf8ec --- /dev/null +++ b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/disable_profilers_azure.xml @@ -0,0 +1,9 @@ + + + + + 0 + 0 + + + diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/macro_hdfs.xml b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/macro_hdfs.xml new file mode 100644 index 00000000000..c2e11b47a5e --- /dev/null +++ b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/macro_hdfs.xml @@ -0,0 +1,5 @@ + + + test_cluster_two_shards + + \ No newline at end of file diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/named_collections_azure.xml b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/named_collections_azure.xml new file mode 100644 index 00000000000..bd7f9ff97f1 --- /dev/null +++ b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/named_collections_azure.xml @@ -0,0 +1,14 @@ + + + + cont + test_simple_write_named.csv + key UInt64, data String + CSV + + + devstoreaccount1 + 
Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== + + + diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/schema_cache_azure.xml b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/schema_cache_azure.xml new file mode 100644 index 00000000000..e2168ecd06d --- /dev/null +++ b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/schema_cache_azure.xml @@ -0,0 +1,3 @@ + + 2 + \ No newline at end of file diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/schema_cache_hdfs.xml b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/schema_cache_hdfs.xml new file mode 100644 index 00000000000..37639649b5f --- /dev/null +++ b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/schema_cache_hdfs.xml @@ -0,0 +1,3 @@ + + 2 + \ No newline at end of file diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/users_azure.xml b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/users_azure.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/users_azure.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/test_azure.py b/tests/integration/test_hive_style_partitioning_hdfs_azure/test_azure.py new file mode 100644 index 00000000000..c9b2c9fec2e --- /dev/null +++ b/tests/integration/test_hive_style_partitioning_hdfs_azure/test_azure.py @@ -0,0 +1,204 @@ +#!/usr/bin/env python3 + +import pytest +import time + +from helpers.cluster import ClickHouseCluster, is_arm +import re + +from azure.storage.blob import BlobServiceClient +from helpers.cluster import ClickHouseCluster, ClickHouseInstance + +if is_arm(): + pytestmark = pytest.mark.skip + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node", + main_configs=["configs/named_collections_azure.xml", "configs/schema_cache_azure.xml"], + user_configs=["configs/disable_profilers_azure.xml", "configs/users_azure.xml"], + with_azurite=True, + ) + cluster.start() + container_client = cluster.blob_service_client.get_container_client("cont") + container_client.create_container() + yield cluster + finally: + cluster.shutdown() + + +def azure_query( + node, query, expect_error=False, try_num=10, settings={}, query_on_retry=None +): + for i in range(try_num): + try: + if expect_error: + return node.query_and_get_error(query, settings=settings) + else: + return node.query(query, settings=settings) + except Exception as ex: + retriable_errors = [ + "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", + "DB::Exception: Azure::Core::Http::TransportException: Connection closed before getting full response or response is less than expected", + "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", + "DB::Exception: Azure::Core::Http::TransportException: Error while polling for socket ready read", + "Azure::Core::Http::TransportException, e.what() = Connection was closed by the server while trying to read a response", + "Azure::Core::Http::TransportException, e.what() = Connection closed before getting full response or response is less than expected", + "Azure::Core::Http::TransportException, e.what() = Connection was closed 
by the server while trying to read a response", + "Azure::Core::Http::TransportException, e.what() = Error while polling for socket ready read", + ] + retry = False + for error in retriable_errors: + if error in str(ex): + retry = True + print(f"Try num: {i}. Having retriable error: {ex}") + time.sleep(i) + break + if not retry or i == try_num - 1: + raise Exception(ex) + if query_on_retry is not None: + node.query(query_on_retry) + continue + + +def get_azure_file_content(filename, port): + container_name = "cont" + connection_string = ( + f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" + f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" + f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;" + ) + blob_service_client = BlobServiceClient.from_connection_string( + str(connection_string) + ) + container_client = blob_service_client.get_container_client(container_name) + blob_client = container_client.get_blob_client(filename) + download_stream = blob_client.download_blob() + return download_stream.readall().decode("utf-8") + + +@pytest.fixture(autouse=True, scope="function") +def delete_all_files(cluster): + port = cluster.env_variables["AZURITE_PORT"] + connection_string = ( + f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" + f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" + f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;" + ) + blob_service_client = BlobServiceClient.from_connection_string(connection_string) + containers = blob_service_client.list_containers() + for container in containers: + container_client = blob_service_client.get_container_client(container) + blob_list = container_client.list_blobs() + for blob in blob_list: + print(blob) + blob_client = container_client.get_blob_client(blob) + blob_client.delete_blob() + + assert len(list(container_client.list_blobs())) == 0 + + yield + + +def test_azure_partitioning_with_one_parameter(cluster): + # type: (ClickHouseCluster) -> None + node = cluster.instances["node"] # type: ClickHouseInstance + table_format = "column1 String, column2 String" + values = f"('Elizabeth', 'Gordon')" + path = "a/column1=Elizabeth/sample.csv" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," + f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}", + ) + + query = ( + f"SELECT column1, column2, _file, _path, _column1 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}')" + ) + assert azure_query(node, query, settings={"azure_blob_storage_hive_partitioning": 1}).splitlines() == [ + "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format( + bucket="cont", max_path=path + ) + ] + + query = ( + f"SELECT column2 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" + ) + assert azure_query(node, query, settings={"azure_blob_storage_hive_partitioning": 1}).splitlines() == [ + "Gordon" + ] + +def test_azure_partitioning_with_two_parameters(cluster): + # type: (ClickHouseCluster) -> None + node 
= cluster.instances["node"] # type: ClickHouseInstance + table_format = "column1 String, column2 String" + values_1 = f"('Elizabeth', 'Gordon')" + values_2 = f"('Emilia', 'Gregor')" + path = "a/column1=Elizabeth/column2=Gordon/sample.csv" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," + f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + ) + + query = ( + f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" + ) + assert azure_query(node, query, settings={"azure_blob_storage_hive_partitioning": 1}).splitlines() == [ + "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth\tGordon".format( + bucket="cont", max_path=path + ) + ] + + query = ( + f"SELECT column1 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2;" + ) + assert azure_query(node, query, settings={"azure_blob_storage_hive_partitioning": 1}).splitlines() == [ + "Elizabeth" + ] + + query = ( + f"SELECT column1 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2 AND column1=_column1;" + ) + assert azure_query(node, query, settings={"azure_blob_storage_hive_partitioning": 1}).splitlines() == [ + "Elizabeth" + ] + +def test_azure_partitioning_without_setting(cluster): + # type: (ClickHouseCluster) -> None + node = cluster.instances["node"] # type: ClickHouseInstance + table_format = "column1 String, column2 String" + values_1 = f"('Elizabeth', 'Gordon')" + values_2 = f"('Emilia', 'Gregor')" + path = "a/column1=Elizabeth/column2=Gordon/sample.csv" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," + f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + ) + + query = ( + f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" + ) + pattern = re.compile(r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL) + + with pytest.raises(Exception, match=pattern): + azure_query(node, query, settings={"azure_blob_storage_hive_partitioning": 0}) diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/test_hdfs.py b/tests/integration/test_hive_style_partitioning_hdfs_azure/test_hdfs.py new file mode 100644 index 00000000000..38641b63960 --- /dev/null +++ b/tests/integration/test_hive_style_partitioning_hdfs_azure/test_hdfs.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 + +import pytest + +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster, is_arm 
+import re + +from helpers.cluster import ClickHouseCluster + +if is_arm(): + pytestmark = pytest.mark.skip + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance( + "node1", + main_configs=[ + "configs/macro_hdfs.xml", + "configs/schema_cache_hdfs.xml", + "configs/cluster_hdfs.xml", + ], + with_hdfs=True, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + +def test_hdfs_partitioning_with_one_parameter(started_cluster): + hdfs_api = started_cluster.hdfs_api + hdfs_api.write_data( + f"/column0=Elizabeth/parquet_1", f"Elizabeth\tGordon\n" + ) + assert ( + hdfs_api.read_data(f"/column0=Elizabeth/parquet_1") + == f"Elizabeth\tGordon\n" + ) + + r = node1.query( + "SELECT _column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", settings={"hdfs_hive_partitioning": 1} + ) + assert (r == f"Elizabeth\n") + +def test_hdfs_partitioning_with_two_parameters(started_cluster): + hdfs_api = started_cluster.hdfs_api + hdfs_api.write_data( + f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n" + ) + assert ( + hdfs_api.read_data(f"/column0=Elizabeth/column1=Gordon/parquet_2") + == f"Elizabeth\tGordon\n" + ) + + r = node1.query( + "SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", settings={"hdfs_hive_partitioning": 1} + ) + assert (r == f"Gordon\n") + +def test_hdfs_partitioning_without_setting(started_cluster): + hdfs_api = started_cluster.hdfs_api + hdfs_api.write_data( + f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n" + ) + assert ( + hdfs_api.read_data(f"/column0=Elizabeth/column1=Gordon/parquet_2") + == f"Elizabeth\tGordon\n" + ) + pattern = re.compile(r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL) + + with pytest.raises(QueryRuntimeException, match=pattern): + node1.query(f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", settings={"hdfs_hive_partitioning": 0}) + +if __name__ == "__main__": + cluster.start() + input("Cluster created, press any key to destroy...") + cluster.shutdown() diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference new file mode 100644 index 00000000000..6ef1fcdf652 --- /dev/null +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -0,0 +1,96 @@ +TESTING THE FILE HIVE PARTITIONING +first last Elizabeth +Jorge Frank Elizabeth +Hunter Moreno Elizabeth +Esther Guzman Elizabeth +Dennis Stephens Elizabeth +Nettie Franklin Elizabeth +Stanley Gibson Elizabeth +Eugenia Greer Elizabeth +Jeffery Delgado Elizabeth +Clara Cross Elizabeth +Elizabeth Gordon Elizabeth +Eva Schmidt Elizabeth Schmidt +Samuel Schmidt Elizabeth Schmidt +Eva Schmidt Elizabeth +Samuel Schmidt Elizabeth +Elizabeth Gordon Elizabeth Gordon +Elizabeth Gordon Elizabeth +Elizabeth Gordon Elizabeth Gordon +Elizabeth Gordon Elizabeth +first last Elizabeth +Jorge Frank Elizabeth +Hunter Moreno Elizabeth +Esther Guzman Elizabeth +Dennis Stephens Elizabeth +Nettie Franklin Elizabeth +Stanley Gibson Elizabeth +Eugenia Greer Elizabeth +Jeffery Delgado Elizabeth +Clara Cross Elizabeth +Elizabeth Gordon Elizabeth +1 +TESTING THE URL PARTITIONING +first last Elizabeth +Jorge Frank Elizabeth +Hunter Moreno Elizabeth +Esther Guzman Elizabeth +Dennis Stephens Elizabeth +Nettie Franklin Elizabeth +Stanley Gibson Elizabeth +Eugenia 
Greer Elizabeth +Jeffery Delgado Elizabeth +Clara Cross Elizabeth +Elizabeth Gordon Elizabeth +Eva Schmidt Elizabeth Schmidt +Samuel Schmidt Elizabeth Schmidt +Eva Schmidt Elizabeth +Samuel Schmidt Elizabeth +Elizabeth Gordon Elizabeth Gordon +Elizabeth Gordon Elizabeth +Elizabeth Gordon Elizabeth Gordon +Elizabeth Gordon Elizabeth +first last Elizabeth +Jorge Frank Elizabeth +Hunter Moreno Elizabeth +Esther Guzman Elizabeth +Dennis Stephens Elizabeth +Nettie Franklin Elizabeth +Stanley Gibson Elizabeth +Eugenia Greer Elizabeth +Jeffery Delgado Elizabeth +Clara Cross Elizabeth +Elizabeth Gordon Elizabeth +1 +TESTING THE S3 PARTITIONING +first last Elizabeth +Jorge Frank Elizabeth +Hunter Moreno Elizabeth +Esther Guzman Elizabeth +Dennis Stephens Elizabeth +Nettie Franklin Elizabeth +Stanley Gibson Elizabeth +Eugenia Greer Elizabeth +Jeffery Delgado Elizabeth +Clara Cross Elizabeth +Elizabeth Gordon Elizabeth +Eva Schmidt Elizabeth Schmidt +Samuel Schmidt Elizabeth Schmidt +Eva Schmidt Elizabeth +Samuel Schmidt Elizabeth +Elizabeth Gordon Elizabeth Gordon +Elizabeth Gordon Elizabeth +Elizabeth Gordon Elizabeth Gordon +Elizabeth Gordon Elizabeth +first last Elizabeth +Jorge Frank Elizabeth +Hunter Moreno Elizabeth +Esther Guzman Elizabeth +Dennis Stephens Elizabeth +Nettie Franklin Elizabeth +Stanley Gibson Elizabeth +Eugenia Greer Elizabeth +Jeffery Delgado Elizabeth +Clara Cross Elizabeth +Elizabeth Gordon Elizabeth +1 diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh new file mode 100755 index 00000000000..a5d4c85a33b --- /dev/null +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -0,0 +1,93 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE FILE HIVE PARTITIONING'" + + +$CLICKHOUSE_LOCAL -n -q """set file_hive_partitioning = 1; + +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; + +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; + +SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; + +SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; + +SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; + +SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; + +SELECT *, _non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = _column0;""" + +$CLICKHOUSE_LOCAL -n -q """set file_hive_partitioning = 0; + +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER" + + +$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE URL PARTITIONING'" + + +$CLICKHOUSE_LOCAL -n -q """set url_hive_partitioning = 1; + +SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; + +SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; + +SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; + +SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; + +SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; + +SELECT *, _column0, _column1 FROM 
url('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; + +SELECT *, _non_existing_column FROM url('http://localhost:11111/test/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=*/sample.parquet') WHERE column0 = _column0;""" + +$CLICKHOUSE_LOCAL -n -q """set url_hive_partitioning = 0; + +SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER" + + +$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3 PARTITIONING'" + + +$CLICKHOUSE_LOCAL -n -q """set s3_hive_partitioning = 1; + +SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; + +SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; + +SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; + +SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; + +SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; + +SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; + +SELECT *, _non_existing_column FROM s3('http://localhost:11111/test/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=*/sample.parquet') WHERE column0 = _column0;""" + +$CLICKHOUSE_LOCAL -n -q """set s3_hive_partitioning = 0; + +SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER" + diff --git a/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9b6a78cf8cc7cd3ece15e13c9b2f222c8f09b81e GIT binary patch literal 1308 zcmWG=3^EjD5Z%Hr`iosh^b{kI%_hpmz#!kv!2kyTLxU6Z9~tnZHa*@opEc(Au?K1g zOD4aYo#~scS*oJ`_R8gD$~^!1^Jl8v?6d#uZT@&1Z&h*OEsK+iS@vM( z^NvMD|`UJH2qG^xTWJ-dT6$7G6DVTky7Woy5#*nvWVEJpR{CJ{Fy0- zE8ux@_5^8x!?dEIRau&2MyW=j!5h*xtj<|H$T%nI_ 
zrsjz?W}YW@dt8{DRBI|`*(jU(m2ZmM@u#NQ!s{)z%{yLgtZF$)cAddC?xOT5D^_mz z-x7J9sr1v$v$K{(^`5h;Sz-1gc2*AGUh7}8F0R?}-B&E(IrH;G`GUhY z?q@1K*wQW0otd;iYI&}N?~AIE{%tkCroWN7t$#4bGw~KP0|PJ-eBc+|z=1tM#0JF{ zU3TEfTh6OHr)jl`=8?k_CV5&tiR=x1?{{sI`|Af*?oUEIqS_tiuleY8e||}EY3bMB zzp9qaKhIf|e>9xYs^&t{(WWC|y8X+=Uc{}=?T>Xh_5JxVk(1Vsywf&)T&i$tu2}yJ zsTDW>>9!Q_yZT7oEaCof4t43QdkFv1JFG`q9?h6g zxTpBgk6%&qwlli6{)!hkc#l_C=)}P;-Ys+NvjP>bYG~cCGCw}YQ1x-0z@w1)u@}^n zTV#|>Z7-{GtbTT=rr=<)~?``+iTxh4l+3|MS-tdVRHm+9w`h0!z=3knV zrSnX_{WmK}KJ?@4(a#30zmF(AmC{eNN7s8Lx}H>x1pMHFk2oys;%$ zvXN_R)m$dd8M|y^7q?Bh-x;&%icdYm3!CL}KR{`PNz%rYL4r4>G&wsZDZV&4BQ-Zs zl!ZZ*N0mu}Jvl$8G&j!xn4o|vkwidc4g-VODMm>dNgXu?8BrcdQ3gqbdKRFR7=zd% z4mA!N3D&gCqT&(>R>!2I%v3Q34HQ1GkiyV!C<@hogF|f<&;XY3{QMLNR)w6z;u4^K eWG+xU(4JF_Y8(t2Y%V}QxHvIf1_}lM%S8a*|2_@? literal 0 HcmV?d00001 diff --git a/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9b6a78cf8cc7cd3ece15e13c9b2f222c8f09b81e GIT binary patch literal 1308 zcmWG=3^EjD5Z%Hr`iosh^b{kI%_hpmz#!kv!2kyTLxU6Z9~tnZHa*@opEc(Au?K1g zOD4aYo#~scS*oJ`_R8gD$~^!1^Jl8v?6d#uZT@&1Z&h*OEsK+iS@vM( z^NvMD|`UJH2qG^xTWJ-dT6$7G6DVTky7Woy5#*nvWVEJpR{CJ{Fy0- zE8ux@_5^8x!?dEIRau&2MyW=j!5h*xtj<|H$T%nI_ zrsjz?W}YW@dt8{DRBI|`*(jU(m2ZmM@u#NQ!s{)z%{yLgtZF$)cAddC?xOT5D^_mz z-x7J9sr1v$v$K{(^`5h;Sz-1gc2*AGUh7}8F0R?}-B&E(IrH;G`GUhY z?q@1K*wQW0otd;iYI&}N?~AIE{%tkCroWN7t$#4bGw~KP0|PJ-eBc+|z=1tM#0JF{ zU3TEfTh6OHr)jl`=8?k_CV5&tiR=x1?{{sI`|Af*?oUEIqS_tiuleY8e||}EY3bMB zzp9qaKhIf|e>9xYs^&t{(WWC|y8X+=Uc{}=?T>Xh_5JxVk(1Vsywf&)T&i$tu2}yJ zsTDW>>9!Q_yZT7oEaCof4t43QdkFv1JFG`q9?h6g zxTpBgk6%&qwlli6{)!hkc#l_C=)}P;-Ys+NvjP>bYG~cCGCw}YQ1x-0z@w1)u@}^n zTV#|>Z7-{GtbTT=rr=<)~?``+iTxh4l+3|MS-tdVRHm+9w`h0!z=3knV zrSnX_{WmK}KJ?@4(a#30zmF(AmC{eNN7s8Lx}H>x1pMHFk2oys;%$ zvXN_R)m$dd8M|y^7q?Bh-x;&%icdYm3!CL}KR{`PNz%rYL4r4>G&wsZDZV&4BQ-Zs zl!ZZ*N0mu}Jvl$8G&j!xn4o|vkwidc4g-VODMm>dNgXu?8BrcdQ3gqbdKRFR7=zd% z4mA!N3D&gCqT&(>R>!2I%v3Q34HQ1GkiyV!C<@hogF|f<&;XY3{QMLNR)w6z;u4^K eWG+xU(4JF_Y8(t2Y%V}QxHvIf1_}lM%S8a*|2_@? literal 0 HcmV?d00001 diff --git a/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/sample.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9b6a78cf8cc7cd3ece15e13c9b2f222c8f09b81e GIT binary patch literal 1308 zcmWG=3^EjD5Z%Hr`iosh^b{kI%_hpmz#!kv!2kyTLxU6Z9~tnZHa*@opEc(Au?K1g zOD4aYo#~scS*oJ`_R8gD$~^!1^Jl8v?6d#uZT@&1Z&h*OEsK+iS@vM( z^NvMD|`UJH2qG^xTWJ-dT6$7G6DVTky7Woy5#*nvWVEJpR{CJ{Fy0- zE8ux@_5^8x!?dEIRau&2MyW=j!5h*xtj<|H$T%nI_ zrsjz?W}YW@dt8{DRBI|`*(jU(m2ZmM@u#NQ!s{)z%{yLgtZF$)cAddC?xOT5D^_mz z-x7J9sr1v$v$K{(^`5h;Sz-1gc2*AGUh7}8F0R?}-B&E(IrH;G`GUhY z?q@1K*wQW0otd;iYI&}N?~AIE{%tkCroWN7t$#4bGw~KP0|PJ-eBc+|z=1tM#0JF{ zU3TEfTh6OHr)jl`=8?k_CV5&tiR=x1?{{sI`|Af*?oUEIqS_tiuleY8e||}EY3bMB zzp9qaKhIf|e>9xYs^&t{(WWC|y8X+=Uc{}=?T>Xh_5JxVk(1Vsywf&)T&i$tu2}yJ zsTDW>>9!Q_yZT7oEaCof4t43QdkFv1JFG`q9?h6g zxTpBgk6%&qwlli6{)!hkc#l_C=)}P;-Ys+NvjP>bYG~cCGCw}YQ1x-0z@w1)u@}^n zTV#|>Z7-{GtbTT=rr=<)~?``+iTxh4l+3|MS-tdVRHm+9w`h0!z=3knV zrSnX_{WmK}KJ?@4(a#30zmF(AmC{eNN7s8Lx}H>x1pMHFk2oys;%$ zvXN_R)m$dd8M|y^7q?Bh-x;&%icdYm3!CL}KR{`PNz%rYL4r4>G&wsZDZV&4BQ-Zs zl!ZZ*N0mu}Jvl$8G&j!xn4o|vkwidc4g-VODMm>dNgXu?8BrcdQ3gqbdKRFR7=zd% z4mA!N3D&gCqT&(>R>!2I%v3Q34HQ1GkiyV!C<@hogF|f<&;XY3{QMLNR)w6z;u4^K eWG+xU(4JF_Y8(t2Y%V}QxHvIf1_}lM%S8a*|2_@? 
literal 0 HcmV?d00001 diff --git a/tests/queries/0_stateless/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9b6a78cf8cc7cd3ece15e13c9b2f222c8f09b81e GIT binary patch literal 1308 zcmWG=3^EjD5Z%Hr`iosh^b{kI%_hpmz#!kv!2kyTLxU6Z9~tnZHa*@opEc(Au?K1g zOD4aYo#~scS*oJ`_R8gD$~^!1^Jl8v?6d#uZT@&1Z&h*OEsK+iS@vM( z^NvMD|`UJH2qG^xTWJ-dT6$7G6DVTky7Woy5#*nvWVEJpR{CJ{Fy0- zE8ux@_5^8x!?dEIRau&2MyW=j!5h*xtj<|H$T%nI_ zrsjz?W}YW@dt8{DRBI|`*(jU(m2ZmM@u#NQ!s{)z%{yLgtZF$)cAddC?xOT5D^_mz z-x7J9sr1v$v$K{(^`5h;Sz-1gc2*AGUh7}8F0R?}-B&E(IrH;G`GUhY z?q@1K*wQW0otd;iYI&}N?~AIE{%tkCroWN7t$#4bGw~KP0|PJ-eBc+|z=1tM#0JF{ zU3TEfTh6OHr)jl`=8?k_CV5&tiR=x1?{{sI`|Af*?oUEIqS_tiuleY8e||}EY3bMB zzp9qaKhIf|e>9xYs^&t{(WWC|y8X+=Uc{}=?T>Xh_5JxVk(1Vsywf&)T&i$tu2}yJ zsTDW>>9!Q_yZT7oEaCof4t43QdkFv1JFG`q9?h6g zxTpBgk6%&qwlli6{)!hkc#l_C=)}P;-Ys+NvjP>bYG~cCGCw}YQ1x-0z@w1)u@}^n zTV#|>Z7-{GtbTT=rr=<)~?``+iTxh4l+3|MS-tdVRHm+9w`h0!z=3knV zrSnX_{WmK}KJ?@4(a#30zmF(AmC{eNN7s8Lx}H>x1pMHFk2oys;%$ zvXN_R)m$dd8M|y^7q?Bh-x;&%icdYm3!CL}KR{`PNz%rYL4r4>G&wsZDZV&4BQ-Zs zl!ZZ*N0mu}Jvl$8G&j!xn4o|vkwidc4g-VODMm>dNgXu?8BrcdQ3gqbdKRFR7=zd% z4mA!N3D&gCqT&(>R>!2I%v3Q34HQ1GkiyV!C<@hogF|f<&;XY3{QMLNR)w6z;u4^K eWG+xU(4JF_Y8(t2Y%V}QxHvIf1_}lM%S8a*|2_@? literal 0 HcmV?d00001 diff --git a/tests/queries/0_stateless/data_minio/hive_partitioning/coumn0=Elizabeth/column1=Gordon/sample.parquet b/tests/queries/0_stateless/data_minio/hive_partitioning/coumn0=Elizabeth/column1=Gordon/sample.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9b6a78cf8cc7cd3ece15e13c9b2f222c8f09b81e GIT binary patch literal 1308 zcmWG=3^EjD5Z%Hr`iosh^b{kI%_hpmz#!kv!2kyTLxU6Z9~tnZHa*@opEc(Au?K1g zOD4aYo#~scS*oJ`_R8gD$~^!1^Jl8v?6d#uZT@&1Z&h*OEsK+iS@vM( z^NvMD|`UJH2qG^xTWJ-dT6$7G6DVTky7Woy5#*nvWVEJpR{CJ{Fy0- zE8ux@_5^8x!?dEIRau&2MyW=j!5h*xtj<|H$T%nI_ zrsjz?W}YW@dt8{DRBI|`*(jU(m2ZmM@u#NQ!s{)z%{yLgtZF$)cAddC?xOT5D^_mz z-x7J9sr1v$v$K{(^`5h;Sz-1gc2*AGUh7}8F0R?}-B&E(IrH;G`GUhY z?q@1K*wQW0otd;iYI&}N?~AIE{%tkCroWN7t$#4bGw~KP0|PJ-eBc+|z=1tM#0JF{ zU3TEfTh6OHr)jl`=8?k_CV5&tiR=x1?{{sI`|Af*?oUEIqS_tiuleY8e||}EY3bMB zzp9qaKhIf|e>9xYs^&t{(WWC|y8X+=Uc{}=?T>Xh_5JxVk(1Vsywf&)T&i$tu2}yJ zsTDW>>9!Q_yZT7oEaCof4t43QdkFv1JFG`q9?h6g zxTpBgk6%&qwlli6{)!hkc#l_C=)}P;-Ys+NvjP>bYG~cCGCw}YQ1x-0z@w1)u@}^n zTV#|>Z7-{GtbTT=rr=<)~?``+iTxh4l+3|MS-tdVRHm+9w`h0!z=3knV zrSnX_{WmK}KJ?@4(a#30zmF(AmC{eNN7s8Lx}H>x1pMHFk2oys;%$ zvXN_R)m$dd8M|y^7q?Bh-x;&%icdYm3!CL}KR{`PNz%rYL4r4>G&wsZDZV&4BQ-Zs zl!ZZ*N0mu}Jvl$8G&j!xn4o|vkwidc4g-VODMm>dNgXu?8BrcdQ3gqbdKRFR7=zd% z4mA!N3D&gCqT&(>R>!2I%v3Q34HQ1GkiyV!C<@hogF|f<&;XY3{QMLNR)w6z;u4^K eWG+xU(4JF_Y8(t2Y%V}QxHvIf1_}lM%S8a*|2_@? 
literal 0 HcmV?d00001 diff --git a/tests/queries/0_stateless/data_minio/hive_partitioning/coumn0=Elizabeth/column1=Schmidt/sample.parquet b/tests/queries/0_stateless/data_minio/hive_partitioning/coumn0=Elizabeth/column1=Schmidt/sample.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9b6a78cf8cc7cd3ece15e13c9b2f222c8f09b81e GIT binary patch literal 1308 zcmWG=3^EjD5Z%Hr`iosh^b{kI%_hpmz#!kv!2kyTLxU6Z9~tnZHa*@opEc(Au?K1g zOD4aYo#~scS*oJ`_R8gD$~^!1^Jl8v?6d#uZT@&1Z&h*OEsK+iS@vM( z^NvMD|`UJH2qG^xTWJ-dT6$7G6DVTky7Woy5#*nvWVEJpR{CJ{Fy0- zE8ux@_5^8x!?dEIRau&2MyW=j!5h*xtj<|H$T%nI_ zrsjz?W}YW@dt8{DRBI|`*(jU(m2ZmM@u#NQ!s{)z%{yLgtZF$)cAddC?xOT5D^_mz z-x7J9sr1v$v$K{(^`5h;Sz-1gc2*AGUh7}8F0R?}-B&E(IrH;G`GUhY z?q@1K*wQW0otd;iYI&}N?~AIE{%tkCroWN7t$#4bGw~KP0|PJ-eBc+|z=1tM#0JF{ zU3TEfTh6OHr)jl`=8?k_CV5&tiR=x1?{{sI`|Af*?oUEIqS_tiuleY8e||}EY3bMB zzp9qaKhIf|e>9xYs^&t{(WWC|y8X+=Uc{}=?T>Xh_5JxVk(1Vsywf&)T&i$tu2}yJ zsTDW>>9!Q_yZT7oEaCof4t43QdkFv1JFG`q9?h6g zxTpBgk6%&qwlli6{)!hkc#l_C=)}P;-Ys+NvjP>bYG~cCGCw}YQ1x-0z@w1)u@}^n zTV#|>Z7-{GtbTT=rr=<)~?``+iTxh4l+3|MS-tdVRHm+9w`h0!z=3knV zrSnX_{WmK}KJ?@4(a#30zmF(AmC{eNN7s8Lx}H>x1pMHFk2oys;%$ zvXN_R)m$dd8M|y^7q?Bh-x;&%icdYm3!CL}KR{`PNz%rYL4r4>G&wsZDZV&4BQ-Zs zl!ZZ*N0mu}Jvl$8G&j!xn4o|vkwidc4g-VODMm>dNgXu?8BrcdQ3gqbdKRFR7=zd% z4mA!N3D&gCqT&(>R>!2I%v3Q34HQ1GkiyV!C<@hogF|f<&;XY3{QMLNR)w6z;u4^K eWG+xU(4JF_Y8(t2Y%V}QxHvIf1_}lM%S8a*|2_@? literal 0 HcmV?d00001 diff --git a/tests/queries/0_stateless/data_minio/hive_partitioning/coumn0=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_minio/hive_partitioning/coumn0=Elizabeth/sample.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9b6a78cf8cc7cd3ece15e13c9b2f222c8f09b81e GIT binary patch literal 1308 zcmWG=3^EjD5Z%Hr`iosh^b{kI%_hpmz#!kv!2kyTLxU6Z9~tnZHa*@opEc(Au?K1g zOD4aYo#~scS*oJ`_R8gD$~^!1^Jl8v?6d#uZT@&1Z&h*OEsK+iS@vM( z^NvMD|`UJH2qG^xTWJ-dT6$7G6DVTky7Woy5#*nvWVEJpR{CJ{Fy0- zE8ux@_5^8x!?dEIRau&2MyW=j!5h*xtj<|H$T%nI_ zrsjz?W}YW@dt8{DRBI|`*(jU(m2ZmM@u#NQ!s{)z%{yLgtZF$)cAddC?xOT5D^_mz z-x7J9sr1v$v$K{(^`5h;Sz-1gc2*AGUh7}8F0R?}-B&E(IrH;G`GUhY z?q@1K*wQW0otd;iYI&}N?~AIE{%tkCroWN7t$#4bGw~KP0|PJ-eBc+|z=1tM#0JF{ zU3TEfTh6OHr)jl`=8?k_CV5&tiR=x1?{{sI`|Af*?oUEIqS_tiuleY8e||}EY3bMB zzp9qaKhIf|e>9xYs^&t{(WWC|y8X+=Uc{}=?T>Xh_5JxVk(1Vsywf&)T&i$tu2}yJ zsTDW>>9!Q_yZT7oEaCof4t43QdkFv1JFG`q9?h6g zxTpBgk6%&qwlli6{)!hkc#l_C=)}P;-Ys+NvjP>bYG~cCGCw}YQ1x-0z@w1)u@}^n zTV#|>Z7-{GtbTT=rr=<)~?``+iTxh4l+3|MS-tdVRHm+9w`h0!z=3knV zrSnX_{WmK}KJ?@4(a#30zmF(AmC{eNN7s8Lx}H>x1pMHFk2oys;%$ zvXN_R)m$dd8M|y^7q?Bh-x;&%icdYm3!CL}KR{`PNz%rYL4r4>G&wsZDZV&4BQ-Zs zl!ZZ*N0mu}Jvl$8G&j!xn4o|vkwidc4g-VODMm>dNgXu?8BrcdQ3gqbdKRFR7=zd% z4mA!N3D&gCqT&(>R>!2I%v3Q34HQ1GkiyV!C<@hogF|f<&;XY3{QMLNR)w6z;u4^K eWG+xU(4JF_Y8(t2Y%V}QxHvIf1_}lM%S8a*|2_@? 
literal 0 HcmV?d00001 diff --git a/tests/queries/0_stateless/data_minio/hive_partitioning/non_existing_column=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_minio/hive_partitioning/non_existing_column=Elizabeth/sample.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9b6a78cf8cc7cd3ece15e13c9b2f222c8f09b81e GIT binary patch literal 1308 zcmWG=3^EjD5Z%Hr`iosh^b{kI%_hpmz#!kv!2kyTLxU6Z9~tnZHa*@opEc(Au?K1g zOD4aYo#~scS*oJ`_R8gD$~^!1^Jl8v?6d#uZT@&1Z&h*OEsK+iS@vM( z^NvMD|`UJH2qG^xTWJ-dT6$7G6DVTky7Woy5#*nvWVEJpR{CJ{Fy0- zE8ux@_5^8x!?dEIRau&2MyW=j!5h*xtj<|H$T%nI_ zrsjz?W}YW@dt8{DRBI|`*(jU(m2ZmM@u#NQ!s{)z%{yLgtZF$)cAddC?xOT5D^_mz z-x7J9sr1v$v$K{(^`5h;Sz-1gc2*AGUh7}8F0R?}-B&E(IrH;G`GUhY z?q@1K*wQW0otd;iYI&}N?~AIE{%tkCroWN7t$#4bGw~KP0|PJ-eBc+|z=1tM#0JF{ zU3TEfTh6OHr)jl`=8?k_CV5&tiR=x1?{{sI`|Af*?oUEIqS_tiuleY8e||}EY3bMB zzp9qaKhIf|e>9xYs^&t{(WWC|y8X+=Uc{}=?T>Xh_5JxVk(1Vsywf&)T&i$tu2}yJ zsTDW>>9!Q_yZT7oEaCof4t43QdkFv1JFG`q9?h6g zxTpBgk6%&qwlli6{)!hkc#l_C=)}P;-Ys+NvjP>bYG~cCGCw}YQ1x-0z@w1)u@}^n zTV#|>Z7-{GtbTT=rr=<)~?``+iTxh4l+3|MS-tdVRHm+9w`h0!z=3knV zrSnX_{WmK}KJ?@4(a#30zmF(AmC{eNN7s8Lx}H>x1pMHFk2oys;%$ zvXN_R)m$dd8M|y^7q?Bh-x;&%icdYm3!CL}KR{`PNz%rYL4r4>G&wsZDZV&4BQ-Zs zl!ZZ*N0mu}Jvl$8G&j!xn4o|vkwidc4g-VODMm>dNgXu?8BrcdQ3gqbdKRFR7=zd% z4mA!N3D&gCqT&(>R>!2I%v3Q34HQ1GkiyV!C<@hogF|f<&;XY3{QMLNR)w6z;u4^K eWG+xU(4JF_Y8(t2Y%V}QxHvIf1_}lM%S8a*|2_@? literal 0 HcmV?d00001 From 83462d743e76dcfa8fd35b8b30335682f86d9374 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 2 Jul 2024 16:13:44 +0200 Subject: [PATCH 067/265] enhance SettingsChangesHistory.cpp --- src/Core/SettingsChangesHistory.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index b0725340f46..607f9b6d858 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -59,6 +59,11 @@ static std::initializer_list Date: Tue, 2 Jul 2024 14:43:45 +0000 Subject: [PATCH 068/265] style check --- .../ObjectStorage/StorageObjectStorage.cpp | 2 +- .../StorageObjectStorageSource.cpp | 7 +-- .../test_azure.py | 43 +++++++++++++------ .../test_hdfs.py | 32 ++++++++------ .../03203_hive_style_partitioning.sh | 24 +++++++---- 5 files changed, 69 insertions(+), 39 deletions(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index b169f02940e..ae7c211330c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -86,7 +86,7 @@ StorageObjectStorage::StorageObjectStorage( ); Strings paths; - + if (checkIfHiveSettingEnabled(context, configuration->getTypeName())) if (auto file = file_iterator->next(0)) paths = {file->getPath()}; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 2741cfecf6b..afb23961312 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -198,13 +198,14 @@ Chunk StorageObjectStorageSource::generate() chassert(object_info->metadata); auto hive_map = VirtualColumnUtils::parsePartitionMapFromPath(object_info->getPath()); - bool contains_virtual_column = std::any_of(hive_map.begin(), hive_map.end(), - [&](const auto& pair) { + bool contains_virtual_column = std::any_of(hive_map.begin(), hive_map.end(), + [&](const auto& pair) + { return 
read_from_format_info.requested_virtual_columns.contains(pair.first); }); if (!contains_virtual_column) - hive_map.clear(); // If we cannot find any virual column in requested, we don't add any of them to chunk + hive_map.clear(); // If we cannot find any virtual column in requested, we don't add any of them to chunk VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( chunk, read_from_format_info.requested_virtual_columns, diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/test_azure.py b/tests/integration/test_hive_style_partitioning_hdfs_azure/test_azure.py index c9b2c9fec2e..0be697821f0 100644 --- a/tests/integration/test_hive_style_partitioning_hdfs_azure/test_azure.py +++ b/tests/integration/test_hive_style_partitioning_hdfs_azure/test_azure.py @@ -12,14 +12,21 @@ from helpers.cluster import ClickHouseCluster, ClickHouseInstance if is_arm(): pytestmark = pytest.mark.skip + @pytest.fixture(scope="module") def cluster(): try: cluster = ClickHouseCluster(__file__) cluster.add_instance( "node", - main_configs=["configs/named_collections_azure.xml", "configs/schema_cache_azure.xml"], - user_configs=["configs/disable_profilers_azure.xml", "configs/users_azure.xml"], + main_configs=[ + "configs/named_collections_azure.xml", + "configs/schema_cache_azure.xml", + ], + user_configs=[ + "configs/disable_profilers_azure.xml", + "configs/users_azure.xml", + ], with_azurite=True, ) cluster.start() @@ -121,7 +128,9 @@ def test_azure_partitioning_with_one_parameter(cluster): f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}')" ) - assert azure_query(node, query, settings={"azure_blob_storage_hive_partitioning": 1}).splitlines() == [ + assert azure_query( + node, query, settings={"azure_blob_storage_hive_partitioning": 1} + ).splitlines() == [ "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format( bucket="cont", max_path=path ) @@ -132,9 +141,10 @@ def test_azure_partitioning_with_one_parameter(cluster): f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" ) - assert azure_query(node, query, settings={"azure_blob_storage_hive_partitioning": 1}).splitlines() == [ - "Gordon" - ] + assert azure_query( + node, query, settings={"azure_blob_storage_hive_partitioning": 1} + ).splitlines() == ["Gordon"] + def test_azure_partitioning_with_two_parameters(cluster): # type: (ClickHouseCluster) -> None @@ -155,7 +165,9 @@ def test_azure_partitioning_with_two_parameters(cluster): f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" ) - assert azure_query(node, query, settings={"azure_blob_storage_hive_partitioning": 1}).splitlines() == [ + assert azure_query( + node, query, settings={"azure_blob_storage_hive_partitioning": 1} + ).splitlines() == [ "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth\tGordon".format( bucket="cont", max_path=path ) @@ -166,18 +178,19 @@ def test_azure_partitioning_with_two_parameters(cluster): f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2;" ) - assert azure_query(node, query, 
settings={"azure_blob_storage_hive_partitioning": 1}).splitlines() == [ - "Elizabeth" - ] + assert azure_query( + node, query, settings={"azure_blob_storage_hive_partitioning": 1} + ).splitlines() == ["Elizabeth"] query = ( f"SELECT column1 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2 AND column1=_column1;" ) - assert azure_query(node, query, settings={"azure_blob_storage_hive_partitioning": 1}).splitlines() == [ - "Elizabeth" - ] + assert azure_query( + node, query, settings={"azure_blob_storage_hive_partitioning": 1} + ).splitlines() == ["Elizabeth"] + def test_azure_partitioning_without_setting(cluster): # type: (ClickHouseCluster) -> None @@ -198,7 +211,9 @@ def test_azure_partitioning_without_setting(cluster): f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" ) - pattern = re.compile(r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL) + pattern = re.compile( + r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL + ) with pytest.raises(Exception, match=pattern): azure_query(node, query, settings={"azure_blob_storage_hive_partitioning": 0}) diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/test_hdfs.py b/tests/integration/test_hive_style_partitioning_hdfs_azure/test_hdfs.py index 38641b63960..4667d18688a 100644 --- a/tests/integration/test_hive_style_partitioning_hdfs_azure/test_hdfs.py +++ b/tests/integration/test_hive_style_partitioning_hdfs_azure/test_hdfs.py @@ -31,20 +31,18 @@ def started_cluster(): finally: cluster.shutdown() + def test_hdfs_partitioning_with_one_parameter(started_cluster): hdfs_api = started_cluster.hdfs_api - hdfs_api.write_data( - f"/column0=Elizabeth/parquet_1", f"Elizabeth\tGordon\n" - ) - assert ( - hdfs_api.read_data(f"/column0=Elizabeth/parquet_1") - == f"Elizabeth\tGordon\n" - ) + hdfs_api.write_data(f"/column0=Elizabeth/parquet_1", f"Elizabeth\tGordon\n") + assert hdfs_api.read_data(f"/column0=Elizabeth/parquet_1") == f"Elizabeth\tGordon\n" r = node1.query( - "SELECT _column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", settings={"hdfs_hive_partitioning": 1} + "SELECT _column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", + settings={"hdfs_hive_partitioning": 1}, ) - assert (r == f"Elizabeth\n") + assert r == f"Elizabeth\n" + def test_hdfs_partitioning_with_two_parameters(started_cluster): hdfs_api = started_cluster.hdfs_api @@ -57,9 +55,11 @@ def test_hdfs_partitioning_with_two_parameters(started_cluster): ) r = node1.query( - "SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", settings={"hdfs_hive_partitioning": 1} + "SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", + settings={"hdfs_hive_partitioning": 1}, ) - assert (r == f"Gordon\n") + assert r == f"Gordon\n" + def test_hdfs_partitioning_without_setting(started_cluster): hdfs_api = started_cluster.hdfs_api @@ -70,10 +70,16 @@ def test_hdfs_partitioning_without_setting(started_cluster): hdfs_api.read_data(f"/column0=Elizabeth/column1=Gordon/parquet_2") == f"Elizabeth\tGordon\n" ) - pattern = re.compile(r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL) 
+ pattern = re.compile( + r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL + ) with pytest.raises(QueryRuntimeException, match=pattern): - node1.query(f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", settings={"hdfs_hive_partitioning": 0}) + node1.query( + f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", + settings={"hdfs_hive_partitioning": 0}, + ) + if __name__ == "__main__": cluster.start() diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index a5d4c85a33b..83a8f87a813 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -8,7 +8,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE FILE HIVE PARTITIONING'" -$CLICKHOUSE_LOCAL -n -q """set file_hive_partitioning = 1; +$CLICKHOUSE_LOCAL -n -q """ +set file_hive_partitioning = 1; SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; @@ -31,13 +32,15 @@ SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.pa $CLICKHOUSE_LOCAL -n -q """set file_hive_partitioning = 0; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER" +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER" $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE URL PARTITIONING'" -$CLICKHOUSE_LOCAL -n -q """set url_hive_partitioning = 1; +$CLICKHOUSE_LOCAL -n -q """ +set url_hive_partitioning = 1; SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; @@ -60,13 +63,15 @@ SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=*/ $CLICKHOUSE_LOCAL -n -q """set url_hive_partitioning = 0; -SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER" +SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER" $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3 PARTITIONING'" -$CLICKHOUSE_LOCAL -n -q """set s3_hive_partitioning = 1; +$CLICKHOUSE_LOCAL -n -q """ +set s3_hive_partitioning = 1; SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; @@ -85,9 +90,12 @@ SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/partitioning/c SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; SELECT *, _non_existing_column FROM s3('http://localhost:11111/test/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=*/sample.parquet') WHERE column0 = _column0;""" +SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=*/sample.parquet') WHERE column0 = _column0; +""" -$CLICKHOUSE_LOCAL -n -q """set s3_hive_partitioning = 0; +$CLICKHOUSE_LOCAL -n -q """ +set s3_hive_partitioning = 0; -SELECT *, _column0 FROM 
s3('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER" +SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER" From cd5cdcc124f95204a6f63e8a1ce4d7148d8fec7f Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 2 Jul 2024 17:00:59 +0200 Subject: [PATCH 069/265] Shellcheck fix --- tests/queries/0_stateless/03203_hive_style_partitioning.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 83a8f87a813..98c039f3454 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -30,7 +30,8 @@ SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/c SELECT *, _non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = _column0;""" -$CLICKHOUSE_LOCAL -n -q """set file_hive_partitioning = 0; +$CLICKHOUSE_LOCAL -n -q """ +set file_hive_partitioning = 0; SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" @@ -61,7 +62,8 @@ SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=El SELECT *, _non_existing_column FROM url('http://localhost:11111/test/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=*/sample.parquet') WHERE column0 = _column0;""" -$CLICKHOUSE_LOCAL -n -q """set url_hive_partitioning = 0; +$CLICKHOUSE_LOCAL -n -q """ +set url_hive_partitioning = 0; SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" @@ -98,4 +100,3 @@ set s3_hive_partitioning = 0; SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" - From dc9dc1676d8f8af74c20173927c6027623cc788c Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 2 Jul 2024 17:37:47 +0200 Subject: [PATCH 070/265] add default for map in VirtualsForFileLikeStorage --- src/Storages/VirtualColumnUtils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index a03d4c7447f..f9b49cc48ed 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -76,7 +76,7 @@ struct VirtualsForFileLikeStorage std::optional size { std::nullopt }; const String * filename { nullptr }; std::optional last_modified { std::nullopt }; - std::map hive_partitioning_map; + std::map hive_partitioning_map {}; }; From 61f863c4e1f1d99483af78824d1c5792059dc400 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 3 Jul 2024 13:47:18 +0000 Subject: [PATCH 071/265] fix ambiguous override of non-virtual --- src/Client/Connection.cpp | 2 +- src/IO/ReadBufferFromPocoSocketChunked.cpp | 8 ++++---- src/IO/ReadBufferFromPocoSocketChunked.h | 2 +- 3 files changed, 6 insertions(+), 6 
deletions(-) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 803f68c69d6..198518d6314 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -1122,7 +1122,7 @@ bool Connection::poll(size_t timeout_microseconds) bool Connection::hasReadPendingData() const { - return last_input_packet_type.has_value() || in->hasPendingData(); + return last_input_packet_type.has_value() || in->hasBufferedData(); } diff --git a/src/IO/ReadBufferFromPocoSocketChunked.cpp b/src/IO/ReadBufferFromPocoSocketChunked.cpp index 93afeadba60..4a1e3732a55 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.cpp +++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp @@ -34,12 +34,12 @@ void ReadBufferFromPocoSocketChunked::enableChunked() next_chunk = 0; } -bool ReadBufferFromPocoSocketChunked::hasPendingData() const +bool ReadBufferFromPocoSocketChunked::hasBufferedData() const { - if (chunked) - return available() || static_cast(data_end - working_buffer.end()) > sizeof(next_chunk); + if (available()) + return true; - return ReadBufferFromPocoSocketBase::hasPendingData(); + return chunked && (static_cast(data_end - working_buffer.end()) > sizeof(next_chunk)); } bool ReadBufferFromPocoSocketChunked::poll(size_t timeout_microseconds) const diff --git a/src/IO/ReadBufferFromPocoSocketChunked.h b/src/IO/ReadBufferFromPocoSocketChunked.h index 943a50f5d08..8bc4024b978 100644 --- a/src/IO/ReadBufferFromPocoSocketChunked.h +++ b/src/IO/ReadBufferFromPocoSocketChunked.h @@ -84,7 +84,7 @@ public: void enableChunked(); - bool hasPendingData() const; + bool hasBufferedData() const; bool poll(size_t timeout_microseconds) const; From 942f7d7532059cf931242ce5c94a39ea0344b50b Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 5 Jul 2024 12:44:31 +0000 Subject: [PATCH 072/265] fixes after review --- programs/obfuscator/Obfuscator.cpp | 4 +- src/Core/Settings.h | 6 +- src/Core/SettingsChangesHistory.cpp | 6 +- src/Formats/ReadSchemaUtils.cpp | 23 +- src/Formats/ReadSchemaUtils.h | 6 +- .../DataLakes/IStorageDataLake.h | 3 +- .../ObjectStorage/ReadBufferIterator.cpp | 2 +- .../ObjectStorage/ReadBufferIterator.h | 2 +- .../ObjectStorage/StorageObjectStorage.cpp | 60 ++--- .../ObjectStorage/StorageObjectStorage.h | 6 + .../StorageObjectStorageCluster.cpp | 3 +- .../StorageObjectStorageSource.cpp | 13 +- src/Storages/ObjectStorage/Utils.cpp | 7 +- src/Storages/ObjectStorage/Utils.h | 1 + .../StorageObjectStorageQueue.cpp | 3 +- src/Storages/StorageFile.cpp | 53 ++--- src/Storages/StorageURL.cpp | 24 +- src/Storages/VirtualColumnUtils.cpp | 63 +++-- src/Storages/VirtualColumnUtils.h | 9 +- src/TableFunctions/TableFunctionFormat.cpp | 10 +- .../TableFunctionObjectStorage.cpp | 3 +- .../__init__.py | 0 .../configs/cluster_azure.xml | 39 ---- .../configs/cluster_hdfs.xml | 33 --- .../configs/disable_profilers_azure.xml | 9 - .../configs/macro_hdfs.xml | 5 - .../configs/named_collections_azure.xml | 14 -- .../configs/schema_cache_azure.xml | 3 - .../configs/schema_cache_hdfs.xml | 3 - .../configs/users_azure.xml | 9 - .../test_azure.py | 219 ------------------ .../test_hdfs.py | 87 ------- .../test_storage_azure_blob_storage/test.py | 110 +++++++++ tests/integration/test_storage_hdfs/test.py | 49 ++++ 34 files changed, 304 insertions(+), 583 deletions(-) delete mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/__init__.py delete mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/configs/cluster_azure.xml delete mode 100644 
tests/integration/test_hive_style_partitioning_hdfs_azure/configs/cluster_hdfs.xml delete mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/configs/disable_profilers_azure.xml delete mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/configs/macro_hdfs.xml delete mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/configs/named_collections_azure.xml delete mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/configs/schema_cache_azure.xml delete mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/configs/schema_cache_hdfs.xml delete mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/configs/users_azure.xml delete mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/test_azure.py delete mode 100644 tests/integration/test_hive_style_partitioning_hdfs_azure/test_hdfs.py diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 688ae1a1143..11e85bc1302 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -1307,7 +1307,9 @@ try throw ErrnoException(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Input must be seekable file (it will be read twice)"); SingleReadBufferIterator read_buffer_iterator(std::move(file)); - schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, context_const); + + std::string sample_string; + schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, sample_string, context_const); } else { diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8399d3925db..65b93b893b6 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -935,6 +935,7 @@ class IColumn; M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \ M(Bool, allow_deprecated_error_prone_window_functions, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)", 0) \ M(Bool, allow_deprecated_snowflake_conversion_functions, false, "Enables deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake.", 0) \ + M(Bool, use_hive_partitioning, false, "Allows to use hive partitioning for File, URL, S3, AzureBlobStorage and HDFS engines.", 0)\ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. 
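The single use_hive_partitioning switch added above replaces the five per-engine settings that the next hunk removes. A minimal sketch of how the unified setting is exercised from an integration test, assuming the usual test harness (a node object whose query() accepts per-query settings, as in the HDFS tests earlier in this series):

    # Read a hive-partitioned TSV path and select the derived virtual column.
    # The path layout matches the HDFS tests above; only the setting name
    # changes after this consolidation.
    r = node.query(
        "SELECT _column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')",
        settings={"use_hive_partitioning": 1},
    )
    assert r == "Elizabeth\n"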
@@ -1106,11 +1107,6 @@ class IColumn; M(Bool, input_format_tsv_skip_trailing_empty_lines, false, "Skip trailing empty lines in TSV format", 0) \ M(Bool, input_format_custom_skip_trailing_empty_lines, false, "Skip trailing empty lines in CustomSeparated format", 0) \ M(Bool, input_format_tsv_crlf_end_of_line, false, "If it is set true, file function will read TSV format with \\r\\n instead of \\n.", 0) \ - M(Bool, file_hive_partitioning, false, "Allows to use hive partitioning for file format", 0)\ - M(Bool, url_hive_partitioning, false, "Allows to use hive partitioning for url format", 0)\ - M(Bool, s3_hive_partitioning, false, "Allows to use hive partitioning for s3 format", 0)\ - M(Bool, azure_blob_storage_hive_partitioning, false, "Allows to use hive partitioning for AzureBlobStorage format", 0)\ - M(Bool, hdfs_hive_partitioning, false, "Allows to use hive partitioning for hdfs format", 0)\ \ M(Bool, input_format_native_allow_types_conversion, true, "Allow data types conversion in Native input format", 0) \ \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 607f9b6d858..b676cd85ce6 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -59,11 +59,7 @@ static std::initializer_list readSchemaFromFormatImpl( std::optional format_name, const std::optional & format_settings, IReadBufferIterator & read_buffer_iterator, + std::string & sample_path, const ContextPtr & context) try { @@ -143,6 +144,10 @@ try { iterator_data = read_buffer_iterator.next(); + /// Extracting the File path for hive-style partitioning + if (sample_path.empty()) + sample_path = read_buffer_iterator.getLastFilePath(); + /// Read buffer iterator can determine the data format if it's unknown. /// For example by scanning schema cache or by finding new file with format extension. if (!format_name && iterator_data.format_name) @@ -163,7 +168,7 @@ try return {*iterator_data.cached_columns, *format_name}; } - schemas_for_union_mode.emplace_back(iterator_data.cached_columns->getAll(), read_buffer_iterator.getLastFileName()); + schemas_for_union_mode.emplace_back(iterator_data.cached_columns->getAll(), read_buffer_iterator.getLastFilePath()); continue; } @@ -249,7 +254,7 @@ try if (!names_and_types.empty()) read_buffer_iterator.setSchemaToLastFile(ColumnsDescription(names_and_types)); - schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFileName()); + schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFilePath()); } catch (...) { @@ -410,7 +415,7 @@ try throw Exception(ErrorCodes::CANNOT_DETECT_FORMAT, "The data format cannot be detected by the contents of the files. 
You can specify the format manually"); read_buffer_iterator.setSchemaToLastFile(ColumnsDescription(names_and_types)); - schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFileName()); + schemas_for_union_mode.emplace_back(names_and_types, read_buffer_iterator.getLastFilePath()); } if (format_name && mode == SchemaInferenceMode::DEFAULT) @@ -526,9 +531,9 @@ try } catch (Exception & e) { - auto file_name = read_buffer_iterator.getLastFileName(); - if (!file_name.empty()) - e.addMessage(fmt::format("(in file/uri {})", file_name)); + auto file_path = read_buffer_iterator.getLastFilePath(); + if (!file_path.empty()) + e.addMessage(fmt::format("(in file/uri {})", file_path)); throw; } @@ -536,17 +541,19 @@ ColumnsDescription readSchemaFromFormat( const String & format_name, const std::optional & format_settings, IReadBufferIterator & read_buffer_iterator, + std::string & sample_path, const ContextPtr & context) { - return readSchemaFromFormatImpl(format_name, format_settings, read_buffer_iterator, context).first; + return readSchemaFromFormatImpl(format_name, format_settings, read_buffer_iterator, sample_path, context).first; } std::pair detectFormatAndReadSchema( const std::optional & format_settings, IReadBufferIterator & read_buffer_iterator, + std::string & sample_path, const ContextPtr & context) { - return readSchemaFromFormatImpl(std::nullopt, format_settings, read_buffer_iterator, context); + return readSchemaFromFormatImpl(std::nullopt, format_settings, read_buffer_iterator, sample_path, context); } SchemaCache::Key getKeyForSchemaCache( diff --git a/src/Formats/ReadSchemaUtils.h b/src/Formats/ReadSchemaUtils.h index bb5e068f696..6c562a06bf0 100644 --- a/src/Formats/ReadSchemaUtils.h +++ b/src/Formats/ReadSchemaUtils.h @@ -56,8 +56,8 @@ struct IReadBufferIterator /// Set auto detected format name. virtual void setFormatName(const String & /*format_name*/) {} - /// Get last processed file name for better exception messages. - virtual String getLastFileName() const { return ""; } + /// Get last processed file path for better exception messages. + virtual String getLastFilePath() const { return ""; } /// Return true if method recreateLastReadBuffer is implemented. virtual bool supportsLastReadBufferRecreation() const { return false; } @@ -122,6 +122,7 @@ ColumnsDescription readSchemaFromFormat( const String & format_name, const std::optional & format_settings, IReadBufferIterator & read_buffer_iterator, + std::string & sample_path, const ContextPtr & context); /// Try to detect the format of the data and it's schema. 
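Both schema-inference entry points now thread a sample_path out-parameter: the read-buffer iterator's getLastFilePath() fills it with the first processed file, and the storage layer later derives hive-style virtual columns from it via parseFromPath in VirtualColumnUtils.cpp (shown further down). A rough Python equivalent of that parsing, using the same "/([^/]+)=([^/]+)" regex and "_" key prefix as the C++ code; the typing step (tryInferDataTypeForSingleField with a String fallback) is omitted:

    import re

    # Each "/key=value" path segment becomes a "_key" -> "value" entry,
    # mirroring repeated RE2::FindAndConsume over the same pattern.
    def parse_from_path(path):
        return {"_" + k: v for k, v in re.findall(r"/([^/]+)=([^/]+)", path)}

    assert parse_from_path("a/column1=Elizabeth/column2=Gordon/sample.csv") == {
        "_column1": "Elizabeth",
        "_column2": "Gordon",
    }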
@@ -131,6 +132,7 @@ ColumnsDescription readSchemaFromFormat( std::pair detectFormatAndReadSchema( const std::optional & format_settings, IReadBufferIterator & read_buffer_iterator, + std::string & sample_path, const ContextPtr & context); SchemaCache::Key getKeyForSchemaCache(const String & source, const String & format, const std::optional & format_settings, const ContextPtr & context); diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 83865c47eb8..5c40cda442b 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -89,8 +89,9 @@ public: { ConfigurationPtr configuration = base_configuration->clone(); configuration->setPaths(metadata->getDataFiles()); + std::string sample_string; return Storage::resolveSchemaFromData( - object_storage_, configuration, format_settings_, local_context); + object_storage_, configuration, format_settings_, sample_string, local_context); } } diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index 78cdc442f64..a47049791ae 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -131,7 +131,7 @@ void ReadBufferIterator::setFormatName(const String & format_name) format = format_name; } -String ReadBufferIterator::getLastFileName() const +String ReadBufferIterator::getLastFilePath() const { if (current_object_info) return current_object_info->getPath(); diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.h b/src/Storages/ObjectStorage/ReadBufferIterator.h index 6eeb52ec2ed..b81aebb7b07 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.h +++ b/src/Storages/ObjectStorage/ReadBufferIterator.h @@ -33,7 +33,7 @@ public: void setResultingSchema(const ColumnsDescription & columns) override; - String getLastFileName() const override; + String getLastFilePath() const override; void setFormatName(const String & format_name) override; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index ae7c211330c..717f48983f3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -33,17 +33,22 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } - -bool checkIfHiveSettingEnabled(const ContextPtr & context, const std::string & storage_type_name) +std::string StorageObjectStorage::getPathSample(StorageInMemoryMetadata metadata, ContextPtr context) { - if (storage_type_name == "s3") - return context->getSettings().s3_hive_partitioning; - else if (storage_type_name == "hdfs") - return context->getSettings().hdfs_hive_partitioning; - else if (storage_type_name == "azure") - return context->getSettings().azure_blob_storage_hive_partitioning; - else - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported storage type: {}", storage_type_name); + auto file_iterator = StorageObjectStorageSource::createFileIterator( + configuration, + object_storage, + distributed_processing, + context, + {}, // predicate + metadata.getColumns().getAll(), // virtual_columns + nullptr, // read_keys + {} // file_progress_callback + ); + + if (auto file = file_iterator->next(0)) + return file->getPath(); + return ""; } StorageObjectStorage::StorageObjectStorage( @@ -66,7 +71,9 @@ StorageObjectStorage::StorageObjectStorage( , log(getLogger(fmt::format("Storage{}({})", 
configuration->getEngineName(), table_id_.getFullTableName()))) { ColumnsDescription columns{columns_}; - resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, context); + + std::string sample_path; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, sample_path, context); configuration->check(context); StorageInMemoryMetadata metadata; @@ -74,23 +81,13 @@ StorageObjectStorage::StorageObjectStorage( metadata.setConstraints(constraints_); metadata.setComment(comment); - auto file_iterator = StorageObjectStorageSource::createFileIterator( - configuration, - object_storage, - distributed_processing_, - context, - {}, // predicate - metadata.getColumns().getAll(), // virtual_columns - nullptr, // read_keys - {} // file_progress_callback - ); + + if (sample_path.empty() && context->getSettings().use_hive_partitioning) + sample_path = getPathSample(metadata, context); + else if (!context->getSettings().use_hive_partitioning) + sample_path = ""; - Strings paths; - - if (checkIfHiveSettingEnabled(context, configuration->getTypeName())) - if (auto file = file_iterator->next(0)) - paths = {file->getPath()}; - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), paths)); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), sample_path)); setInMemoryMetadata(metadata); } @@ -386,33 +383,36 @@ ColumnsDescription StorageObjectStorage::resolveSchemaFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context) { ObjectInfos read_keys; auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); - return readSchemaFromFormat(configuration->format, format_settings, *iterator, context); + return readSchemaFromFormat(configuration->format, format_settings, *iterator, sample_path, context); } std::string StorageObjectStorage::resolveFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context) { ObjectInfos read_keys; auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); - return detectFormatAndReadSchema(format_settings, *iterator, context).second; + return detectFormatAndReadSchema(format_settings, *iterator, sample_path, context).second; } std::pair StorageObjectStorage::resolveSchemaAndFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context) { ObjectInfos read_keys; auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); - auto [columns, format] = detectFormatAndReadSchema(format_settings, *iterator, context); + auto [columns, format] = detectFormatAndReadSchema(format_settings, *iterator, sample_path, context); configuration->format = format; return std::pair(columns, format); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index cf8ec113653..dd7ec31c970 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -42,6 +42,7 @@ public: size_t list_object_keys_size; bool 
throw_on_zero_files_match; bool ignore_non_existent_file; + bool use_hive_partitioning; }; StorageObjectStorage( @@ -100,23 +101,28 @@ public: const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context); static std::string resolveFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context); static std::pair resolveSchemaAndFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, + std::string & sample_path, const ContextPtr & context); protected: virtual void updateConfiguration(ContextPtr local_context); + std::string getPathSample(StorageInMemoryMetadata metadata, ContextPtr context); + static std::unique_ptr createReadBufferIterator( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 78f568d8ae2..0dc4b845a47 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -33,7 +33,8 @@ StorageObjectStorageCluster::StorageObjectStorageCluster( , object_storage(object_storage_) { ColumnsDescription columns{columns_}; - resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, {}, context_); + std::string sample_path; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, {}, sample_path, context_); configuration->check(context_); StorageInMemoryMetadata metadata; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index afb23961312..ecb3ff9d856 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -197,16 +197,6 @@ Chunk StorageObjectStorageSource::generate() const auto & filename = object_info->getFileName(); chassert(object_info->metadata); - auto hive_map = VirtualColumnUtils::parsePartitionMapFromPath(object_info->getPath()); - bool contains_virtual_column = std::any_of(hive_map.begin(), hive_map.end(), - [&](const auto& pair) - { - return read_from_format_info.requested_virtual_columns.contains(pair.first); - }); - - if (!contains_virtual_column) - hive_map.clear(); // If we cannot find any virtual column in requested, we don't add any of them to chunk - VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( chunk, read_from_format_info.requested_virtual_columns, { @@ -214,8 +204,7 @@ Chunk StorageObjectStorageSource::generate() .size = object_info->metadata->size_bytes, .filename = &filename, .last_modified = object_info->metadata->last_modified, - .hive_partitioning_map = hive_map - }); + }, object_info->getPath()); return chunk; } diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp index e49e14d2a0c..73410d959e0 100644 --- a/src/Storages/ObjectStorage/Utils.cpp +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -49,19 +49,20 @@ void resolveSchemaAndFormat( ObjectStoragePtr object_storage, const StorageObjectStorage::ConfigurationPtr & configuration, std::optional format_settings, + std::string & sample_path, const ContextPtr & context) { if (columns.empty()) 
{ if (format == "auto") std::tie(columns, format) = - StorageObjectStorage::resolveSchemaAndFormatFromData(object_storage, configuration, format_settings, context); + StorageObjectStorage::resolveSchemaAndFormatFromData(object_storage, configuration, format_settings, sample_path, context); else - columns = StorageObjectStorage::resolveSchemaFromData(object_storage, configuration, format_settings, context); + columns = StorageObjectStorage::resolveSchemaFromData(object_storage, configuration, format_settings, sample_path, context); } else if (format == "auto") { - format = StorageObjectStorage::resolveFormatFromData(object_storage, configuration, format_settings, context); + format = StorageObjectStorage::resolveFormatFromData(object_storage, configuration, format_settings, sample_path, context); } if (!columns.hasOnlyOrdinary()) diff --git a/src/Storages/ObjectStorage/Utils.h b/src/Storages/ObjectStorage/Utils.h index 2077999df41..7ee14f50979 100644 --- a/src/Storages/ObjectStorage/Utils.h +++ b/src/Storages/ObjectStorage/Utils.h @@ -19,6 +19,7 @@ void resolveSchemaAndFormat( ObjectStoragePtr object_storage, const StorageObjectStorage::ConfigurationPtr & configuration, std::optional format_settings, + std::string & sample_path, const ContextPtr & context); } diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 95265cde9ea..c12cdddeec7 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -160,7 +160,8 @@ StorageObjectStorageQueue::StorageObjectStorageQueue( configuration->check(context_); ColumnsDescription columns{columns_}; - resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, context_); + std::string sample_path; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, sample_path, context_); configuration->check(context_); StorageInMemoryMetadata storage_metadata; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 0c32f29cb34..9751d596fff 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -502,7 +502,7 @@ namespace StorageFile::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); } - String getLastFileName() const override + String getLastFilePath() const override { if (current_index != 0) return paths[current_index - 1]; @@ -777,7 +777,7 @@ namespace format = format_name; } - String getLastFileName() const override + String getLastFilePath() const override { return last_read_file_path; } @@ -880,10 +880,11 @@ std::pair StorageFile::getTableStructureAndFormatFro auto read_buffer_iterator = SingleReadBufferIterator(std::move(read_buf)); ColumnsDescription columns; + std::string sample_path; if (format) - columns = readSchemaFromFormat(*format, format_settings, read_buffer_iterator, context); + columns = readSchemaFromFormat(*format, format_settings, read_buffer_iterator, sample_path, context); else - std::tie(columns, format) = detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); + std::tie(columns, format) = detectFormatAndReadSchema(format_settings, read_buffer_iterator, sample_path, context); peekable_read_buffer_from_fd = read_buffer_iterator.releaseBuffer(); if (peekable_read_buffer_from_fd) @@ -928,20 +929,21 @@ std::pair StorageFile::getTableStructureAndFormatFro } + std::string sample_path; if (archive_info) { 
ReadBufferFromArchiveIterator read_buffer_iterator(*archive_info, format, format_settings, context); if (format) - return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, context), *format}; + return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, sample_path, context), *format}; - return detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); + return detectFormatAndReadSchema(format_settings, read_buffer_iterator, sample_path, context); } ReadBufferFromFileIterator read_buffer_iterator(paths, format, compression_method, format_settings, context); if (format) - return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, context), *format}; + return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, sample_path, context), *format}; - return detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); + return detectFormatAndReadSchema(format_settings, read_buffer_iterator, sample_path, context); } ColumnsDescription StorageFile::getTableStructureFromFile( @@ -1097,10 +1099,10 @@ void StorageFile::setStorageMetadata(CommonArguments args) storage_metadata.setComment(args.comment); setInMemoryMetadata(storage_metadata); - Strings paths_for_virtuals; - if (args.getContext()->getSettingsRef().file_hive_partitioning) - paths_for_virtuals = paths; - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), paths_for_virtuals)); + std::string path_for_virtuals; + if (args.getContext()->getSettingsRef().use_hive_partitioning && !paths.empty()) + path_for_virtuals = paths[0]; + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), path_for_virtuals, format_settings.value_or(FormatSettings{}))); } @@ -1442,14 +1444,9 @@ Chunk StorageFileSource::generate() chunk_size = input_format->getApproxBytesReadForChunk(); progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - std::map hive_map; - if (getContext()->getSettingsRef().file_hive_partitioning) - { - hive_map = VirtualColumnUtils::parsePartitionMapFromPath(current_path); - - for (const auto& item : hive_map) - requested_virtual_columns.push_back(NameAndTypePair(item.first, std::make_shared())); - } + std::string hive_partitioning_path; + if (getContext()->getSettingsRef().use_hive_partitioning) + hive_partitioning_path = current_path; /// Enrich with virtual columns. VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( @@ -1459,8 +1456,7 @@ Chunk StorageFileSource::generate() .size = current_file_size, .filename = (filename_override.has_value() ? 
&filename_override.value() : nullptr), .last_modified = current_file_last_modified, - .hive_partitioning_map = hive_map - }); + }, hive_partitioning_path); return chunk; } @@ -1636,16 +1632,6 @@ void ReadFromFile::createIterator(const ActionsDAG::Node * predicate) storage->distributed_processing); } -void addPartitionColumnsToInfoHeader(Strings paths, ReadFromFormatInfo & info) -{ - for (const auto& path : paths) - { - auto map = VirtualColumnUtils::parsePartitionMapFromPath(path); - for (const auto& item : map) - info.source_header.insertUnique(ColumnWithTypeAndName(std::make_shared(), item.first)); - } -} - void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { createIterator(nullptr); @@ -1665,9 +1651,6 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui paths = storage->paths; } - if (getContext()->getSettingsRef().file_hive_partitioning) - addPartitionColumnsToInfoHeader(paths, info); - if (max_num_streams > files_to_read) num_streams = files_to_read; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index f6374701fc2..59c5465a381 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -153,10 +153,10 @@ IStorageURLBase::IStorageURLBase( storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - Strings uri_for_partitioning; - if (context_->getSettingsRef().url_hive_partitioning) - uri_for_partitioning = {uri}; - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), uri_for_partitioning)); + std::string uri_for_partitioning; + if (context_->getSettingsRef().use_hive_partitioning) + uri_for_partitioning = uri; + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), uri_for_partitioning, format_settings.value_or(FormatSettings{}))); } @@ -415,9 +415,9 @@ Chunk StorageURLSource::generate() size_t chunk_size = 0; if (input_format) chunk_size = input_format->getApproxBytesReadForChunk(); - std::map hive_map; - if (getContext()->getSettingsRef().url_hive_partitioning) - hive_map = VirtualColumnUtils::parsePartitionMapFromPath(curr_uri.getPath()); + std::string hive_partitioning_path; + if (getContext()->getSettingsRef().use_hive_partitioning) + hive_partitioning_path = curr_uri.getPath(); progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( @@ -425,8 +425,7 @@ Chunk StorageURLSource::generate() { .path = curr_uri.getPath(), .size = current_file_size, - .hive_partitioning_map = hive_map - }); + }, hive_partitioning_path); return chunk; } @@ -859,7 +858,7 @@ namespace format = format_name; } - String getLastFileName() const override { return current_url_option; } + String getLastFilePath() const override { return current_url_option; } bool supportsLastReadBufferRecreation() const override { return true; } @@ -960,9 +959,10 @@ std::pair IStorageURLBase::getTableStructureAndForma urls_to_check = {uri}; ReadBufferIterator read_buffer_iterator(urls_to_check, format, compression_method, headers, format_settings, context); + std::string sample_path; if (format) - return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, context), *format}; - return detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); + return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, sample_path, context), *format}; + return detectFormatAndReadSchema(format_settings, read_buffer_iterator, sample_path, context); } ColumnsDescription IStorageURLBase::getTableStructureFromData( diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 0b79e3b7a16..379b14d8e51 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include "Functions/FunctionsLogical.h" #include "Functions/IFunction.h" #include "Functions/IFunctionAdaptors.h" @@ -115,22 +116,19 @@ NameSet getVirtualNamesForFileLikeStorage() return {"_path", "_file", "_size", "_time"}; } -Strings parseVirtualColumnNameFromPath(const std::string & path) +std::map parseFromPath(const std::string& path) { std::string pattern = "/([^/]+)=([^/]+)"; - // Map to store the key-value pairs - std::map key_values; - re2::StringPiece input_piece(path); - std::string key; - Strings result; - while (RE2::FindAndConsume(&input_piece, pattern, &key)) - result.push_back(key); - return result; + std::map key_values; + std::string key, value; + while (RE2::FindAndConsume(&input_piece, pattern, &key, &value)) + key_values["_" + key] = value; + return key_values; } -VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, Strings paths) +VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, std::string path, FormatSettings settings) { VirtualColumnsDescription desc; @@ -147,11 +145,13 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription add_virtual("_size", makeNullable(std::make_shared())); add_virtual("_time", makeNullable(std::make_shared())); - for (const auto& path : paths) + auto map = parseFromPath(path); + for (const auto& item : map) { - auto names = parseVirtualColumnNameFromPath(path); - for (const auto& name : names) - add_virtual("_" + name, std::make_shared(std::make_shared())); + auto type = tryInferDataTypeForSingleField(item.second, settings); + if (type == nullptr) + type = std::make_shared(); + add_virtual(item.first, std::make_shared(type)); } return desc; @@ -213,25 +213,11 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const return block.getByName("_idx").column; } -std::map parsePartitionMapFromPath(const std::string & path) -{ - std::string pattern = "/([^/]+)=([^/]+)"; // Regex to capture 
key=value pairs - // Map to store the key-value pairs - std::map key_values; - - re2::StringPiece input_piece(path); - std::string key; - std::string value; - while (RE2::FindAndConsume(&input_piece, pattern, &key, &value)) - key_values["_" + key] = value; - - return key_values; -} - void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, - VirtualsForFileLikeStorage virtual_values) + VirtualsForFileLikeStorage virtual_values, const std::string & hive_partitioning_path) { + auto hive_map = parseFromPath(hive_partitioning_path); for (const auto & virtual_column : requested_virtual_columns) { if (virtual_column.name == "_path") @@ -265,13 +251,22 @@ void addRequestedFileLikeStorageVirtualsToChunk( else chunk.addColumn(virtual_column.type->createColumnConstWithDefaultValue(chunk.getNumRows())->convertToFullColumnIfConst()); } - else + else if (!hive_map.empty()) { - auto it = virtual_values.hive_partitioning_map.find(virtual_column.getNameInStorage()); - if (it != virtual_values.hive_partitioning_map.end()) + bool contains_virtual_column = std::any_of(hive_map.begin(), hive_map.end(), + [&](const auto& pair) + { + return requested_virtual_columns.contains(pair.first); + }); + + if (!contains_virtual_column) + hive_map.clear(); // If we cannot find any virtual column in requested, we don't add any of them to chunk + + auto it = hive_map.find(virtual_column.getNameInStorage()); + if (it != hive_map.end()) { chunk.addColumn(virtual_column.getTypeInStorage()->createColumnConst(chunk.getNumRows(), it->second)->convertToFullColumnIfConst()); - virtual_values.hive_partitioning_map.erase(it); + hive_map.erase(it); } } } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index f9b49cc48ed..72922be60bd 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -49,7 +50,7 @@ auto extractSingleValueFromBlock(const Block & block, const String & name) } NameSet getVirtualNamesForFileLikeStorage(); -VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, Strings paths = {}); +VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, std::string path = "", FormatSettings settings = FormatSettings()); ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns); @@ -76,15 +77,13 @@ struct VirtualsForFileLikeStorage std::optional size { std::nullopt }; const String * filename { nullptr }; std::optional last_modified { std::nullopt }; - std::map hive_partitioning_map {}; - }; -std::map parsePartitionMapFromPath(const std::string & path); +std::map parseFromPath(const std::string& path); void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, - VirtualsForFileLikeStorage virtual_values); + VirtualsForFileLikeStorage virtual_values, const std::string & hive_partitioning_path = ""); } } diff --git a/src/TableFunctions/TableFunctionFormat.cpp b/src/TableFunctions/TableFunctionFormat.cpp index ad2a142a140..66152cb0c91 100644 --- a/src/TableFunctions/TableFunctionFormat.cpp +++ b/src/TableFunctions/TableFunctionFormat.cpp @@ -85,9 +85,10 @@ ColumnsDescription TableFunctionFormat::getActualTableStructure(ContextPtr conte if (structure == "auto") { SingleReadBufferIterator read_buffer_iterator(std::make_unique(data)); + 
std::string sample_path; if (format == "auto") - return detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, context).first; - return readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, context); + return detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, sample_path, context).first; + return readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, sample_path, context); } return parseColumnsListFromString(structure, context); } @@ -131,11 +132,12 @@ StoragePtr TableFunctionFormat::executeImpl(const ASTPtr & /*ast_function*/, Con String format_name = format; if (structure == "auto") { + std::string sample_path; SingleReadBufferIterator read_buffer_iterator(std::make_unique(data)); if (format_name == "auto") - std::tie(columns, format_name) = detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, context); + std::tie(columns, format_name) = detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, sample_path, context); else - columns = readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, context); + columns = readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, sample_path, context); } else { diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index 550d9cc799b..39392a4c44c 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -84,7 +84,8 @@ ColumnsDescription TableFunctionObjectStorage< context->checkAccess(getSourceAccessType()); ColumnsDescription columns; auto storage = getObjectStorage(context, !is_insert_query); - resolveSchemaAndFormat(columns, configuration->format, storage, configuration, std::nullopt, context); + std::string sample_path; + resolveSchemaAndFormat(columns, configuration->format, storage, configuration, std::nullopt, sample_path, context); return columns; } else diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/__init__.py b/tests/integration/test_hive_style_partitioning_hdfs_azure/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/cluster_azure.xml b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/cluster_azure.xml deleted file mode 100644 index ffa4673c9ee..00000000000 --- a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/cluster_azure.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - - node_0 - 9000 - - - node_1 - 9000 - - - node_2 - 9000 - - - - - - - - node_0 - 9000 - - - - - node_1 - 19000 - - - - - - - simple_cluster - - \ No newline at end of file diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/cluster_hdfs.xml b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/cluster_hdfs.xml deleted file mode 100644 index b99b21ea40b..00000000000 --- a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/cluster_hdfs.xml +++ /dev/null @@ -1,33 +0,0 @@ - - - - - - node1 - 9000 - - - - - node1 - 19000 - - - - - - - - 127.0.0.1 - 9000 - - - - - 127.0.0.2 - 9000 - - - - - diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/disable_profilers_azure.xml b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/disable_profilers_azure.xml deleted file mode 100644 index a39badbf8ec..00000000000 --- a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/disable_profilers_azure.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - 0 - 
0 - - - diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/macro_hdfs.xml b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/macro_hdfs.xml deleted file mode 100644 index c2e11b47a5e..00000000000 --- a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/macro_hdfs.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - test_cluster_two_shards - - \ No newline at end of file diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/named_collections_azure.xml b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/named_collections_azure.xml deleted file mode 100644 index bd7f9ff97f1..00000000000 --- a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/named_collections_azure.xml +++ /dev/null @@ -1,14 +0,0 @@ - - - - cont - test_simple_write_named.csv - key UInt64, data String - CSV - - - devstoreaccount1 - Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== - - - diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/schema_cache_azure.xml b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/schema_cache_azure.xml deleted file mode 100644 index e2168ecd06d..00000000000 --- a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/schema_cache_azure.xml +++ /dev/null @@ -1,3 +0,0 @@ - - 2 - \ No newline at end of file diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/schema_cache_hdfs.xml b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/schema_cache_hdfs.xml deleted file mode 100644 index 37639649b5f..00000000000 --- a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/schema_cache_hdfs.xml +++ /dev/null @@ -1,3 +0,0 @@ - - 2 - \ No newline at end of file diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/users_azure.xml b/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/users_azure.xml deleted file mode 100644 index 4b6ba057ecb..00000000000 --- a/tests/integration/test_hive_style_partitioning_hdfs_azure/configs/users_azure.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - default - 1 - - - diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/test_azure.py b/tests/integration/test_hive_style_partitioning_hdfs_azure/test_azure.py deleted file mode 100644 index 0be697821f0..00000000000 --- a/tests/integration/test_hive_style_partitioning_hdfs_azure/test_azure.py +++ /dev/null @@ -1,219 +0,0 @@ -#!/usr/bin/env python3 - -import pytest -import time - -from helpers.cluster import ClickHouseCluster, is_arm -import re - -from azure.storage.blob import BlobServiceClient -from helpers.cluster import ClickHouseCluster, ClickHouseInstance - -if is_arm(): - pytestmark = pytest.mark.skip - - -@pytest.fixture(scope="module") -def cluster(): - try: - cluster = ClickHouseCluster(__file__) - cluster.add_instance( - "node", - main_configs=[ - "configs/named_collections_azure.xml", - "configs/schema_cache_azure.xml", - ], - user_configs=[ - "configs/disable_profilers_azure.xml", - "configs/users_azure.xml", - ], - with_azurite=True, - ) - cluster.start() - container_client = cluster.blob_service_client.get_container_client("cont") - container_client.create_container() - yield cluster - finally: - cluster.shutdown() - - -def azure_query( - node, query, expect_error=False, try_num=10, settings={}, query_on_retry=None -): - for i in range(try_num): - try: - if expect_error: - return node.query_and_get_error(query, 
settings=settings) - else: - return node.query(query, settings=settings) - except Exception as ex: - retriable_errors = [ - "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", - "DB::Exception: Azure::Core::Http::TransportException: Connection closed before getting full response or response is less than expected", - "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", - "DB::Exception: Azure::Core::Http::TransportException: Error while polling for socket ready read", - "Azure::Core::Http::TransportException, e.what() = Connection was closed by the server while trying to read a response", - "Azure::Core::Http::TransportException, e.what() = Connection closed before getting full response or response is less than expected", - "Azure::Core::Http::TransportException, e.what() = Connection was closed by the server while trying to read a response", - "Azure::Core::Http::TransportException, e.what() = Error while polling for socket ready read", - ] - retry = False - for error in retriable_errors: - if error in str(ex): - retry = True - print(f"Try num: {i}. Having retriable error: {ex}") - time.sleep(i) - break - if not retry or i == try_num - 1: - raise Exception(ex) - if query_on_retry is not None: - node.query(query_on_retry) - continue - - -def get_azure_file_content(filename, port): - container_name = "cont" - connection_string = ( - f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" - f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" - f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;" - ) - blob_service_client = BlobServiceClient.from_connection_string( - str(connection_string) - ) - container_client = blob_service_client.get_container_client(container_name) - blob_client = container_client.get_blob_client(filename) - download_stream = blob_client.download_blob() - return download_stream.readall().decode("utf-8") - - -@pytest.fixture(autouse=True, scope="function") -def delete_all_files(cluster): - port = cluster.env_variables["AZURITE_PORT"] - connection_string = ( - f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" - f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" - f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;" - ) - blob_service_client = BlobServiceClient.from_connection_string(connection_string) - containers = blob_service_client.list_containers() - for container in containers: - container_client = blob_service_client.get_container_client(container) - blob_list = container_client.list_blobs() - for blob in blob_list: - print(blob) - blob_client = container_client.get_blob_client(blob) - blob_client.delete_blob() - - assert len(list(container_client.list_blobs())) == 0 - - yield - - -def test_azure_partitioning_with_one_parameter(cluster): - # type: (ClickHouseCluster) -> None - node = cluster.instances["node"] # type: ClickHouseInstance - table_format = "column1 String, column2 String" - values = f"('Elizabeth', 'Gordon')" - path = "a/column1=Elizabeth/sample.csv" - - azure_query( - node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}", - ) - - query = ( - f"SELECT column1, 
column2, _file, _path, _column1 FROM azureBlobStorage(azure_conf2, " - f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}')" - ) - assert azure_query( - node, query, settings={"azure_blob_storage_hive_partitioning": 1} - ).splitlines() == [ - "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format( - bucket="cont", max_path=path - ) - ] - - query = ( - f"SELECT column2 FROM azureBlobStorage(azure_conf2, " - f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" - ) - assert azure_query( - node, query, settings={"azure_blob_storage_hive_partitioning": 1} - ).splitlines() == ["Gordon"] - - -def test_azure_partitioning_with_two_parameters(cluster): - # type: (ClickHouseCluster) -> None - node = cluster.instances["node"] # type: ClickHouseInstance - table_format = "column1 String, column2 String" - values_1 = f"('Elizabeth', 'Gordon')" - values_2 = f"('Emilia', 'Gregor')" - path = "a/column1=Elizabeth/column2=Gordon/sample.csv" - - azure_query( - node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", - ) - - query = ( - f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, " - f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" - ) - assert azure_query( - node, query, settings={"azure_blob_storage_hive_partitioning": 1} - ).splitlines() == [ - "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth\tGordon".format( - bucket="cont", max_path=path - ) - ] - - query = ( - f"SELECT column1 FROM azureBlobStorage(azure_conf2, " - f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2;" - ) - assert azure_query( - node, query, settings={"azure_blob_storage_hive_partitioning": 1} - ).splitlines() == ["Elizabeth"] - - query = ( - f"SELECT column1 FROM azureBlobStorage(azure_conf2, " - f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2 AND column1=_column1;" - ) - assert azure_query( - node, query, settings={"azure_blob_storage_hive_partitioning": 1} - ).splitlines() == ["Elizabeth"] - - -def test_azure_partitioning_without_setting(cluster): - # type: (ClickHouseCluster) -> None - node = cluster.instances["node"] # type: ClickHouseInstance - table_format = "column1 String, column2 String" - values_1 = f"('Elizabeth', 'Gordon')" - values_2 = f"('Emilia', 'Gregor')" - path = "a/column1=Elizabeth/column2=Gordon/sample.csv" - - azure_query( - node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", - ) - - query = ( - f"SELECT column1, 
column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, " - f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" - ) - pattern = re.compile( - r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL - ) - - with pytest.raises(Exception, match=pattern): - azure_query(node, query, settings={"azure_blob_storage_hive_partitioning": 0}) diff --git a/tests/integration/test_hive_style_partitioning_hdfs_azure/test_hdfs.py b/tests/integration/test_hive_style_partitioning_hdfs_azure/test_hdfs.py deleted file mode 100644 index 4667d18688a..00000000000 --- a/tests/integration/test_hive_style_partitioning_hdfs_azure/test_hdfs.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python3 - -import pytest - -from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster, is_arm -import re - -from helpers.cluster import ClickHouseCluster - -if is_arm(): - pytestmark = pytest.mark.skip - -cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance( - "node1", - main_configs=[ - "configs/macro_hdfs.xml", - "configs/schema_cache_hdfs.xml", - "configs/cluster_hdfs.xml", - ], - with_hdfs=True, -) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - yield cluster - finally: - cluster.shutdown() - - -def test_hdfs_partitioning_with_one_parameter(started_cluster): - hdfs_api = started_cluster.hdfs_api - hdfs_api.write_data(f"/column0=Elizabeth/parquet_1", f"Elizabeth\tGordon\n") - assert hdfs_api.read_data(f"/column0=Elizabeth/parquet_1") == f"Elizabeth\tGordon\n" - - r = node1.query( - "SELECT _column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", - settings={"hdfs_hive_partitioning": 1}, - ) - assert r == f"Elizabeth\n" - - -def test_hdfs_partitioning_with_two_parameters(started_cluster): - hdfs_api = started_cluster.hdfs_api - hdfs_api.write_data( - f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n" - ) - assert ( - hdfs_api.read_data(f"/column0=Elizabeth/column1=Gordon/parquet_2") - == f"Elizabeth\tGordon\n" - ) - - r = node1.query( - "SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", - settings={"hdfs_hive_partitioning": 1}, - ) - assert r == f"Gordon\n" - - -def test_hdfs_partitioning_without_setting(started_cluster): - hdfs_api = started_cluster.hdfs_api - hdfs_api.write_data( - f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n" - ) - assert ( - hdfs_api.read_data(f"/column0=Elizabeth/column1=Gordon/parquet_2") - == f"Elizabeth\tGordon\n" - ) - pattern = re.compile( - r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL - ) - - with pytest.raises(QueryRuntimeException, match=pattern): - node1.query( - f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", - settings={"hdfs_hive_partitioning": 0}, - ) - - -if __name__ == "__main__": - cluster.start() - input("Cluster created, press any key to destroy...") - cluster.shutdown() diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 20b004a7605..893df6d23aa 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -5,6 +5,7 @@ import json import logging import os import io 
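
These tests lean on hive-style layouts where directory names carry column values, e.g. column0=Elizabeth/column1=Gordon/sample.parquet, surfaced to queries as _column0/_column1 virtual columns. A rough sketch of extracting such key=value segments from a path (a hypothetical helper for illustration, not ClickHouse's actual implementation):

    def parse_hive_partitions(path):
        # "a/column0=Elizabeth/column1=Gordon/sample.csv"
        #   -> {"column0": "Elizabeth", "column1": "Gordon"}
        result = {}
        for segment in path.split("/"):
            key, sep, value = segment.partition("=")
            if sep and key:
                result[key] = value
        return result

    assert parse_hive_partitions("a/column0=Elizabeth/column1=Gordon/sample.csv") == {
        "column0": "Elizabeth",
        "column1": "Gordon",
    }
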
+import re import random import threading import time @@ -1462,3 +1463,112 @@ def test_insert_create_new_file(cluster): assert TSV(res) == TSV( "test_create_new_file.csv\t1\ntest_create_new_file.1.csv\t2\n" ) + + +def test_hive_partitioning_with_one_parameter(cluster): + # type: (ClickHouseCluster) -> None + node = cluster.instances["node"] # type: ClickHouseInstance + table_format = "column1 String, column2 String" + values = f"('Elizabeth', 'Gordon')" + path = "a/column1=Elizabeth/sample.csv" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," + f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}", + ) + + query = ( + f"SELECT column1, column2, _file, _path, _column1 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}')" + ) + assert azure_query( + node, query, settings={"azure_blob_storage_hive_partitioning": 1} + ).splitlines() == [ + "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format( + bucket="cont", max_path=path + ) + ] + + query = ( + f"SELECT column2 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" + ) + assert azure_query( + node, query, settings={"azure_blob_storage_hive_partitioning": 1} + ).splitlines() == ["Gordon"] + + +def test_hive_partitioning_with_two_parameters(cluster): + # type: (ClickHouseCluster) -> None + node = cluster.instances["node"] # type: ClickHouseInstance + table_format = "column1 String, column2 String" + values_1 = f"('Elizabeth', 'Gordon')" + values_2 = f"('Emilia', 'Gregor')" + path = "a/column1=Elizabeth/column2=Gordon/sample.csv" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," + f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + ) + + query = ( + f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" + ) + assert azure_query( + node, query, settings={"azure_blob_storage_hive_partitioning": 1} + ).splitlines() == [ + "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth\tGordon".format( + bucket="cont", max_path=path + ) + ] + + query = ( + f"SELECT column1 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2;" + ) + assert azure_query( + node, query, settings={"azure_blob_storage_hive_partitioning": 1} + ).splitlines() == ["Elizabeth"] + + query = ( + f"SELECT column1 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2 AND 
column1=_column1;" + ) + assert azure_query( + node, query, settings={"azure_blob_storage_hive_partitioning": 1} + ).splitlines() == ["Elizabeth"] + + +def test_hive_partitioning_without_setting(cluster): + # type: (ClickHouseCluster) -> None + node = cluster.instances["node"] # type: ClickHouseInstance + table_format = "column1 String, column2 String" + values_1 = f"('Elizabeth', 'Gordon')" + values_2 = f"('Emilia', 'Gregor')" + path = "a/column1=Elizabeth/column2=Gordon/sample.csv" + + azure_query( + node, + f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," + f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + ) + + query = ( + f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, " + f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " + f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" + ) + pattern = re.compile( + r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL + ) + + with pytest.raises(Exception, match=pattern): + azure_query(node, query, settings={"azure_blob_storage_hive_partitioning": 0}) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 47d8f44c0b7..8071b520a4f 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -3,6 +3,7 @@ import os import pytest import time from helpers.cluster import ClickHouseCluster, is_arm +from helpers.client import QueryRuntimeException from helpers.test_tools import TSV from pyhdfs import HdfsClient @@ -1180,6 +1181,54 @@ def test_respect_object_existence_on_partitioned_write(started_cluster): assert int(result) == 44 +def test_hive_partitioning_with_one_parameter(started_cluster): + hdfs_api = started_cluster.hdfs_api + hdfs_api.write_data(f"/column0=Elizabeth/parquet_1", f"Elizabeth\tGordon\n") + assert hdfs_api.read_data(f"/column0=Elizabeth/parquet_1") == f"Elizabeth\tGordon\n" + + r = node1.query( + "SELECT _column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", + settings={"hdfs_hive_partitioning": 1}, + ) + assert r == f"Elizabeth\n" + + +def test_hive_partitioning_with_two_parameters(started_cluster): + hdfs_api = started_cluster.hdfs_api + hdfs_api.write_data( + f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n" + ) + assert ( + hdfs_api.read_data(f"/column0=Elizabeth/column1=Gordon/parquet_2") + == f"Elizabeth\tGordon\n" + ) + + r = node1.query( + "SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", + settings={"hdfs_hive_partitioning": 1}, + ) + assert r == f"Gordon\n" + + +def test_hive_partitioning_without_setting(started_cluster): + hdfs_api = started_cluster.hdfs_api + hdfs_api.write_data( + f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n" + ) + assert ( + hdfs_api.read_data(f"/column0=Elizabeth/column1=Gordon/parquet_2") + == f"Elizabeth\tGordon\n" + ) + pattern = re.compile( + r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL + ) + + with pytest.raises(QueryRuntimeException, match=pattern): + node1.query( + f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", + settings={"hdfs_hive_partitioning": 0}, + ) + if 
__name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") From 3fb50aa5d8560f3cd5f4fc9b35bfa47a60d2ca80 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 5 Jul 2024 13:28:47 +0000 Subject: [PATCH 073/265] style fix --- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 1 - tests/integration/test_storage_hdfs/test.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 717f48983f3..4b5b514e67d 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -81,7 +81,6 @@ StorageObjectStorage::StorageObjectStorage( metadata.setConstraints(constraints_); metadata.setComment(comment); - if (sample_path.empty() && context->getSettings().use_hive_partitioning) sample_path = getPathSample(metadata, context); else if (!context->getSettings().use_hive_partitioning) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 8071b520a4f..da46756841d 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1229,6 +1229,7 @@ def test_hive_partitioning_without_setting(started_cluster): settings={"hdfs_hive_partitioning": 0}, ) + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") From 6c4c17f119fb95d66f894a92ce8c91fa6664ff5b Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 5 Jul 2024 16:44:26 +0200 Subject: [PATCH 074/265] remove use_hive_partitioning from query settings --- src/Storages/ObjectStorage/StorageObjectStorage.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 3fbfc3aacd7..f97d2620fe5 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -43,7 +43,6 @@ public: size_t list_object_keys_size; bool throw_on_zero_files_match; bool ignore_non_existent_file; - bool use_hive_partitioning; }; StorageObjectStorage( From 9064bb1b8389e301f45bc78a5365665292f51c6e Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 5 Jul 2024 19:36:59 +0200 Subject: [PATCH 075/265] fix settings in tests --- .../test_storage_azure_blob_storage/test.py | 12 ++++++------ tests/integration/test_storage_hdfs/test.py | 6 +++--- .../0_stateless/03203_hive_style_partitioning.sh | 12 ++++++------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index f2a1f9e35a9..6966abfee4f 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -1484,7 +1484,7 @@ def test_hive_partitioning_with_one_parameter(cluster): f"blob_path='{path}', format='CSV', structure='{table_format}')" ) assert azure_query( - node, query, settings={"azure_blob_storage_hive_partitioning": 1} + node, query, settings={"use_hive_partitioning": 1} ).splitlines() == [ "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format( bucket="cont", max_path=path @@ -1497,7 +1497,7 @@ def test_hive_partitioning_with_one_parameter(cluster): f"blob_path='{path}', format='CSV', 
structure='{table_format}') WHERE column1=_column1;" ) assert azure_query( - node, query, settings={"azure_blob_storage_hive_partitioning": 1} + node, query, settings={"use_hive_partitioning": 1} ).splitlines() == ["Gordon"] @@ -1521,7 +1521,7 @@ def test_hive_partitioning_with_two_parameters(cluster): f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;" ) assert azure_query( - node, query, settings={"azure_blob_storage_hive_partitioning": 1} + node, query, settings={"use_hive_partitioning": 1} ).splitlines() == [ "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth\tGordon".format( bucket="cont", max_path=path @@ -1534,7 +1534,7 @@ def test_hive_partitioning_with_two_parameters(cluster): f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2;" ) assert azure_query( - node, query, settings={"azure_blob_storage_hive_partitioning": 1} + node, query, settings={"use_hive_partitioning": 1} ).splitlines() == ["Elizabeth"] query = ( @@ -1543,7 +1543,7 @@ def test_hive_partitioning_with_two_parameters(cluster): f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2 AND column1=_column1;" ) assert azure_query( - node, query, settings={"azure_blob_storage_hive_partitioning": 1} + node, query, settings={"use_hive_partitioning": 1} ).splitlines() == ["Elizabeth"] @@ -1571,4 +1571,4 @@ def test_hive_partitioning_without_setting(cluster): ) with pytest.raises(Exception, match=pattern): - azure_query(node, query, settings={"azure_blob_storage_hive_partitioning": 0}) + azure_query(node, query, settings={"use_hive_partitioning": 0}) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index da46756841d..aa3efb8ba4a 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1188,7 +1188,7 @@ def test_hive_partitioning_with_one_parameter(started_cluster): r = node1.query( "SELECT _column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", - settings={"hdfs_hive_partitioning": 1}, + settings={"use_hive_partitioning": 1}, ) assert r == f"Elizabeth\n" @@ -1205,7 +1205,7 @@ def test_hive_partitioning_with_two_parameters(started_cluster): r = node1.query( "SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", - settings={"hdfs_hive_partitioning": 1}, + settings={"use_hive_partitioning": 1}, ) assert r == f"Gordon\n" @@ -1226,7 +1226,7 @@ def test_hive_partitioning_without_setting(started_cluster): with pytest.raises(QueryRuntimeException, match=pattern): node1.query( f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", - settings={"hdfs_hive_partitioning": 0}, + settings={"use_hive_partitioning": 0}, ) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 98c039f3454..544fd17ffff 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE FILE HIVE PARTITIONING'" $CLICKHOUSE_LOCAL -n -q """ -set file_hive_partitioning = 1; +set use_hive_partitioning = 1; SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; @@ -31,7 +31,7 @@ SELECT *, _non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_exi SELECT *, _column0 FROM 
file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = _column0;""" $CLICKHOUSE_LOCAL -n -q """ -set file_hive_partitioning = 0; +set use_hive_partitioning = 0; SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" @@ -41,7 +41,7 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE URL PARTITIONING'" $CLICKHOUSE_LOCAL -n -q """ -set url_hive_partitioning = 1; +set use_hive_partitioning = 1; SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; @@ -63,7 +63,7 @@ SELECT *, _non_existing_column FROM url('http://localhost:11111/test/partitionin SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=*/sample.parquet') WHERE column0 = _column0;""" $CLICKHOUSE_LOCAL -n -q """ -set url_hive_partitioning = 0; +set use_hive_partitioning = 0; SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" @@ -73,7 +73,7 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3 PARTITIONING'" $CLICKHOUSE_LOCAL -n -q """ -set s3_hive_partitioning = 1; +set use_hive_partitioning = 1; SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; @@ -96,7 +96,7 @@ SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=*/s """ $CLICKHOUSE_LOCAL -n -q """ -set s3_hive_partitioning = 0; +set use_hive_partitioning = 0; SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" From ebb10d7f8fe16e533593178a1778632c00a3c1b7 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Sat, 6 Jul 2024 02:12:01 +0000 Subject: [PATCH 076/265] add rebuild option in projection and LWD --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.cpp | 2 +- src/Core/SettingsEnums.cpp | 3 +- src/Core/SettingsEnums.h | 1 + src/Interpreters/InterpreterDeleteQuery.cpp | 56 +++++++++++++++---- ...61_lightweight_delete_projection.reference | 3 + .../03161_lightweight_delete_projection.sql | 27 +++++++++ 7 files changed, 80 insertions(+), 14 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4a343d864db..bd691fe0dee 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -612,7 +612,7 @@ class IColumn; M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 
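
Commits 074 and 075 collapse the per-source switches (file_hive_partitioning, url_hive_partitioning, s3_hive_partitioning, azure_blob_storage_hive_partitioning, hdfs_hive_partitioning) into the single use_hive_partitioning setting. A small test helper exercising both states of the flag might look like this (hypothetical run_query callable mirroring node.query):

    def assert_hive_partitioning_gated(run_query, query):
        # Enabled: _columnN virtual columns resolve and the query succeeds.
        enabled = run_query(query, settings={"use_hive_partitioning": 1})
        # Disabled: the same identifiers are unknown and the query must fail.
        try:
            run_query(query, settings={"use_hive_partitioning": 0})
        except Exception as ex:
            assert "Unknown expression identifier" in str(ex)
        else:
            raise AssertionError("query unexpectedly succeeded with the setting off")
        return enabled
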
2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ M(UInt64, lightweight_deletes_sync, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes", 0) \ - M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete.", 0) \ + M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection of this table then do lightweight delete, or do lightweight delete then rebuild projections.", 0) \ M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 6af6b4b15aa..951dd4d74f3 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -61,7 +61,7 @@ static std::initializer_listgetSettingsRef().lightweight_mutation_projection_mode; - if (mode == LightweightMutationProjectionMode::THROW) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "DELETE query is not supported for table {} as it has projections. " - "User should drop all the projections manually before running the query", - table->getStorageID().getFullTableName()); - } - else if (mode == LightweightMutationProjectionMode::DROP) + + auto dropOrClearProjections = [&](bool isDrop) { std::vector all_projections = metadata_snapshot->projections.getAllRegisteredNames(); - context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync)); - /// Drop projections first so that lightweight delete can be performed. for (const auto & projection : all_projections) { String alter_query = "ALTER TABLE " + table->getStorageID().getFullTableName() + (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster)) - + " DROP PROJECTION IF EXISTS " + projection; + + (isDrop ? " DROP" : " CLEAR") +" PROJECTION " + projection; ParserAlterQuery parser; ASTPtr alter_ast = parseQuery( @@ -151,6 +143,48 @@ BlockIO InterpreterDeleteQuery::execute() InterpreterAlterQuery alter_interpreter(alter_ast, context); alter_interpreter.execute(); } + + return all_projections; + }; + + if (mode == LightweightMutationProjectionMode::THROW) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "DELETE query is not supported for table {} as it has projections. 
" + "User should drop all the projections manually before running the query", + table->getStorageID().getFullTableName()); + } + else if (mode == LightweightMutationProjectionMode::DROP) + { + dropOrClearProjections(true); + } + else if (mode == LightweightMutationProjectionMode::REBUILD) + { + std::vector all_projections{dropOrClearProjections(false)}; + BlockIO res = lightweightDelete(); + + for (const auto & projection : all_projections) + { + String alter_query = + "ALTER TABLE " + table->getStorageID().getFullTableName() + + (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster)) + + " MATERIALIZE PROJECTION " + projection; + + ParserAlterQuery parser; + ASTPtr alter_ast = parseQuery( + parser, + alter_query.data(), + alter_query.data() + alter_query.size(), + "ALTER query", + 0, + DBMS_DEFAULT_MAX_PARSER_DEPTH, + DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); + + InterpreterAlterQuery alter_interpreter(alter_ast, context); + alter_interpreter.execute(); + } + + return res; } else { diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index c5a6cbab0bc..307d3cb53fc 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -1,2 +1,5 @@ 1231 John 33 8888 Alice 50 +6666 Ksenia 48 +8888 Alice 50 +p users 3 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index b189388e356..fb32646b46a 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -19,6 +19,8 @@ DELETE FROM users WHERE uid = 8888 SETTINGS lightweight_mutation_projection_mode DELETE FROM users WHERE uid = 6666 SETTINGS lightweight_mutation_projection_mode = 'drop'; +SYSTEM FLUSH LOGS; + -- expecting no projection SELECT name, @@ -29,3 +31,28 @@ WHERE (database = currentDatabase()) AND (`table` = 'users'); SELECT * FROM users ORDER BY uid; DROP TABLE users; + +CREATE TABLE users ( + uid Int16, + name String, + age Int16, + projection p (select * order by age) +) ENGINE = MergeTree order by uid; + +INSERT INTO users VALUES (1231, 'John', 33), (6666, 'Ksenia', 48), (8888, 'Alice', 50); + +DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'rebuild'; + +SELECT * FROM users ORDER BY uid; + +SYSTEM FLUSH LOGS; + +-- expecting projection p with 3 rows is active +SELECT + name, + `table`, + rows, +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users') AND active = 1; + +DROP TABLE users; \ No newline at end of file From 6e5e680797f5b2147455826e4e223c27be5039a6 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 6 Jul 2024 05:42:21 +0000 Subject: [PATCH 077/265] bump libprotobuf-mutator, fix build --- contrib/libprotobuf-mutator | 2 +- src/AggregateFunctions/fuzzers/CMakeLists.txt | 2 +- ..._function_state_deserialization_fuzzer.cpp | 24 +------------------ src/Core/fuzzers/CMakeLists.txt | 2 +- src/Core/fuzzers/names_and_types_fuzzer.cpp | 22 ----------------- src/DataTypes/fuzzers/CMakeLists.txt | 2 +- .../data_type_deserialization_fuzzer.cpp | 22 ----------------- src/Formats/fuzzers/CMakeLists.txt | 2 +- src/Formats/fuzzers/format_fuzzer.cpp | 21 ---------------- src/Storages/fuzzers/CMakeLists.txt | 2 +- 
.../fuzzers/columns_description_fuzzer.cpp | 21 ---------------- 11 files changed, 7 insertions(+), 115 deletions(-) diff --git a/contrib/libprotobuf-mutator b/contrib/libprotobuf-mutator index a304ec48dcf..b922c8ab900 160000 --- a/contrib/libprotobuf-mutator +++ b/contrib/libprotobuf-mutator @@ -1 +1 @@ -Subproject commit a304ec48dcf15d942607032151f7e9ee504b5dcf +Subproject commit b922c8ab9004ef9944982e4f165e2747b13223fa diff --git a/src/AggregateFunctions/fuzzers/CMakeLists.txt b/src/AggregateFunctions/fuzzers/CMakeLists.txt index 907a275b4b3..1ce0c52feb7 100644 --- a/src/AggregateFunctions/fuzzers/CMakeLists.txt +++ b/src/AggregateFunctions/fuzzers/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable(aggregate_function_state_deserialization_fuzzer aggregate_function_state_deserialization_fuzzer.cpp ${SRCS}) -target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions) +target_link_libraries(aggregate_function_state_deserialization_fuzzer PRIVATE clickhouse_functions clickhouse_aggregate_functions) diff --git a/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp b/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp index a956d9906bc..31fc93e4288 100644 --- a/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp +++ b/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp @@ -12,33 +12,11 @@ #include -#include - +#include #include #include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - -class IFunctionBase; -using FunctionBasePtr = std::shared_ptr; - -FunctionBasePtr createFunctionBaseCast( - ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) -{ - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for aggregate_function_state_deserialization_fuzzer"); -} - -} - - extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) { try diff --git a/src/Core/fuzzers/CMakeLists.txt b/src/Core/fuzzers/CMakeLists.txt index 51db6fa0b53..61d6b9629eb 100644 --- a/src/Core/fuzzers/CMakeLists.txt +++ b/src/Core/fuzzers/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable (names_and_types_fuzzer names_and_types_fuzzer.cpp) -target_link_libraries (names_and_types_fuzzer PRIVATE dbms) +target_link_libraries (names_and_types_fuzzer PRIVATE clickhouse_functions) diff --git a/src/Core/fuzzers/names_and_types_fuzzer.cpp b/src/Core/fuzzers/names_and_types_fuzzer.cpp index 74debedf2a3..6fdd8703014 100644 --- a/src/Core/fuzzers/names_and_types_fuzzer.cpp +++ b/src/Core/fuzzers/names_and_types_fuzzer.cpp @@ -1,29 +1,7 @@ -#include -#include #include #include -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - -class IFunctionBase; -using FunctionBasePtr = std::shared_ptr; - -FunctionBasePtr createFunctionBaseCast( - ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) -{ - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for names_and_types_fuzzer"); -} - -} - - extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) { try diff --git a/src/DataTypes/fuzzers/CMakeLists.txt b/src/DataTypes/fuzzers/CMakeLists.txt index 939bf5f5e3f..e54ef0a860c 100644 --- a/src/DataTypes/fuzzers/CMakeLists.txt +++ b/src/DataTypes/fuzzers/CMakeLists.txt @@ -1,2 +1,2 @@ 
clickhouse_add_executable(data_type_deserialization_fuzzer data_type_deserialization_fuzzer.cpp ${SRCS}) -target_link_libraries(data_type_deserialization_fuzzer PRIVATE dbms clickhouse_aggregate_functions) +target_link_libraries(data_type_deserialization_fuzzer PRIVATE clickhouse_functions clickhouse_aggregate_functions) diff --git a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp index 7d9a0513d18..0ae325871fb 100644 --- a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp +++ b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp @@ -8,33 +8,11 @@ #include #include -#include - #include #include -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - -class IFunctionBase; -using FunctionBasePtr = std::shared_ptr; - -FunctionBasePtr createFunctionBaseCast( - ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) -{ - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for data_type_deserialization_fuzzer"); -} - -} - - extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) { try diff --git a/src/Formats/fuzzers/CMakeLists.txt b/src/Formats/fuzzers/CMakeLists.txt index 38009aeec1d..b8a7e78b6e2 100644 --- a/src/Formats/fuzzers/CMakeLists.txt +++ b/src/Formats/fuzzers/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable(format_fuzzer format_fuzzer.cpp ${SRCS}) -target_link_libraries(format_fuzzer PRIVATE dbms clickhouse_aggregate_functions) +target_link_libraries(format_fuzzer PRIVATE clickhouse_functions clickhouse_aggregate_functions) diff --git a/src/Formats/fuzzers/format_fuzzer.cpp b/src/Formats/fuzzers/format_fuzzer.cpp index 2c1ec65e54d..27f7d7b292f 100644 --- a/src/Formats/fuzzers/format_fuzzer.cpp +++ b/src/Formats/fuzzers/format_fuzzer.cpp @@ -3,7 +3,6 @@ #include #include -#include #include #include @@ -21,26 +20,6 @@ #include -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - -class IFunctionBase; -using FunctionBasePtr = std::shared_ptr; - -FunctionBasePtr createFunctionBaseCast( - ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) -{ - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for format_fuzzer"); -} - -} - - extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) { try diff --git a/src/Storages/fuzzers/CMakeLists.txt b/src/Storages/fuzzers/CMakeLists.txt index 719b9b77cd9..7bee2da2e26 100644 --- a/src/Storages/fuzzers/CMakeLists.txt +++ b/src/Storages/fuzzers/CMakeLists.txt @@ -4,4 +4,4 @@ clickhouse_add_executable (mergetree_checksum_fuzzer mergetree_checksum_fuzzer.c target_link_libraries (mergetree_checksum_fuzzer PRIVATE dbms) clickhouse_add_executable (columns_description_fuzzer columns_description_fuzzer.cpp) -target_link_libraries (columns_description_fuzzer PRIVATE dbms) +target_link_libraries (columns_description_fuzzer PRIVATE clickhouse_functions) diff --git a/src/Storages/fuzzers/columns_description_fuzzer.cpp b/src/Storages/fuzzers/columns_description_fuzzer.cpp index ac285ea50f7..e10e0cc52f5 100644 --- a/src/Storages/fuzzers/columns_description_fuzzer.cpp +++ b/src/Storages/fuzzers/columns_description_fuzzer.cpp @@ -1,28 +1,7 @@ -#include #include #include -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - -class IFunctionBase; -using FunctionBasePtr = std::shared_ptr; - 
-FunctionBasePtr createFunctionBaseCast( - ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) -{ - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for columns_description_fuzzer"); -} - -} - - extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) { try From c13371166c9a9545155c9274d36c3b91a1ade5cb Mon Sep 17 00:00:00 2001 From: yariks5s Date: Sun, 7 Jul 2024 22:25:15 +0000 Subject: [PATCH 078/265] add import re to tests --- tests/integration/test_storage_hdfs/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index da46756841d..9a166cba2ab 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -2,6 +2,7 @@ import os import pytest import time +import re from helpers.cluster import ClickHouseCluster, is_arm from helpers.client import QueryRuntimeException from helpers.test_tools import TSV From a3c4cbfce257f171e66e04598ac9eae548c3836f Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 8 Jul 2024 13:57:54 +0000 Subject: [PATCH 079/265] clang-tidy, fix cp with minio --- docker/test/stateless/setup_minio.sh | 2 +- .../03203_hive_style_partitioning.sh | 52 +++++++++---------- utils/keeper-bench/Runner.cpp | 3 +- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/docker/test/stateless/setup_minio.sh b/docker/test/stateless/setup_minio.sh index 2b9433edd20..aacb9d88a45 100755 --- a/docker/test/stateless/setup_minio.sh +++ b/docker/test/stateless/setup_minio.sh @@ -101,7 +101,7 @@ upload_data() { # shellcheck disable=SC2045 for file in $(ls "${data_path}"); do echo "${file}"; - ./mc cp "${data_path}"/"${file}" clickminio/test/"${file}"; + ./mc cp --recursive "${data_path}"/"${file}" clickminio/test/"${file}"; done } diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 544fd17ffff..334bfef4f02 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -43,29 +43,29 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE URL PARTITIONING'" $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; +SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0, _column1 FROM 
url('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _non_existing_column FROM url('http://localhost:11111/test/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=*/sample.parquet') WHERE column0 = _column0;""" +SELECT *, _non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = _column0;""" $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; -SELECT *, _column0 FROM url('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" @@ -75,28 +75,28 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3 PARTITIONING'" $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; +SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; -SELECT *, _column0, 
_column1 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _non_existing_column FROM s3('http://localhost:11111/test/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=*/sample.parquet') WHERE column0 = _column0; +SELECT *, _non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = _column0; """ $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; -SELECT *, _column0 FROM s3('http://localhost:11111/test/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" diff --git a/utils/keeper-bench/Runner.cpp 
b/utils/keeper-bench/Runner.cpp index 587e015b340..f8a0e37d1a9 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -544,7 +544,8 @@ struct ZooKeeperRequestFromLogReader file_read_buf = DB::wrapReadBufferWithCompressionMethod(std::move(file_read_buf), compression_method); DB::SingleReadBufferIterator read_buffer_iterator(std::move(file_read_buf)); - auto [columns_description, format] = DB::detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); + std::string sample_path; + auto [columns_description, format] = DB::detectFormatAndReadSchema(format_settings, read_buffer_iterator, sample_path, context); DB::ColumnsWithTypeAndName columns; columns.reserve(columns_description.size()); From 3cc6a133c64861f4493849905950abb5cc1fbaac Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 9 Jul 2024 12:38:16 +0200 Subject: [PATCH 080/265] Update setup_minio.sh --- docker/test/stateless/setup_minio.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/setup_minio.sh b/docker/test/stateless/setup_minio.sh index aacb9d88a45..0e344cbb9c4 100755 --- a/docker/test/stateless/setup_minio.sh +++ b/docker/test/stateless/setup_minio.sh @@ -101,7 +101,7 @@ upload_data() { # shellcheck disable=SC2045 for file in $(ls "${data_path}"); do echo "${file}"; - ./mc cp --recursive "${data_path}"/"${file}" clickminio/test/"${file}"; + ./mc cp --recursive "${data_path}"/ clickminio/test/; done } @@ -148,4 +148,4 @@ main() { setup_aws_credentials } -main "$@" \ No newline at end of file +main "$@" From 362bf4befcd55de5f49e2665c7c7f9483a700dc8 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 9 Jul 2024 17:30:08 +0200 Subject: [PATCH 081/265] Update setup_minio.sh --- docker/test/stateless/setup_minio.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/setup_minio.sh b/docker/test/stateless/setup_minio.sh index 0e344cbb9c4..8bd75f16321 100755 --- a/docker/test/stateless/setup_minio.sh +++ b/docker/test/stateless/setup_minio.sh @@ -101,7 +101,7 @@ upload_data() { # shellcheck disable=SC2045 for file in $(ls "${data_path}"); do echo "${file}"; - ./mc cp --recursive "${data_path}"/ clickminio/test/; + ./mc cp "${data_path}"/"${file}" clickminio/test/"${file}"; done } From 1761102b3a071143b40039ee1e83666ebffa88fb Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 9 Jul 2024 17:31:41 +0200 Subject: [PATCH 082/265] fix path --- .../column1=Gordon/sample.parquet | Bin .../column1=Schmidt/sample.parquet | Bin .../sample.parquet | Bin 3 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/data_minio/hive_partitioning/{coumn0=Elizabeth => column0=Elizabeth}/column1=Gordon/sample.parquet (100%) rename tests/queries/0_stateless/data_minio/hive_partitioning/{coumn0=Elizabeth => column0=Elizabeth}/column1=Schmidt/sample.parquet (100%) rename tests/queries/0_stateless/data_minio/hive_partitioning/{coumn0=Elizabeth => column0=Elizabeth}/sample.parquet (100%) diff --git a/tests/queries/0_stateless/data_minio/hive_partitioning/coumn0=Elizabeth/column1=Gordon/sample.parquet b/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet similarity index 100% rename from tests/queries/0_stateless/data_minio/hive_partitioning/coumn0=Elizabeth/column1=Gordon/sample.parquet rename to 
tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet diff --git a/tests/queries/0_stateless/data_minio/hive_partitioning/coumn0=Elizabeth/column1=Schmidt/sample.parquet b/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet similarity index 100% rename from tests/queries/0_stateless/data_minio/hive_partitioning/coumn0=Elizabeth/column1=Schmidt/sample.parquet rename to tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet diff --git a/tests/queries/0_stateless/data_minio/hive_partitioning/coumn0=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/sample.parquet similarity index 100% rename from tests/queries/0_stateless/data_minio/hive_partitioning/coumn0=Elizabeth/sample.parquet rename to tests/queries/0_stateless/data_minio/hive_partitioning/column0=Elizabeth/sample.parquet From 9db80a6e2d14c6341c7afc66aeaf6998c98f9f8a Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 9 Jul 2024 17:47:05 +0000 Subject: [PATCH 083/265] more testing with chunked --- programs/benchmark/Benchmark.cpp | 4 ++-- src/Client/ConnectionParameters.cpp | 4 ++-- src/Client/ConnectionParameters.h | 4 ++-- src/Dictionaries/ClickHouseDictionarySource.cpp | 8 ++++---- src/Interpreters/Cluster.cpp | 4 ++-- src/Interpreters/Cluster.h | 4 ++-- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 251761e0bad..0a7faf5ec01 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -666,8 +666,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) Strings hosts = options.count("host") ? 
options["host"].as() : Strings({"localhost"}); - String proto_send_chunked {"notchunked_optional"}; - String proto_recv_chunked {"notchunked_optional"}; + String proto_send_chunked {"chunked"}; + String proto_recv_chunked {"chunked"}; if (options.count("proto_caps")) { diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 4bca65083c4..50af589dba3 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -107,8 +107,8 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati } } - proto_send_chunked = config.getString("proto_caps.send", "notchunked_optional"); - proto_recv_chunked = config.getString("proto_caps.recv", "notchunked_optional"); + proto_send_chunked = config.getString("proto_caps.send", "chunked"); + proto_recv_chunked = config.getString("proto_caps.recv", "chunked"); quota_key = config.getString("quota_key", ""); diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index 71057a2b543..ef4df17143e 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -20,8 +20,8 @@ struct ConnectionParameters std::string default_database; std::string user; std::string password; - std::string proto_send_chunked = "notchunked_optional"; - std::string proto_recv_chunked = "notchunked_optional"; + std::string proto_send_chunked = "chunked"; + std::string proto_recv_chunked = "chunked"; std::string quota_key; SSHKey ssh_private_key; std::string jwt; diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index 3b096da92c6..14c6aac24f6 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -236,8 +236,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) .host = host, .user = named_collection->getAnyOrDefault({"user", "username"}, "default"), .password = named_collection->getOrDefault("password", ""), - .proto_send_chunked = named_collection->getOrDefault("proto_send_chunked", "notchunked_optional"), - .proto_recv_chunked = named_collection->getOrDefault("proto_recv_chunked", "notchunked_optional"), + .proto_send_chunked = named_collection->getOrDefault("proto_send_chunked", "chunked"), + .proto_recv_chunked = named_collection->getOrDefault("proto_recv_chunked", "chunked"), .quota_key = named_collection->getOrDefault("quota_key", ""), .db = named_collection->getAnyOrDefault({"db", "database"}, default_database), .table = named_collection->getOrDefault("table", ""), @@ -262,8 +262,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) .host = host, .user = config.getString(settings_config_prefix + ".user", "default"), .password = config.getString(settings_config_prefix + ".password", ""), - .proto_send_chunked = config.getString(settings_config_prefix + ".proto_caps.send", "notchunked_optional"), - .proto_recv_chunked = config.getString(settings_config_prefix + ".proto_caps.recv", "notchunked_optional"), + .proto_send_chunked = config.getString(settings_config_prefix + ".proto_caps.send", "chunked"), + .proto_recv_chunked = config.getString(settings_config_prefix + ".proto_caps.recv", "chunked"), .quota_key = config.getString(settings_config_prefix + ".quota_key", ""), .db = config.getString(settings_config_prefix + ".db", default_database), .table = config.getString(settings_config_prefix + ".table", ""), diff --git a/src/Interpreters/Cluster.cpp 
b/src/Interpreters/Cluster.cpp index 1d7ccd484d0..9b227fcc1fc 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -113,8 +113,8 @@ Cluster::Address::Address( secure = ConfigHelper::getBool(config, config_prefix + ".secure", false, /* empty_as */true) ? Protocol::Secure::Enable : Protocol::Secure::Disable; priority = Priority{config.getInt(config_prefix + ".priority", 1)}; - proto_send_chunked = config.getString(config_prefix + ".proto_caps.send", "notchunked_optional"); - proto_recv_chunked = config.getString(config_prefix + ".proto_caps.recv", "notchunked_optional"); + proto_send_chunked = config.getString(config_prefix + ".proto_caps.send", "chunked"); + proto_recv_chunked = config.getString(config_prefix + ".proto_caps.recv", "chunked"); const char * port_type = secure == Protocol::Secure::Enable ? "tcp_port_secure" : "tcp_port"; auto default_port = config.getInt(port_type, 0); diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index f3146ac0134..009ef15df6c 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -114,8 +114,8 @@ public: UInt16 port{0}; String user; String password; - String proto_send_chunked = "notchunked_optional"; - String proto_recv_chunked = "notchunked_optional"; + String proto_send_chunked = "chunked"; + String proto_recv_chunked = "chunked"; String quota_key; /// For inter-server authorization From d7f08ffdb74b4fce89eff3133e36a5f50fc4ef0b Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 9 Jul 2024 21:01:37 +0200 Subject: [PATCH 084/265] Update setup_minio.sh --- docker/test/stateless/setup_minio.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docker/test/stateless/setup_minio.sh b/docker/test/stateless/setup_minio.sh index 8bd75f16321..49837fdb1ac 100755 --- a/docker/test/stateless/setup_minio.sh +++ b/docker/test/stateless/setup_minio.sh @@ -99,10 +99,7 @@ upload_data() { # iterating over globs will cause redundant file variable to be # a path to a file, not a filename # shellcheck disable=SC2045 - for file in $(ls "${data_path}"); do - echo "${file}"; - ./mc cp "${data_path}"/"${file}" clickminio/test/"${file}"; - done + ./mc cp --recursive "${data_path}"/ clickminio/test/ } setup_aws_credentials() { From 2794b7bf84faf91cfb92d4a8fb76bb3a8183de44 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 10 Jul 2024 00:20:11 +0000 Subject: [PATCH 085/265] defaults to notchunked, add docs to server's and client's configs --- programs/benchmark/Benchmark.cpp | 4 ++-- programs/client/clickhouse-client.xml | 15 +++++++++++++++ programs/server/config.xml | 15 +++++++++++++++ src/Client/ConnectionParameters.cpp | 4 ++-- src/Client/ConnectionParameters.h | 4 ++-- src/Dictionaries/ClickHouseDictionarySource.cpp | 8 ++++---- src/Interpreters/Cluster.cpp | 4 ++-- src/Interpreters/Cluster.h | 4 ++-- src/Server/TCPHandler.cpp | 8 ++++---- 9 files changed, 48 insertions(+), 18 deletions(-) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 0a7faf5ec01..36f774a3c12 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -666,8 +666,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) Strings hosts = options.count("host") ? 
options["host"].as() : Strings({"localhost"}); - String proto_send_chunked {"chunked"}; - String proto_recv_chunked {"chunked"}; + String proto_send_chunked {"notchunked"}; + String proto_recv_chunked {"notchunked"}; if (options.count("proto_caps")) { diff --git a/programs/client/clickhouse-client.xml b/programs/client/clickhouse-client.xml index d0deb818c1e..376e64906e2 100644 --- a/programs/client/clickhouse-client.xml +++ b/programs/client/clickhouse-client.xml @@ -37,6 +37,21 @@ {display_name} \e[1;31m:)\e[0m + + + 9000 + + + diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 50af589dba3..4d0a9ffa08c 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -107,8 +107,8 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati } } - proto_send_chunked = config.getString("proto_caps.send", "chunked"); - proto_recv_chunked = config.getString("proto_caps.recv", "chunked"); + proto_send_chunked = config.getString("proto_caps.send", "notchunked"); + proto_recv_chunked = config.getString("proto_caps.recv", "notchunked"); quota_key = config.getString("quota_key", ""); diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index ef4df17143e..382bfe34a3d 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -20,8 +20,8 @@ struct ConnectionParameters std::string default_database; std::string user; std::string password; - std::string proto_send_chunked = "chunked"; - std::string proto_recv_chunked = "chunked"; + std::string proto_send_chunked = "notchunked"; + std::string proto_recv_chunked = "notchunked"; std::string quota_key; SSHKey ssh_private_key; std::string jwt; diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index 14c6aac24f6..b36d53a6159 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -236,8 +236,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) .host = host, .user = named_collection->getAnyOrDefault({"user", "username"}, "default"), .password = named_collection->getOrDefault("password", ""), - .proto_send_chunked = named_collection->getOrDefault("proto_send_chunked", "chunked"), - .proto_recv_chunked = named_collection->getOrDefault("proto_recv_chunked", "chunked"), + .proto_send_chunked = named_collection->getOrDefault("proto_send_chunked", "notchunked"), + .proto_recv_chunked = named_collection->getOrDefault("proto_recv_chunked", "notchunked"), .quota_key = named_collection->getOrDefault("quota_key", ""), .db = named_collection->getAnyOrDefault({"db", "database"}, default_database), .table = named_collection->getOrDefault("table", ""), @@ -262,8 +262,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) .host = host, .user = config.getString(settings_config_prefix + ".user", "default"), .password = config.getString(settings_config_prefix + ".password", ""), - .proto_send_chunked = config.getString(settings_config_prefix + ".proto_caps.send", "chunked"), - .proto_recv_chunked = config.getString(settings_config_prefix + ".proto_caps.recv", "chunked"), + .proto_send_chunked = config.getString(settings_config_prefix + ".proto_caps.send", "notchunked"), + .proto_recv_chunked = config.getString(settings_config_prefix + ".proto_caps.recv", "notchunked"), .quota_key = config.getString(settings_config_prefix + ".quota_key", ""), .db = 
config.getString(settings_config_prefix + ".db", default_database), .table = config.getString(settings_config_prefix + ".table", ""), diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 9b227fcc1fc..dd9e35834eb 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -113,8 +113,8 @@ Cluster::Address::Address( secure = ConfigHelper::getBool(config, config_prefix + ".secure", false, /* empty_as */true) ? Protocol::Secure::Enable : Protocol::Secure::Disable; priority = Priority{config.getInt(config_prefix + ".priority", 1)}; - proto_send_chunked = config.getString(config_prefix + ".proto_caps.send", "chunked"); - proto_recv_chunked = config.getString(config_prefix + ".proto_caps.recv", "chunked"); + proto_send_chunked = config.getString(config_prefix + ".proto_caps.send", "notchunked"); + proto_recv_chunked = config.getString(config_prefix + ".proto_caps.recv", "notchunked"); const char * port_type = secure == Protocol::Secure::Enable ? "tcp_port_secure" : "tcp_port"; auto default_port = config.getInt(port_type, 0); diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index 009ef15df6c..c69d77668ab 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -114,8 +114,8 @@ public: UInt16 port{0}; String user; String password; - String proto_send_chunked = "chunked"; - String proto_recv_chunked = "chunked"; + String proto_send_chunked = "notchunked"; + String proto_recv_chunked = "notchunked"; String quota_key; /// For inter-server authorization diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 40fd3848455..9c5e5e9c572 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -310,8 +310,8 @@ void TCPHandler::runImpl() return chunked_srv; }; - bool out_chunked = is_chunked(server.config().getString("proto_caps.send", "chunked_optional"), proto_recv_chunked_cl, "send"); - bool in_chunked = is_chunked(server.config().getString("proto_caps.recv", "chunked_optional"), proto_send_chunked_cl, "recv"); + bool out_chunked = is_chunked(server.config().getString("proto_caps.send", "notchunked"), proto_recv_chunked_cl, "send"); + bool in_chunked = is_chunked(server.config().getString("proto_caps.recv", "notchunked"), proto_send_chunked_cl, "recv"); if (out_chunked) out->enableChunked(); @@ -1660,8 +1660,8 @@ void TCPHandler::sendHello() writeVarUInt(VERSION_PATCH, *out); if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS) { - writeStringBinary(server.config().getString("proto_caps.send", "chunked"), *out); - writeStringBinary(server.config().getString("proto_caps.recv", "chunked"), *out); + writeStringBinary(server.config().getString("proto_caps.send", "notchunked"), *out); + writeStringBinary(server.config().getString("proto_caps.recv", "notchunked"), *out); } if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES) { From 7b58722c07e8feb6acca5f0762411a55b8c58915 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 10 Jul 2024 13:15:44 +0200 Subject: [PATCH 086/265] Update setup_minio.sh --- docker/test/stateless/setup_minio.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/test/stateless/setup_minio.sh b/docker/test/stateless/setup_minio.sh index 49837fdb1ac..02e3d117de2 100755 --- a/docker/test/stateless/setup_minio.sh +++ b/docker/test/stateless/setup_minio.sh @@ -99,7 +99,9 @@ upload_data() { # iterating over globs will cause redundant 
file variable to be # a path to a file, not a filename # shellcheck disable=SC2045 - ./mc cp --recursive "${data_path}"/ clickminio/test/ + if [ -d "${data_path}" ]; then + ./mc cp --recursive "${data_path}"/ clickminio/test/ + fi } setup_aws_credentials() { From a751719a33e2691426bdb057eaf509a74e84753d Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 10 Jul 2024 17:44:06 +0000 Subject: [PATCH 087/265] fixes due to review --- docs/en/operations/settings/settings.md | 6 +++++ docs/en/sql-reference/table-functions/file.md | 17 ++++++++++++ docs/en/sql-reference/table-functions/hdfs.md | 17 ++++++++++++ docs/en/sql-reference/table-functions/s3.md | 17 ++++++++++++ programs/obfuscator/Obfuscator.cpp | 3 +-- src/Formats/ReadSchemaUtils.cpp | 11 ++------ src/Formats/ReadSchemaUtils.h | 2 -- src/Storages/Hive/StorageHive.cpp | 2 +- .../DataLakes/IStorageDataLake.h | 4 +-- .../ObjectStorage/StorageObjectStorage.cpp | 14 ++++++---- .../StorageObjectStorageCluster.cpp | 2 +- .../StorageObjectStorageSource.cpp | 11 +++++--- .../StorageObjectStorageSource.h | 3 ++- .../StorageObjectStorageQueue.cpp | 2 +- src/Storages/StorageFile.cpp | 20 +++++++------- src/Storages/StorageFileCluster.cpp | 2 +- src/Storages/StorageURL.cpp | 23 +++++++++++----- src/Storages/StorageURLCluster.cpp | 2 +- src/Storages/VirtualColumnUtils.cpp | 25 +++++++++++------- src/Storages/VirtualColumnUtils.h | 9 +++++-- src/TableFunctions/TableFunctionFormat.cpp | 10 +++---- .../03203_hive_style_partitioning.reference | 5 +++- .../03203_hive_style_partitioning.sh | 11 +++++--- .../array=[1,2,3]/float=42.42/sample.parquet | Bin 0 -> 1308 bytes .../number=42/date=2020-01-01/sample.parquet | Bin 0 -> 1308 bytes 25 files changed, 152 insertions(+), 66 deletions(-) create mode 100644 tests/queries/0_stateless/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet create mode 100644 tests/queries/0_stateless/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 1d74a63b972..e100e0f27f7 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -5591,3 +5591,9 @@ Default value: `10000000`. Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached. Default value: `1GiB`. + +## use_hive_partitioning + +Allows the usage of Hive-style partitioning in queries. When enabled, ClickHouse interprets and maintains table partitions in a way that is consistent with the Hive partitioning scheme, which is commonly used in Hadoop ecosystems. + +Default value: `0`. diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 3a3162dad9a..88af3663552 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -198,6 +198,23 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3 - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. +## Hive-style partitioning {#hive-style-patitioning} + +When setting `use_hive_partitioning` is set to 1, ClickHouse can introduce virtual columns due to Hive partitioning style if the path has the specific structure. 
+ +**Example** + +Use virtual column, created with Hive-style partitioning + +``` sql +SET use_hive_patitioning = 1; +SELECT _specified_column from file('/specified_column=specified_data/file.txt'); +``` + +``` reference +specified_data +``` + ## Settings {#settings} - [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default. diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index 28cba5ccc6a..beb1ad12532 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -99,6 +99,23 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. +## Hive-style partitioning {#hive-style-patitioning} + +When setting `use_hive_partitioning` is set to 1, ClickHouse can introduce virtual columns due to Hive partitioning style if the path has the specific structure. + +**Example** + +Use virtual column, created with Hive-style partitioning + +``` sql +SET use_hive_patitioning = 1; +SELECT _specified_column from HDFS('hdfs://hdfs1:9000/specified_column=specified_data/file.txt'); +``` + +``` reference +specified_data +``` + ## Storage Settings {#storage-settings} - [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 35e5d86034c..45c4caa1a13 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -274,6 +274,23 @@ FROM s3( - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. In case of archive shows uncompressed file size of the file inside the archive. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. +## Hive-style partitioning {#hive-style-patitioning} + +When setting `use_hive_partitioning` is set to 1, ClickHouse can introduce virtual columns due to Hive partitioning style if the path has the specific structure. + +**Example** + +Use virtual column, created with Hive-style partitioning + +``` sql +SET use_hive_patitioning = 1; +SELECT _specified_column from HDFS('hdfs://hdfs1:9000/specified_column=specified_data/file.txt'); +``` + +``` reference +specified_data +``` + ## Storage Settings {#storage-settings} - [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. 
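A minimal end-to-end sketch of the behaviour these docs describe, assuming a hypothetical layout where files live under `country=.../year=.../` path segments (the `_country` and `_year` virtual column names below follow from that assumed layout and are not part of this patch):

```sql
-- Assumed files: data/country=US/year=2024/part.parquet,
--                data/country=DE/year=2024/part.parquet
SET use_hive_partitioning = 1;

-- Every key=value segment of the matched path becomes a virtual column
-- (_country, _year), with its type inferred from the value text.
SELECT _country, _year, count(*)
FROM file('data/country=*/year=*/*.parquet')
GROUP BY _country, _year;
```

With the setting left at its default of 0, the same query should fail with an unknown identifier error, which is what the negative cases in 03203_hive_style_partitioning.sh assert.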
diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 11e85bc1302..4c3981c1d01 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -1308,8 +1308,7 @@ try SingleReadBufferIterator read_buffer_iterator(std::move(file)); - std::string sample_string; - schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, sample_string, context_const); + schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, context_const); } else { diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 1e70840f91f..1920459c378 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -94,7 +94,6 @@ std::pair readSchemaFromFormatImpl( std::optional format_name, const std::optional & format_settings, IReadBufferIterator & read_buffer_iterator, - std::string & sample_path, const ContextPtr & context) try { @@ -144,10 +143,6 @@ try { iterator_data = read_buffer_iterator.next(); - /// Extracting the File path for hive-style partitioning - if (sample_path.empty()) - sample_path = read_buffer_iterator.getLastFilePath(); - /// Read buffer iterator can determine the data format if it's unknown. /// For example by scanning schema cache or by finding new file with format extension. if (!format_name && iterator_data.format_name) @@ -541,19 +536,17 @@ ColumnsDescription readSchemaFromFormat( const String & format_name, const std::optional & format_settings, IReadBufferIterator & read_buffer_iterator, - std::string & sample_path, const ContextPtr & context) { - return readSchemaFromFormatImpl(format_name, format_settings, read_buffer_iterator, sample_path, context).first; + return readSchemaFromFormatImpl(format_name, format_settings, read_buffer_iterator, context).first; } std::pair detectFormatAndReadSchema( const std::optional & format_settings, IReadBufferIterator & read_buffer_iterator, - std::string & sample_path, const ContextPtr & context) { - return readSchemaFromFormatImpl(std::nullopt, format_settings, read_buffer_iterator, sample_path, context); + return readSchemaFromFormatImpl(std::nullopt, format_settings, read_buffer_iterator, context); } SchemaCache::Key getKeyForSchemaCache( diff --git a/src/Formats/ReadSchemaUtils.h b/src/Formats/ReadSchemaUtils.h index 6c562a06bf0..7168e7f0817 100644 --- a/src/Formats/ReadSchemaUtils.h +++ b/src/Formats/ReadSchemaUtils.h @@ -122,7 +122,6 @@ ColumnsDescription readSchemaFromFormat( const String & format_name, const std::optional & format_settings, IReadBufferIterator & read_buffer_iterator, - std::string & sample_path, const ContextPtr & context); /// Try to detect the format of the data and it's schema. 
@@ -132,7 +131,6 @@ ColumnsDescription readSchemaFromFormat( std::pair detectFormatAndReadSchema( const std::optional & format_settings, IReadBufferIterator & read_buffer_iterator, - std::string & sample_path, const ContextPtr & context); SchemaCache::Key getKeyForSchemaCache(const String & source, const String & format, const std::optional & format_settings, const ContextPtr & context); diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 28d8128e052..255dadea387 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -445,7 +445,7 @@ StorageHive::StorageHive( storage_metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, storage_metadata.columns, getContext()); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), getContext())); } void StorageHive::lazyInitialize() diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index a651be6017f..ec8e740b1c9 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -89,9 +89,9 @@ public: { ConfigurationPtr configuration = base_configuration->clone(); configuration->setPaths(metadata->getDataFiles()); - std::string sample_string; + std::string sample_path; return Storage::resolveSchemaFromData( - object_storage_, configuration, format_settings_, sample_string, local_context); + object_storage_, configuration, format_settings_, sample_path, local_context); } } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 8ab8b6b6881..48e9118e321 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -43,7 +43,8 @@ std::string StorageObjectStorage::getPathSample(StorageInMemoryMetadata metadata {}, // predicate metadata.getColumns().getAll(), // virtual_columns nullptr, // read_keys - {} // file_progress_callback + {}, // file_progress_callback + true // override_settings_for_hive_partitioning ); if (auto file = file_iterator->next(0)) @@ -86,7 +87,7 @@ StorageObjectStorage::StorageObjectStorage( else if (!context->getSettings().use_hive_partitioning) sample_path = ""; - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), sample_path)); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context, sample_path)); setInMemoryMetadata(metadata); } @@ -396,7 +397,8 @@ ColumnsDescription StorageObjectStorage::resolveSchemaFromData( { ObjectInfos read_keys; auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); - return readSchemaFromFormat(configuration->format, format_settings, *iterator, sample_path, context); + sample_path = iterator->getLastFilePath(); + return readSchemaFromFormat(configuration->format, format_settings, *iterator, context); } std::string StorageObjectStorage::resolveFormatFromData( @@ -408,7 +410,8 @@ std::string StorageObjectStorage::resolveFormatFromData( { ObjectInfos read_keys; auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); - return detectFormatAndReadSchema(format_settings, *iterator, sample_path, 
context).second; + sample_path = iterator->getLastFilePath(); + return detectFormatAndReadSchema(format_settings, *iterator, context).second; } std::pair StorageObjectStorage::resolveSchemaAndFormatFromData( @@ -420,7 +423,8 @@ std::pair StorageObjectStorage::resolveSchemaAn { ObjectInfos read_keys; auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); - auto [columns, format] = detectFormatAndReadSchema(format_settings, *iterator, sample_path, context); + sample_path = iterator->getLastFilePath(); + auto [columns, format] = detectFormatAndReadSchema(format_settings, *iterator, context); configuration->format = format; return std::pair(columns, format); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 0dc4b845a47..92327b4cde0 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -41,7 +41,7 @@ StorageObjectStorageCluster::StorageObjectStorageCluster( metadata.setColumns(columns); metadata.setConstraints(constraints_); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context_)); setInMemoryMetadata(metadata); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 88ae0a2319c..e5c9318de5d 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -105,7 +105,8 @@ std::shared_ptr StorageObjectStorageSourc const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, - std::function file_progress_callback) + std::function file_progress_callback, + bool override_settings_for_hive_partitioning) { if (distributed_processing) return std::make_shared( @@ -122,11 +123,14 @@ std::shared_ptr StorageObjectStorageSourc std::unique_ptr iterator; if (configuration->isPathWithGlobs()) { + bool throw_on_zero_files_match = settings.throw_on_zero_files_match; + if (override_settings_for_hive_partitioning) + throw_on_zero_files_match = false; /// Iterate through disclosed globs and make a source for each file iterator = std::make_unique( object_storage, configuration, predicate, virtual_columns, local_context, is_archive ? nullptr : read_keys, settings.list_object_keys_size, - settings.throw_on_zero_files_match, file_progress_callback); + throw_on_zero_files_match, file_progress_callback); } else { @@ -204,7 +208,8 @@ Chunk StorageObjectStorageSource::generate() .size = object_info->isArchive() ? 
object_info->fileSizeInArchive() : object_info->metadata->size_bytes, .filename = &filename, .last_modified = object_info->metadata->last_modified, - }, object_info->getPath()); + .hive_partitioning_path = object_info->getPath(), + }); const auto & partition_columns = configuration->getPartitionColumns(); if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 271b38fa75c..a99bb068372 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -58,7 +58,8 @@ public: const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, - std::function file_progress_callback = {}); + std::function file_progress_callback = {}, + bool override_settings_for_hive_partitioning = false); static std::string getUniqueStoragePathIdentifier( const Configuration & configuration, diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 98f8cdc7e7a..a9239e3ad06 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -168,7 +168,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue( storage_metadata.setColumns(columns); storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_)); setInMemoryMetadata(storage_metadata); LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 9751d596fff..e6b9137444e 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -52,6 +52,7 @@ #include #include #include +#include "Formats/FormatSettings.h" #include #include @@ -880,11 +881,10 @@ std::pair StorageFile::getTableStructureAndFormatFro auto read_buffer_iterator = SingleReadBufferIterator(std::move(read_buf)); ColumnsDescription columns; - std::string sample_path; if (format) - columns = readSchemaFromFormat(*format, format_settings, read_buffer_iterator, sample_path, context); + columns = readSchemaFromFormat(*format, format_settings, read_buffer_iterator, context); else - std::tie(columns, format) = detectFormatAndReadSchema(format_settings, read_buffer_iterator, sample_path, context); + std::tie(columns, format) = detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); peekable_read_buffer_from_fd = read_buffer_iterator.releaseBuffer(); if (peekable_read_buffer_from_fd) @@ -929,21 +929,20 @@ std::pair StorageFile::getTableStructureAndFormatFro } - std::string sample_path; if (archive_info) { ReadBufferFromArchiveIterator read_buffer_iterator(*archive_info, format, format_settings, context); if (format) - return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, sample_path, context), *format}; + return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, context), *format}; - return detectFormatAndReadSchema(format_settings, read_buffer_iterator, sample_path, context); + return detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); } ReadBufferFromFileIterator read_buffer_iterator(paths, 
format, compression_method, format_settings, context); if (format) - return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, sample_path, context), *format}; + return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, context), *format}; - return detectFormatAndReadSchema(format_settings, read_buffer_iterator, sample_path, context); + return detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); } ColumnsDescription StorageFile::getTableStructureFromFile( @@ -1102,7 +1101,7 @@ void StorageFile::setStorageMetadata(CommonArguments args) std::string path_for_virtuals; if (args.getContext()->getSettingsRef().use_hive_partitioning && !paths.empty()) path_for_virtuals = paths[0]; - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), path_for_virtuals, format_settings.value_or(FormatSettings{}))); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), args.getContext(), path_for_virtuals, format_settings.value_or(FormatSettings{}))); } @@ -1456,7 +1455,8 @@ Chunk StorageFileSource::generate() .size = current_file_size, .filename = (filename_override.has_value() ? &filename_override.value() : nullptr), .last_modified = current_file_last_modified, - }, hive_partitioning_path); + .hive_partitioning_path = hive_partitioning_path, + }); return chunk; } diff --git a/src/Storages/StorageFileCluster.cpp b/src/Storages/StorageFileCluster.cpp index d43e242f70c..f7684182e79 100644 --- a/src/Storages/StorageFileCluster.cpp +++ b/src/Storages/StorageFileCluster.cpp @@ -61,7 +61,7 @@ StorageFileCluster::StorageFileCluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context)); } void StorageFileCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 59c5465a381..5da42638b87 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -99,6 +99,17 @@ static ConnectionTimeouts getHTTPTimeouts(ContextPtr context) return ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), context->getServerSettings().keep_alive_timeout); } +String getSampleURI(String uri, ContextPtr context) +{ + if (urlWithGlobs(uri)) + { + auto uris = parseRemoteDescription(uri, 0, uri.size(), ',', context->getSettingsRef().glob_expansion_max_elements); + if (!uris.empty()) + return uris[0]; + } + return uri; +} + IStorageURLBase::IStorageURLBase( const String & uri_, const ContextPtr & context_, @@ -155,8 +166,8 @@ IStorageURLBase::IStorageURLBase( std::string uri_for_partitioning; if (context_->getSettingsRef().use_hive_partitioning) - uri_for_partitioning = uri; - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), uri_for_partitioning, format_settings.value_or(FormatSettings{}))); + uri_for_partitioning = getSampleURI(uri, context_); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_, uri_for_partitioning, format_settings.value_or(FormatSettings{}))); } @@ -425,7 +436,8 @@ Chunk StorageURLSource::generate() { .path = curr_uri.getPath(), .size = current_file_size, - }, hive_partitioning_path); + 
.hive_partitioning_path = hive_partitioning_path, + }); return chunk; } @@ -959,10 +971,9 @@ std::pair IStorageURLBase::getTableStructureAndForma urls_to_check = {uri}; ReadBufferIterator read_buffer_iterator(urls_to_check, format, compression_method, headers, format_settings, context); - std::string sample_path; if (format) - return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, sample_path, context), *format}; - return detectFormatAndReadSchema(format_settings, read_buffer_iterator, sample_path, context); + return {readSchemaFromFormat(*format, format_settings, read_buffer_iterator, context), *format}; + return detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); } ColumnsDescription IStorageURLBase::getTableStructureFromData( diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index 2e7c63d0097..592bd71f546 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -75,7 +75,7 @@ StorageURLCluster::StorageURLCluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context)); } void StorageURLCluster::updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 379b14d8e51..b7669c65992 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -39,10 +39,13 @@ #include #include #include +#include +#include #include "Functions/FunctionsLogical.h" #include "Functions/IFunction.h" #include "Functions/IFunctionAdaptors.h" #include "Functions/indexHint.h" +#include #include #include #include @@ -116,7 +119,7 @@ NameSet getVirtualNamesForFileLikeStorage() return {"_path", "_file", "_size", "_time"}; } -std::map parseFromPath(const std::string& path) +std::map parseHivePartitioningKeysAndValues(const std::string& path) { std::string pattern = "/([^/]+)=([^/]+)"; re2::StringPiece input_piece(path); @@ -128,7 +131,7 @@ std::map parseFromPath(const std::string& path) return key_values; } -VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, std::string path, FormatSettings settings) +VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, const ContextPtr & context, std::string path, std::optional format_settings_) { VirtualColumnsDescription desc; @@ -145,13 +148,17 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription add_virtual("_size", makeNullable(std::make_shared())); add_virtual("_time", makeNullable(std::make_shared())); - auto map = parseFromPath(path); - for (const auto& item : map) + auto map = parseHivePartitioningKeysAndValues(path); + for (auto& item : map) { - auto type = tryInferDataTypeForSingleField(item.second, settings); + auto format_settings = format_settings_ ? 
*format_settings_ : getFormatSettings(context); + auto type = tryInferDataTypeByEscapingRule(item.second, format_settings, FormatSettings::EscapingRule::Raw); if (type == nullptr) type = std::make_shared(); - add_virtual(item.first, std::make_shared(type)); + if (type->canBeInsideLowCardinality()) + add_virtual(item.first, std::make_shared(type)); + else + add_virtual(item.first, type); } return desc; @@ -215,9 +222,9 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, - VirtualsForFileLikeStorage virtual_values, const std::string & hive_partitioning_path) + VirtualsForFileLikeStorage virtual_values) { - auto hive_map = parseFromPath(hive_partitioning_path); + auto hive_map = parseHivePartitioningKeysAndValues(virtual_values.hive_partitioning_path); for (const auto & virtual_column : requested_virtual_columns) { if (virtual_column.name == "_path") @@ -265,7 +272,7 @@ void addRequestedFileLikeStorageVirtualsToChunk( auto it = hive_map.find(virtual_column.getNameInStorage()); if (it != hive_map.end()) { - chunk.addColumn(virtual_column.getTypeInStorage()->createColumnConst(chunk.getNumRows(), it->second)->convertToFullColumnIfConst()); + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), convertFieldToType(Field(it->second), *virtual_column.type))->convertToFullColumnIfConst()); hive_map.erase(it); } } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 72922be60bd..594253a32c1 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -50,7 +50,11 @@ auto extractSingleValueFromBlock(const Block & block, const String & name) } NameSet getVirtualNamesForFileLikeStorage(); -VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, std::string path = "", FormatSettings settings = FormatSettings()); +VirtualColumnsDescription getVirtualsForFileLikeStorage( + const ColumnsDescription & storage_columns, + const ContextPtr & context, + std::string sample_path = "", + std::optional format_settings_ = std::nullopt); ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns); @@ -77,13 +81,14 @@ struct VirtualsForFileLikeStorage std::optional size { std::nullopt }; const String * filename { nullptr }; std::optional last_modified { std::nullopt }; + const String & hive_partitioning_path = ""; }; std::map parseFromPath(const std::string& path); void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, - VirtualsForFileLikeStorage virtual_values, const std::string & hive_partitioning_path = ""); + VirtualsForFileLikeStorage virtual_values); } } diff --git a/src/TableFunctions/TableFunctionFormat.cpp b/src/TableFunctions/TableFunctionFormat.cpp index 66152cb0c91..ad2a142a140 100644 --- a/src/TableFunctions/TableFunctionFormat.cpp +++ b/src/TableFunctions/TableFunctionFormat.cpp @@ -85,10 +85,9 @@ ColumnsDescription TableFunctionFormat::getActualTableStructure(ContextPtr conte if (structure == "auto") { SingleReadBufferIterator read_buffer_iterator(std::make_unique(data)); - std::string sample_path; if (format == "auto") - return detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, sample_path, context).first; - return readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, sample_path, context); + 
return detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, context).first; + return readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, context); } return parseColumnsListFromString(structure, context); } @@ -132,12 +131,11 @@ StoragePtr TableFunctionFormat::executeImpl(const ASTPtr & /*ast_function*/, Con String format_name = format; if (structure == "auto") { - std::string sample_path; SingleReadBufferIterator read_buffer_iterator(std::make_unique(data)); if (format_name == "auto") - std::tie(columns, format_name) = detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, sample_path, context); + std::tie(columns, format_name) = detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, context); else - columns = readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, sample_path, context); + columns = readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, context); } else { diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index 6ef1fcdf652..e0f46caf1c8 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -60,7 +60,10 @@ Stanley Gibson Elizabeth Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth -Elizabeth Gordon Elizabeth +42 2020-01-01 +[1,2,3] 42.42 +Array(Int64) LowCardinality(Float64) +101 1 TESTING THE S3 PARTITIONING first last Elizabeth diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 334bfef4f02..9d805b39b8a 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -28,7 +28,13 @@ SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=E SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; SELECT *, _non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = _column0;""" +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = _column0; + +SELECT _number, _date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1; +SELECT _array, _float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; +SELECT toTypeName(_array), toTypeName(_float) FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; +SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') WHERE _number = 42; +""" $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; @@ -59,8 +65,7 @@ SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/colum SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _non_existing_column FROM 
url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = _column0;""" +SELECT *, _non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;""" $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; diff --git a/tests/queries/0_stateless/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9b6a78cf8cc7cd3ece15e13c9b2f222c8f09b81e GIT binary patch literal 1308 zcmWG=3^EjD5Z%Hr`iosh^b{kI%_hpmz#!kv!2kyTLxU6Z9~tnZHa*@opEc(Au?K1g zOD4aYo#~scS*oJ`_R8gD$~^!1^Jl8v?6d#uZT@&1Z&h*OEsK+iS@vM( z^NvMD|`UJH2qG^xTWJ-dT6$7G6DVTky7Woy5#*nvWVEJpR{CJ{Fy0- zE8ux@_5^8x!?dEIRau&2MyW=j!5h*xtj<|H$T%nI_ zrsjz?W}YW@dt8{DRBI|`*(jU(m2ZmM@u#NQ!s{)z%{yLgtZF$)cAddC?xOT5D^_mz z-x7J9sr1v$v$K{(^`5h;Sz-1gc2*AGUh7}8F0R?}-B&E(IrH;G`GUhY z?q@1K*wQW0otd;iYI&}N?~AIE{%tkCroWN7t$#4bGw~KP0|PJ-eBc+|z=1tM#0JF{ zU3TEfTh6OHr)jl`=8?k_CV5&tiR=x1?{{sI`|Af*?oUEIqS_tiuleY8e||}EY3bMB zzp9qaKhIf|e>9xYs^&t{(WWC|y8X+=Uc{}=?T>Xh_5JxVk(1Vsywf&)T&i$tu2}yJ zsTDW>>9!Q_yZT7oEaCof4t43QdkFv1JFG`q9?h6g zxTpBgk6%&qwlli6{)!hkc#l_C=)}P;-Ys+NvjP>bYG~cCGCw}YQ1x-0z@w1)u@}^n zTV#|>Z7-{GtbTT=rr=<)~?``+iTxh4l+3|MS-tdVRHm+9w`h0!z=3knV zrSnX_{WmK}KJ?@4(a#30zmF(AmC{eNN7s8Lx}H>x1pMHFk2oys;%$ zvXN_R)m$dd8M|y^7q?Bh-x;&%icdYm3!CL}KR{`PNz%rYL4r4>G&wsZDZV&4BQ-Zs zl!ZZ*N0mu}Jvl$8G&j!xn4o|vkwidc4g-VODMm>dNgXu?8BrcdQ3gqbdKRFR7=zd% z4mA!N3D&gCqT&(>R>!2I%v3Q34HQ1GkiyV!C<@hogF|f<&;XY3{QMLNR)w6z;u4^K eWG+xU(4JF_Y8(t2Y%V}QxHvIf1_}lM%S8a*|2_@? literal 0 HcmV?d00001 diff --git a/tests/queries/0_stateless/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9b6a78cf8cc7cd3ece15e13c9b2f222c8f09b81e GIT binary patch literal 1308 zcmWG=3^EjD5Z%Hr`iosh^b{kI%_hpmz#!kv!2kyTLxU6Z9~tnZHa*@opEc(Au?K1g zOD4aYo#~scS*oJ`_R8gD$~^!1^Jl8v?6d#uZT@&1Z&h*OEsK+iS@vM( z^NvMD|`UJH2qG^xTWJ-dT6$7G6DVTky7Woy5#*nvWVEJpR{CJ{Fy0- zE8ux@_5^8x!?dEIRau&2MyW=j!5h*xtj<|H$T%nI_ zrsjz?W}YW@dt8{DRBI|`*(jU(m2ZmM@u#NQ!s{)z%{yLgtZF$)cAddC?xOT5D^_mz z-x7J9sr1v$v$K{(^`5h;Sz-1gc2*AGUh7}8F0R?}-B&E(IrH;G`GUhY z?q@1K*wQW0otd;iYI&}N?~AIE{%tkCroWN7t$#4bGw~KP0|PJ-eBc+|z=1tM#0JF{ zU3TEfTh6OHr)jl`=8?k_CV5&tiR=x1?{{sI`|Af*?oUEIqS_tiuleY8e||}EY3bMB zzp9qaKhIf|e>9xYs^&t{(WWC|y8X+=Uc{}=?T>Xh_5JxVk(1Vsywf&)T&i$tu2}yJ zsTDW>>9!Q_yZT7oEaCof4t43QdkFv1JFG`q9?h6g zxTpBgk6%&qwlli6{)!hkc#l_C=)}P;-Ys+NvjP>bYG~cCGCw}YQ1x-0z@w1)u@}^n zTV#|>Z7-{GtbTT=rr=<)~?``+iTxh4l+3|MS-tdVRHm+9w`h0!z=3knV zrSnX_{WmK}KJ?@4(a#30zmF(AmC{eNN7s8Lx}H>x1pMHFk2oys;%$ zvXN_R)m$dd8M|y^7q?Bh-x;&%icdYm3!CL}KR{`PNz%rYL4r4>G&wsZDZV&4BQ-Zs zl!ZZ*N0mu}Jvl$8G&j!xn4o|vkwidc4g-VODMm>dNgXu?8BrcdQ3gqbdKRFR7=zd% z4mA!N3D&gCqT&(>R>!2I%v3Q34HQ1GkiyV!C<@hogF|f<&;XY3{QMLNR)w6z;u4^K eWG+xU(4JF_Y8(t2Y%V}QxHvIf1_}lM%S8a*|2_@? 
literal 0 HcmV?d00001 From 1f33eb32b0c80b9dde27a8d7aa9ad26c271aceae Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 11 Jul 2024 03:02:15 +0000 Subject: [PATCH 088/265] try to drop projection correctly --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.cpp | 2 +- src/Core/SettingsEnums.cpp | 3 +- src/Core/SettingsEnums.h | 3 +- src/Interpreters/InterpreterDeleteQuery.cpp | 61 +------------------ src/Interpreters/MutationsInterpreter.cpp | 6 +- src/Storages/MergeTree/MutateTask.cpp | 7 ++- ...61_lightweight_delete_projection.reference | 5 -- .../03161_lightweight_delete_projection.sql | 33 +--------- 9 files changed, 18 insertions(+), 104 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c884f8f80c4..f7b44ea775c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -588,7 +588,7 @@ class IColumn; M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ M(UInt64, lightweight_deletes_sync, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes", 0) \ - M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection of this table then do lightweight delete, or do lightweight delete then rebuild projections.", 0) \ + M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop projections of this table's relevant parts.", 0) \ M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 5174cf82c2e..194292a467e 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -70,7 +70,7 @@ static std::initializer_listgetSettingsRef().lightweight_mutation_projection_mode; - auto dropOrClearProjections = [&](bool isDrop) - { - std::vector all_projections = metadata_snapshot->projections.getAllRegisteredNames(); - - /// Drop projections first so that lightweight delete can be performed. - for (const auto & projection : all_projections) - { - String alter_query = - "ALTER TABLE " + table->getStorageID().getFullTableName() - + (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster)) - + (isDrop ? 
" DROP" : " CLEAR") +" PROJECTION " + projection; - - ParserAlterQuery parser; - ASTPtr alter_ast = parseQuery( - parser, - alter_query.data(), - alter_query.data() + alter_query.size(), - "ALTER query", - 0, - DBMS_DEFAULT_MAX_PARSER_DEPTH, - DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); - - InterpreterAlterQuery alter_interpreter(alter_ast, context); - alter_interpreter.execute(); - } - - return all_projections; - }; - if (mode == LightweightMutationProjectionMode::THROW) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, @@ -156,43 +127,13 @@ BlockIO InterpreterDeleteQuery::execute() } else if (mode == LightweightMutationProjectionMode::DROP) { - dropOrClearProjections(true); - } - else if (mode == LightweightMutationProjectionMode::REBUILD) - { - std::vector all_projections{dropOrClearProjections(false)}; - BlockIO res = lightweightDelete(); - - for (const auto & projection : all_projections) - { - String alter_query = - "ALTER TABLE " + table->getStorageID().getFullTableName() - + (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster)) - + " MATERIALIZE PROJECTION " + projection; - - ParserAlterQuery parser; - ASTPtr alter_ast = parseQuery( - parser, - alter_query.data(), - alter_query.data() + alter_query.size(), - "ALTER query", - 0, - DBMS_DEFAULT_MAX_PARSER_DEPTH, - DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); - - InterpreterAlterQuery alter_interpreter(alter_ast, context); - alter_interpreter.execute(); - } - - return res; + return lightweightDelete(); } else { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unrecognized lightweight_mutation_projection_mode, only throw and drop are allowed."); } - - return lightweightDelete(); } throw Exception(ErrorCodes::BAD_ARGUMENTS, diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 6d3a4f30b34..ace285bcfc9 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -487,7 +487,11 @@ static void validateUpdateColumns( if (column_name == RowExistsColumn::name) { if (!source.supportsLightweightDelete()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); + { + // if (!source.getStorage()->isMergeTree() + // || context->getSettingsRef().lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW) + // throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); + } } else if (virtual_columns.tryGet(column_name)) { diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index a552ee89aee..8ca987eb1f8 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1042,6 +1042,8 @@ struct MutationContext /// Whether we need to count lightweight delete rows in this mutation bool count_lightweight_deleted_rows; + + bool lightweight_mutation_mode; }; using MutationContextPtr = std::shared_ptr; @@ -1571,7 +1573,7 @@ private: } else { - if (ctx->source_part->checksums.has(projection.getDirectoryName())) + if (!ctx->lightweight_mutation_mode && ctx->source_part->checksums.has(projection.getDirectoryName())) entries_to_hardlink.insert(projection.getDirectoryName()); } } @@ -2255,7 +2257,8 @@ bool MutateTask::prepare() if (ctx->mutating_pipeline_builder.initialized()) ctx->execute_ttl_type = MutationHelpers::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies()); - if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && 
ctx->updated_header.has(RowExistsColumn::name)) + ctx->lightweight_mutation_mode = ctx->updated_header.has(RowExistsColumn::name); + if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && ctx->lightweight_mutation_mode) { /// This mutation contains lightweight delete and we need to count the deleted rows, /// Reset existing_rows_count of new data part to 0 and it will be updated while writing _row_exists column diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index 307d3cb53fc..e69de29bb2d 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -1,5 +0,0 @@ -1231 John 33 -8888 Alice 50 -6666 Ksenia 48 -8888 Alice 50 -p users 3 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index fb32646b46a..4e674fa0cfd 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -10,14 +10,12 @@ CREATE TABLE users ( ) ENGINE = MergeTree order by uid; INSERT INTO users VALUES (1231, 'John', 33); -INSERT INTO users VALUES (6666, 'Ksenia', 48); -INSERT INTO users VALUES (8888, 'Alice', 50); DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } -DELETE FROM users WHERE uid = 8888 SETTINGS lightweight_mutation_projection_mode = 'throw'; -- { serverError NOT_IMPLEMENTED } +DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'throw'; -- { serverError NOT_IMPLEMENTED } -DELETE FROM users WHERE uid = 6666 SETTINGS lightweight_mutation_projection_mode = 'drop'; +DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'drop'; SYSTEM FLUSH LOGS; @@ -26,33 +24,8 @@ SELECT name, `table` FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users'); +WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); SELECT * FROM users ORDER BY uid; -DROP TABLE users; - -CREATE TABLE users ( - uid Int16, - name String, - age Int16, - projection p (select * order by age) -) ENGINE = MergeTree order by uid; - -INSERT INTO users VALUES (1231, 'John', 33), (6666, 'Ksenia', 48), (8888, 'Alice', 50); - -DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'rebuild'; - -SELECT * FROM users ORDER BY uid; - -SYSTEM FLUSH LOGS; - --- expecting projection p with 3 rows is active -SELECT - name, - `table`, - rows, -FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND active = 1; - DROP TABLE users; \ No newline at end of file From fa65e374dcb66c2e927f52ea521e9a8586feef65 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 11 Jul 2024 11:57:33 +0200 Subject: [PATCH 089/265] fix docs --- docs/en/sql-reference/table-functions/file.md | 4 ++-- docs/en/sql-reference/table-functions/hdfs.md | 4 ++-- docs/en/sql-reference/table-functions/s3.md | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index d21f523ab8e..838a7ab61de 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -206,7 +206,7 @@ SELECT count(*) 
FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3 - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. -## Hive-style partitioning {#hive-style-patitioning} +## Hive-style partitioning {#hive-style-partitioning} When setting `use_hive_partitioning` is set to 1, ClickHouse can introduce virtual columns due to Hive partitioning style if the path has the specific structure. @@ -215,7 +215,7 @@ When setting `use_hive_partitioning` is set to 1, ClickHouse can introduce virtu Use virtual column, created with Hive-style partitioning ``` sql -SET use_hive_patitioning = 1; +SET use_hive_partitioning = 1; SELECT _specified_column from file('/specified_column=specified_data/file.txt'); ``` diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index beb1ad12532..fc84c431066 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -99,7 +99,7 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. -## Hive-style partitioning {#hive-style-patitioning} +## Hive-style partitioning {#hive-style-partitioning} When setting `use_hive_partitioning` is set to 1, ClickHouse can introduce virtual columns due to Hive partitioning style if the path has the specific structure. @@ -108,7 +108,7 @@ When setting `use_hive_partitioning` is set to 1, ClickHouse can introduce virtu Use virtual column, created with Hive-style partitioning ``` sql -SET use_hive_patitioning = 1; +SET use_hive_partitioning = 1; SELECT _specified_column from HDFS('hdfs://hdfs1:9000/specified_column=specified_data/file.txt'); ``` diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 45c4caa1a13..15074a77475 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -274,7 +274,7 @@ FROM s3( - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. In case of archive shows uncompressed file size of the file inside the archive. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. -## Hive-style partitioning {#hive-style-patitioning} +## Hive-style partitioning {#hive-style-partitioning} When setting `use_hive_partitioning` is set to 1, ClickHouse can introduce virtual columns due to Hive partitioning style if the path has the specific structure. 
@@ -283,7 +283,7 @@ When setting `use_hive_partitioning` is set to 1, ClickHouse can introduce virtu Use virtual column, created with Hive-style partitioning ``` sql -SET use_hive_patitioning = 1; +SET use_hive_partitioning = 1; SELECT _specified_column from HDFS('hdfs://hdfs1:9000/specified_column=specified_data/file.txt'); ``` From eb085ea585d10f077d1ce66ee3f663ca016d24e8 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 11 Jul 2024 13:06:29 +0000 Subject: [PATCH 090/265] fix --- src/Interpreters/MutationsInterpreter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index ace285bcfc9..c2341463041 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -488,7 +488,7 @@ static void validateUpdateColumns( { if (!source.supportsLightweightDelete()) { - // if (!source.getStorage()->isMergeTree() + // if (!source.getStorage()->isMergeTree() // || context->getSettingsRef().lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW) // throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); } From df104abcc60366df3946a23c52c2e67a92dcb545 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 11 Jul 2024 17:20:19 +0200 Subject: [PATCH 091/265] try to fix tests --- .../03203_hive_style_partitioning.reference | 8 ++++---- .../03203_hive_style_partitioning.sh | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index e0f46caf1c8..0e6b6052946 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -29,6 +29,10 @@ Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth Elizabeth Gordon Elizabeth +42 2020-01-01 +[1,2,3] 42.42 +Array(Int64) LowCardinality(Float64) +101 1 TESTING THE URL PARTITIONING first last Elizabeth @@ -60,10 +64,6 @@ Stanley Gibson Elizabeth Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth -42 2020-01-01 -[1,2,3] 42.42 -Array(Int64) LowCardinality(Float64) -101 1 TESTING THE S3 PARTITIONING first last Elizabeth diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 9d805b39b8a..e74f24bfd80 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -5,10 +5,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE FILE HIVE PARTITIONING'" +$CLICKHOUSE_CLIENT -q "SELECT 'TESTING THE FILE HIVE PARTITIONING'" -$CLICKHOUSE_LOCAL -n -q """ +$CLICKHOUSE_CLIENT -n -q """ set use_hive_partitioning = 1; SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; @@ -36,17 +36,17 @@ SELECT toTypeName(_array), toTypeName(_float) FROM file('$CURDIR/data_hive/parti SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') WHERE _number = 42; """ -$CLICKHOUSE_LOCAL -n -q """ +$CLICKHOUSE_CLIENT -n -q """ set use_hive_partitioning = 0; SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" -$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE URL PARTITIONING'" +$CLICKHOUSE_CLIENT -q "SELECT 'TESTING THE URL PARTITIONING'" -$CLICKHOUSE_LOCAL -n -q """ +$CLICKHOUSE_CLIENT -n -q """ set use_hive_partitioning = 1; SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; @@ -67,17 +67,17 @@ SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/colum SELECT *, _non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;""" -$CLICKHOUSE_LOCAL -n -q """ +$CLICKHOUSE_CLIENT -n -q """ set use_hive_partitioning = 0; SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" -$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3 PARTITIONING'" +$CLICKHOUSE_CLIENT -q "SELECT 'TESTING THE S3 PARTITIONING'" -$CLICKHOUSE_LOCAL -n -q """ +$CLICKHOUSE_CLIENT -n -q """ set use_hive_partitioning = 1; SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; @@ -100,7 +100,7 @@ SELECT *, _non_existing_column FROM s3('http://localhost:11111/test/hive_partiti SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = _column0; """ -$CLICKHOUSE_LOCAL -n -q """ +$CLICKHOUSE_CLIENT -n -q """ set use_hive_partitioning = 0; SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; From 9257c4aac299836dc3b1e215c8fd8ba9b190d3b4 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 11 Jul 2024 15:31:51 +0000 Subject: [PATCH 092/265] change support lightweight delete condition --- src/Interpreters/InterpreterDeleteQuery.cpp | 5 +++-- src/Interpreters/MutationsInterpreter.cpp | 6 +----- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 4 +--- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 0f081c522dd..a7d0264f0b0 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -60,6 +60,7 @@ BlockIO InterpreterDeleteQuery::execute() auto table_lock = table->lockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); + bool hasProjection = table->hasProjection(); auto lightweightDelete = [&]() { @@ -107,13 +108,13 @@ BlockIO InterpreterDeleteQuery::execute() table->mutate(mutation_commands, getContext()); return {}; } - else if 
(table->supportsLightweightDelete()) + else if (!hasProjection && table->supportsLightweightDelete()) { return lightweightDelete(); } else { - if (table->hasProjection()) + if (hasProjection) { auto context = Context::createCopy(getContext()); auto mode = context->getSettingsRef().lightweight_mutation_projection_mode; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index c2341463041..6d3a4f30b34 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -487,11 +487,7 @@ static void validateUpdateColumns( if (column_name == RowExistsColumn::name) { if (!source.supportsLightweightDelete()) - { - // if (!source.getStorage()->isMergeTree() - // || context->getSettingsRef().lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW) - // throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); - } + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); } else if (virtual_columns.tryGet(column_name)) { diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index c2e0e778220..0ef8bcfc681 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1641,11 +1641,9 @@ void IMergeTreeDataPart::loadColumns(bool require) } -/// Project part / part with project parts / compact part doesn't support LWD. bool IMergeTreeDataPart::supportLightweightDeleteMutate() const { - return (part_type == MergeTreeDataPartType::Wide || part_type == MergeTreeDataPartType::Compact) && - parent_part == nullptr && projection_parts.empty(); + return (part_type == MergeTreeDataPartType::Wide || part_type == MergeTreeDataPartType::Compact); } bool IMergeTreeDataPart::hasLightweightDelete() const From 4f11dbc7f372d46769da4ab3af6db83b7967faa0 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 11 Jul 2024 18:25:33 +0000 Subject: [PATCH 093/265] fix with wide part --- src/Storages/MergeTree/MutateTask.cpp | 11 +++--- .../03161_lightweight_delete_projection.sql | 36 ++++++++++++++++++- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 8ca987eb1f8..57784067720 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1043,7 +1043,7 @@ struct MutationContext /// Whether we need to count lightweight delete rows in this mutation bool count_lightweight_deleted_rows; - bool lightweight_mutation_mode; + bool lightweight_delete_mode; }; using MutationContextPtr = std::shared_ptr; @@ -1573,7 +1573,7 @@ private: } else { - if (!ctx->lightweight_mutation_mode && ctx->source_part->checksums.has(projection.getDirectoryName())) + if (!ctx->lightweight_delete_mode && ctx->source_part->checksums.has(projection.getDirectoryName())) entries_to_hardlink.insert(projection.getDirectoryName()); } } @@ -1843,7 +1843,8 @@ private: hardlinked_files.insert(it->name()); } } - else if (!endsWith(it->name(), ".tmp_proj")) // ignore projection tmp merge dir + /// Ignore projection tmp merge dir, and under lightweight delete mode ignore projection files. 
+ else if (!endsWith(it->name(), ".tmp_proj") && !ctx->lightweight_delete_mode) { // it's a projection part directory ctx->new_data_part->getDataPartStorage().createProjection(destination); @@ -2257,8 +2258,8 @@ bool MutateTask::prepare() if (ctx->mutating_pipeline_builder.initialized()) ctx->execute_ttl_type = MutationHelpers::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies()); - ctx->lightweight_mutation_mode = ctx->updated_header.has(RowExistsColumn::name); - if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && ctx->lightweight_mutation_mode) + ctx->lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name); + if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && ctx->lightweight_delete_mode) { /// This mutation contains lightweight delete and we need to count the deleted rows, /// Reset existing_rows_count of new data part to 0 and it will be updated while writing _row_exists column diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 4e674fa0cfd..bfeb0127fa4 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -1,13 +1,47 @@ DROP TABLE IF EXISTS users; +-- compact part CREATE TABLE users ( uid Int16, name String, age Int16, projection p1 (select count(), age group by age), projection p2 (select age, name group by age, name) -) ENGINE = MergeTree order by uid; +) ENGINE = MergeTree order by uid +SETTINGS min_bytes_for_wide_part = 10485760; + +INSERT INTO users VALUES (1231, 'John', 33); + +DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } + +DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'throw'; -- { serverError NOT_IMPLEMENTED } + +DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'drop'; + +SYSTEM FLUSH LOGS; + +-- expecting no projection +SELECT + name, + `table` +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); + +SELECT * FROM users ORDER BY uid; + +DROP TABLE users; + + +-- wide part +CREATE TABLE users ( + uid Int16, + name String, + age Int16, + projection p1 (select count(), age group by age), + projection p2 (select age, name group by age, name) +) ENGINE = MergeTree order by uid +SETTINGS min_bytes_for_wide_part = 0; INSERT INTO users VALUES (1231, 'John', 33); From df9211c345e8bcfc53ed392a351e6320991240d1 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 11 Jul 2024 18:32:38 +0000 Subject: [PATCH 094/265] fix --- src/Storages/MergeTree/MutateTask.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 57784067720..2adcb49d6a3 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1844,7 +1844,7 @@ private: } } /// Ignore projection tmp merge dir, and under lightweight delete mode ignore projection files. 
- else if (!endsWith(it->name(), ".tmp_proj") && !ctx->lightweight_delete_mode) + else if (!endsWith(it->name(), ".tmp_proj") && !ctx->lightweight_delete_mode) { // it's a projection part directory ctx->new_data_part->getDataPartStorage().createProjection(destination); From 10dd4a9fe66914899ec5e5c89e5b9cc24096f64c Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 11 Jul 2024 21:16:47 +0200 Subject: [PATCH 095/265] debugging tests --- .../0_stateless/03203_hive_style_partitioning.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index e74f24bfd80..14b4a116596 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -5,10 +5,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "SELECT 'TESTING THE FILE HIVE PARTITIONING'" +$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE FILE HIVE PARTITIONING'" -$CLICKHOUSE_CLIENT -n -q """ +$CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; @@ -36,17 +36,17 @@ SELECT toTypeName(_array), toTypeName(_float) FROM file('$CURDIR/data_hive/parti SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') WHERE _number = 42; """ -$CLICKHOUSE_CLIENT -n -q """ +$CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" -$CLICKHOUSE_CLIENT -q "SELECT 'TESTING THE URL PARTITIONING'" +$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE URL PARTITIONING'" -$CLICKHOUSE_CLIENT -n -q """ +$CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; @@ -67,14 +67,14 @@ SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/colum SELECT *, _non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;""" -$CLICKHOUSE_CLIENT -n -q """ +$CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" -$CLICKHOUSE_CLIENT -q "SELECT 'TESTING THE S3 PARTITIONING'" +$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3 PARTITIONING'" $CLICKHOUSE_CLIENT -n -q """ From 119777cd7346a32f45588d069bf6a89efe091867 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 12 Jul 2024 12:03:25 +0200 Subject: [PATCH 096/265] update reference --- .../0_stateless/03203_hive_style_partitioning.reference | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index 0e6b6052946..d187f4cdd2c 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -29,9 +29,9 @@ Eugenia Greer Elizabeth 
Jeffery Delgado Elizabeth Clara Cross Elizabeth Elizabeth Gordon Elizabeth -42 2020-01-01 -[1,2,3] 42.42 -Array(Int64) LowCardinality(Float64) +42 2020-01-01 +[1,2,3] 42.42 +Array(Int64) LowCardinality(Float64) 101 1 TESTING THE URL PARTITIONING From 0988e1deadf34736f126e9eb3a2162d3abbe1314 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 12 Jul 2024 14:59:43 +0200 Subject: [PATCH 097/265] update tests --- .../queries/0_stateless/03203_hive_style_partitioning.reference | 2 +- tests/queries/0_stateless/03203_hive_style_partitioning.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index d187f4cdd2c..be43048dd01 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -96,4 +96,4 @@ Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth Elizabeth Gordon Elizabeth -1 +OK diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 14b4a116596..58a74a3ca8f 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -104,4 +104,4 @@ $CLICKHOUSE_CLIENT -n -q """ set use_hive_partitioning = 0; SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; -""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER" +""" 2>&1 | grep -F -q "UNKNOWN_IDENTIFIER" && echo "OK" || echo "FAIL"; From 9c6a49b6d474836ee894ddaaa02ebb982370d25c Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 12 Jul 2024 14:30:46 +0000 Subject: [PATCH 098/265] fix WriteBufferFromPocoSocketChunked::finalizeImpl() --- src/IO/WriteBufferFromPocoSocketChunked.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/WriteBufferFromPocoSocketChunked.cpp b/src/IO/WriteBufferFromPocoSocketChunked.cpp index 98c5126c24b..9da46ee2d10 100644 --- a/src/IO/WriteBufferFromPocoSocketChunked.cpp +++ b/src/IO/WriteBufferFromPocoSocketChunked.cpp @@ -202,7 +202,7 @@ void WriteBufferFromPocoSocketChunked::nextImpl() void WriteBufferFromPocoSocketChunked::finalizeImpl() { - if (offset() == sizeof(*chunk_size_ptr)) + if (chunked && offset() == sizeof(*chunk_size_ptr)) pos -= sizeof(*chunk_size_ptr); WriteBufferFromPocoSocket::finalizeImpl(); } From 201f813516e1283a4d0528bf71753e8291526ccf Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Sat, 13 Jul 2024 02:37:09 +0000 Subject: [PATCH 099/265] add prep for rebuild --- .../MergeTree/MergeMutateSelectedEntry.h | 5 +- .../MergeTree/MergeTreeMutationEntry.cpp | 4 +- .../MergeTree/MergeTreeMutationEntry.h | 6 ++- .../MergeTree/MutatePlainMergeTreeTask.cpp | 2 + src/Storages/MergeTree/MutateTask.cpp | 52 +++++++++++++------ src/Storages/StorageMergeTree.cpp | 11 +++- 6 files changed, 60 insertions(+), 20 deletions(-) diff --git a/src/Storages/MergeTree/MergeMutateSelectedEntry.h b/src/Storages/MergeTree/MergeMutateSelectedEntry.h index c420cbca12b..116c7d26552 100644 --- a/src/Storages/MergeTree/MergeMutateSelectedEntry.h +++ b/src/Storages/MergeTree/MergeMutateSelectedEntry.h @@ -40,12 +40,15 @@ struct MergeMutateSelectedEntry CurrentlyMergingPartsTaggerPtr tagger; MutationCommandsConstPtr commands; MergeTreeTransactionPtr txn; + Field 
lightweight_delete_projection_mode; MergeMutateSelectedEntry(FutureMergedMutatedPartPtr future_part_, CurrentlyMergingPartsTaggerPtr tagger_, - MutationCommandsConstPtr commands_, const MergeTreeTransactionPtr & txn_ = NO_TRANSACTION_PTR) + MutationCommandsConstPtr commands_, const MergeTreeTransactionPtr & txn_ = NO_TRANSACTION_PTR, + const Field & lightweight_delete_projection_mode_ = LightweightMutationProjectionMode::THROW) : future_part(future_part_) , tagger(std::move(tagger_)) , commands(commands_) , txn(txn_) + , lightweight_delete_projection_mode(lightweight_delete_projection_mode_) {} }; diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index 4dbccb91620..06f4875d120 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -48,7 +48,8 @@ UInt64 MergeTreeMutationEntry::parseFileName(const String & file_name_) } MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk_, const String & path_prefix_, UInt64 tmp_number, - const TransactionID & tid_, const WriteSettings & settings) + const TransactionID & tid_, const WriteSettings & settings, + const Field & lightweight_delete_projection_mode_) : create_time(time(nullptr)) , commands(std::move(commands_)) , disk(std::move(disk_)) @@ -56,6 +57,7 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskP , file_name("tmp_mutation_" + toString(tmp_number) + ".txt") , is_temp(true) , tid(tid_) + , lightweight_delete_projection_mode(lightweight_delete_projection_mode_) { try { diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.h b/src/Storages/MergeTree/MergeTreeMutationEntry.h index 04297f2852a..cbc7e2d4274 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.h +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.h @@ -36,9 +36,13 @@ struct MergeTreeMutationEntry /// or UnknownCSN if it's not committed (yet) or RolledBackCSN if it's rolled back or PrehistoricCSN if there is no transaction. CSN csn = Tx::UnknownCSN; + /// From query context. + Field lightweight_delete_projection_mode; + /// Create a new entry and write it to a temporary file. 
MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk, const String & path_prefix_, UInt64 tmp_number, - const TransactionID & tid_, const WriteSettings & settings); + const TransactionID & tid_, const WriteSettings & settings, + const Field & lightweight_delete_projection_mode_); MergeTreeMutationEntry(const MergeTreeMutationEntry &) = delete; MergeTreeMutationEntry(MergeTreeMutationEntry &&) = default; diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 20f387137e7..1bf337973ff 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -140,6 +140,8 @@ ContextMutablePtr MutatePlainMergeTreeTask::createTaskContext() const auto queryId = getQueryId(); context->setCurrentQueryId(queryId); context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MUTATION); + if (merge_mutate_entry) + context->setSetting("lightweight_mutation_projection_mode", merge_mutate_entry->lightweight_delete_projection_mode); return context; } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 2adcb49d6a3..ed603abd9c3 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -656,7 +656,9 @@ static NameSet collectFilesToSkip( const std::set & indices_to_recalc, const String & mrk_extension, const std::set & projections_to_recalc, - const std::set & stats_to_recalc) + const std::set & stats_to_recalc, + const StorageMetadataPtr & metadata_snapshot, + bool lightweight_delete_mode) { NameSet files_to_skip = source_part->getFileNamesWithoutChecksums(); @@ -680,8 +682,16 @@ static NameSet collectFilesToSkip( } } - for (const auto & projection : projections_to_recalc) - files_to_skip.insert(projection->getDirectoryName()); + if (lightweight_delete_mode) + { + for (const auto & projection : metadata_snapshot->getProjections()) + files_to_skip.insert(projection.getDirectoryName()); + } + else + { + for (const auto & projection : projections_to_recalc) + files_to_skip.insert(projection->getDirectoryName()); + } for (const auto & stat : stats_to_recalc) files_to_skip.insert(stat->getFileName() + STATS_FILE_SUFFIX); @@ -1042,8 +1052,6 @@ struct MutationContext /// Whether we need to count lightweight delete rows in this mutation bool count_lightweight_deleted_rows; - - bool lightweight_delete_mode; }; using MutationContextPtr = std::shared_ptr; @@ -1573,7 +1581,7 @@ private: } else { - if (!ctx->lightweight_delete_mode && ctx->source_part->checksums.has(projection.getDirectoryName())) + if (!ctx->updated_header.has(RowExistsColumn::name) && ctx->source_part->checksums.has(projection.getDirectoryName())) entries_to_hardlink.insert(projection.getDirectoryName()); } } @@ -1843,8 +1851,7 @@ private: hardlinked_files.insert(it->name()); } } - /// Ignore projection tmp merge dir, and under lightweight delete mode ignore projection files. 
- else if (!endsWith(it->name(), ".tmp_proj") && !ctx->lightweight_delete_mode) + else if (!endsWith(it->name(), ".tmp_proj")) // ignore projection tmp merge dir { // it's a projection part directory ctx->new_data_part->getDataPartStorage().createProjection(destination); @@ -2193,6 +2200,7 @@ bool MutateTask::prepare() context_for_reading->setSetting("allow_asynchronous_read_from_io_pool_for_merge_tree", false); context_for_reading->setSetting("max_streams_for_merge_tree_reading", Field(0)); context_for_reading->setSetting("read_from_filesystem_cache_if_exists_otherwise_bypass_cache", 1); + context_for_reading->setSetting("lightweight_mutation_projection_mode", Field(ctx->context->getSettingsRef().lightweight_mutation_projection_mode)); MutationHelpers::splitAndModifyMutationCommands( ctx->source_part, ctx->metadata_snapshot, @@ -2217,6 +2225,15 @@ bool MutateTask::prepare() ctx->mutating_pipeline_builder = ctx->interpreter->execute(); ctx->updated_header = ctx->interpreter->getUpdatedHeader(); ctx->progress_callback = MergeProgressCallback((*ctx->mutate_entry)->ptr(), ctx->watch_prev_elapsed, *ctx->stage_progress); + + // ctx->updated_header.has(RowExistsColumn::name); + // for (const auto & projection : ctx->metadata_snapshot->getProjections()) + // { + // if (!ctx->source_part->hasProjection(projection.name)) + // continue; + + // ctx->materialized_projections.insert(projection.name); + // } } auto single_disk_volume = std::make_shared("volume_" + ctx->future_part->name, ctx->space_reservation->getDisk(), 0); @@ -2258,8 +2275,8 @@ bool MutateTask::prepare() if (ctx->mutating_pipeline_builder.initialized()) ctx->execute_ttl_type = MutationHelpers::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies()); - ctx->lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name); - if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && ctx->lightweight_delete_mode) + bool lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name); + if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && lightweight_delete_mode) { /// This mutation contains lightweight delete and we need to count the deleted rows, /// Reset existing_rows_count of new data part to 0 and it will be updated while writing _row_exists column @@ -2296,10 +2313,13 @@ bool MutateTask::prepare() ctx->context, ctx->materialized_indices); - ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate( - ctx->source_part, - ctx->metadata_snapshot, - ctx->materialized_projections); + if (!lightweight_delete_mode) + { + ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate( + ctx->source_part, + ctx->metadata_snapshot, + ctx->materialized_projections); + } ctx->stats_to_recalc = MutationHelpers::getStatisticsToRecalculate(ctx->metadata_snapshot, ctx->materialized_statistics); @@ -2310,7 +2330,9 @@ bool MutateTask::prepare() ctx->indices_to_recalc, ctx->mrk_extension, ctx->projections_to_recalc, - ctx->stats_to_recalc); + ctx->stats_to_recalc, + ctx->metadata_snapshot, + lightweight_delete_mode); ctx->files_to_rename = MutationHelpers::collectFilesForRenames( ctx->source_part, diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 611289ffd78..063e3b7f064 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -517,7 +517,8 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context { std::lock_guard 
lock(currently_processing_in_background_mutex); - MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings()); + MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings(), + Field(query_context->getSettingsRef().lightweight_mutation_projection_mode)); version = increment.get(); entry.commit(version); String mutation_id = entry.file_name; @@ -1282,12 +1283,18 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( auto commands = std::make_shared(); size_t current_ast_elements = 0; auto last_mutation_to_apply = mutations_end_it; + + /// Trying to grab it from query context. + Field lightweight_delete_projection_mode = LightweightMutationProjectionMode::THROW; + for (auto it = mutations_begin_it; it != mutations_end_it; ++it) { /// Do not squash mutations from different transactions to be able to commit/rollback them independently. if (first_mutation_tid != it->second.tid) break; + lightweight_delete_projection_mode = it->second.lightweight_delete_projection_mode; + size_t commands_size = 0; MutationCommands commands_for_size_validation; for (const auto & command : it->second.commands) @@ -1364,7 +1371,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( future_part->part_format = part->getFormat(); tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}, false), *this, metadata_snapshot, true); - return std::make_shared(future_part, std::move(tagger), commands, txn); + return std::make_shared(future_part, std::move(tagger), commands, txn, lightweight_delete_projection_mode); } } From 0fc14520c821f22b493d32657fede6be10832d60 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 13 Jul 2024 23:06:37 +0000 Subject: [PATCH 100/265] add server termination on exit --- programs/server/fuzzers/tcp_protocol_fuzzer.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/programs/server/fuzzers/tcp_protocol_fuzzer.cpp b/programs/server/fuzzers/tcp_protocol_fuzzer.cpp index 950ea09669a..7cebdc2ad65 100644 --- a/programs/server/fuzzers/tcp_protocol_fuzzer.cpp +++ b/programs/server/fuzzers/tcp_protocol_fuzzer.cpp @@ -10,6 +10,7 @@ #include #include +#include #include @@ -25,6 +26,12 @@ static int64_t port = 9000; using namespace std::chrono_literals; +void on_exit() +{ + BaseDaemon::terminate(); + main_app.wait(); +} + extern "C" int LLVMFuzzerInitialize(int * argc, char ***argv) { @@ -60,6 +67,8 @@ int LLVMFuzzerInitialize(int * argc, char ***argv) exit(-1); } + atexit(on_exit); + return 0; } From 3ccc2aed4c76eba20e0fc88768412bbfacafbb95 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 13 Jul 2024 23:44:13 +0000 Subject: [PATCH 101/265] add fuzzer_arguments to fuzzer runner --- docker/test/libfuzzer/run_libfuzzer.py | 7 +++++++ tests/fuzz/tcp_protocol_fuzzer.options | 4 ++++ 2 files changed, 11 insertions(+) create mode 100644 tests/fuzz/tcp_protocol_fuzzer.options diff --git a/docker/test/libfuzzer/run_libfuzzer.py b/docker/test/libfuzzer/run_libfuzzer.py index 5ed019490d5..cdd09dfa3be 100755 --- a/docker/test/libfuzzer/run_libfuzzer.py +++ b/docker/test/libfuzzer/run_libfuzzer.py @@ -20,6 +20,7 @@ def run_fuzzer(fuzzer: str): options_file = f"{fuzzer}.options" custom_libfuzzer_options = "" + fuzzer_arguments = "" with Path(options_file) as path: if path.exists() and path.is_file(): @@ -47,6 +48,12 @@ def run_fuzzer(fuzzer: str): for key, value in 
parser["libfuzzer"].items() ) + if parser.has_section("fuzzer_arguments"): + fuzzer_arguments = " ".join( + ("%s" % key) if value == "" else ("%s=%s" % (key, value)) + for key, value in parser["fuzzer_arguments"].items() + ) + cmd_line = f"{DEBUGGER} ./{fuzzer} {FUZZER_ARGS} {corpus_dir}" if custom_libfuzzer_options: cmd_line += f" {custom_libfuzzer_options}" diff --git a/tests/fuzz/tcp_protocol_fuzzer.options b/tests/fuzz/tcp_protocol_fuzzer.options new file mode 100644 index 00000000000..4885669d91d --- /dev/null +++ b/tests/fuzz/tcp_protocol_fuzzer.options @@ -0,0 +1,4 @@ +[fuzzer_arguments] +--log-file=tcp_protocol_fuzzer.log +--= +--logging.terminal=0 From d4116aeaeaeec3b17cd813d686a815476a794bed Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 15 Jul 2024 01:31:40 +0000 Subject: [PATCH 102/265] fix --- src/Core/SettingsEnums.h | 2 +- src/Storages/MergeTree/MergeMutateSelectedEntry.h | 4 ++-- src/Storages/MergeTree/MergeTreeMutationEntry.cpp | 2 +- src/Storages/MergeTree/MergeTreeMutationEntry.h | 4 ++-- src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp | 3 +-- src/Storages/StorageMergeTree.cpp | 4 ++-- 6 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 3611dfa72be..67fbce31be8 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -342,7 +342,7 @@ DECLARE_SETTING_ENUM(ParallelReplicasCustomKeyFilterType) enum class LightweightMutationProjectionMode : uint8_t { THROW, - DROP + DROP, }; DECLARE_SETTING_ENUM(LightweightMutationProjectionMode) diff --git a/src/Storages/MergeTree/MergeMutateSelectedEntry.h b/src/Storages/MergeTree/MergeMutateSelectedEntry.h index 116c7d26552..bf2d1a7f677 100644 --- a/src/Storages/MergeTree/MergeMutateSelectedEntry.h +++ b/src/Storages/MergeTree/MergeMutateSelectedEntry.h @@ -40,10 +40,10 @@ struct MergeMutateSelectedEntry CurrentlyMergingPartsTaggerPtr tagger; MutationCommandsConstPtr commands; MergeTreeTransactionPtr txn; - Field lightweight_delete_projection_mode; + LightweightMutationProjectionMode lightweight_delete_projection_mode; MergeMutateSelectedEntry(FutureMergedMutatedPartPtr future_part_, CurrentlyMergingPartsTaggerPtr tagger_, MutationCommandsConstPtr commands_, const MergeTreeTransactionPtr & txn_ = NO_TRANSACTION_PTR, - const Field & lightweight_delete_projection_mode_ = LightweightMutationProjectionMode::THROW) + const LightweightMutationProjectionMode & lightweight_delete_projection_mode_ = LightweightMutationProjectionMode::THROW) : future_part(future_part_) , tagger(std::move(tagger_)) , commands(commands_) diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index 06f4875d120..d1bd8efa7a5 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -49,7 +49,7 @@ UInt64 MergeTreeMutationEntry::parseFileName(const String & file_name_) MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk_, const String & path_prefix_, UInt64 tmp_number, const TransactionID & tid_, const WriteSettings & settings, - const Field & lightweight_delete_projection_mode_) + const LightweightMutationProjectionMode & lightweight_delete_projection_mode_) : create_time(time(nullptr)) , commands(std::move(commands_)) , disk(std::move(disk_)) diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.h b/src/Storages/MergeTree/MergeTreeMutationEntry.h index cbc7e2d4274..3aca744aa15 100644 --- 
a/src/Storages/MergeTree/MergeTreeMutationEntry.h +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.h @@ -37,12 +37,12 @@ struct MergeTreeMutationEntry CSN csn = Tx::UnknownCSN; /// From query context. - Field lightweight_delete_projection_mode; + LightweightMutationProjectionMode lightweight_delete_projection_mode; /// Create a new entry and write it to a temporary file. MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk, const String & path_prefix_, UInt64 tmp_number, const TransactionID & tid_, const WriteSettings & settings, - const Field & lightweight_delete_projection_mode_); + const LightweightMutationProjectionMode & lightweight_delete_projection_mode_); MergeTreeMutationEntry(const MergeTreeMutationEntry &) = delete; MergeTreeMutationEntry(MergeTreeMutationEntry &&) = default; diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 1bf337973ff..666dbe7e61e 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -140,8 +140,7 @@ ContextMutablePtr MutatePlainMergeTreeTask::createTaskContext() const auto queryId = getQueryId(); context->setCurrentQueryId(queryId); context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MUTATION); - if (merge_mutate_entry) - context->setSetting("lightweight_mutation_projection_mode", merge_mutate_entry->lightweight_delete_projection_mode); + context->setSetting("lightweight_mutation_projection_mode", merge_mutate_entry->lightweight_delete_projection_mode); return context; } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 063e3b7f064..7f210779916 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -518,7 +518,7 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context std::lock_guard lock(currently_processing_in_background_mutex); MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings(), - Field(query_context->getSettingsRef().lightweight_mutation_projection_mode)); + query_context->getSettingsRef().lightweight_mutation_projection_mode); version = increment.get(); entry.commit(version); String mutation_id = entry.file_name; @@ -1285,7 +1285,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( auto last_mutation_to_apply = mutations_end_it; /// Trying to grab it from query context. 
- Field lightweight_delete_projection_mode = LightweightMutationProjectionMode::THROW; + LightweightMutationProjectionMode lightweight_delete_projection_mode = LightweightMutationProjectionMode::THROW; for (auto it = mutations_begin_it; it != mutations_end_it; ++it) { From 3c09d585cde8068e1f57a1b2adfcdf8b126a8574 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 15 Jul 2024 02:14:58 +0000 Subject: [PATCH 103/265] fix --- src/Storages/MergeTree/MergeMutateSelectedEntry.h | 1 + src/Storages/MergeTree/MergeTreeMutationEntry.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Storages/MergeTree/MergeMutateSelectedEntry.h b/src/Storages/MergeTree/MergeMutateSelectedEntry.h index bf2d1a7f677..f75d10d9ecb 100644 --- a/src/Storages/MergeTree/MergeMutateSelectedEntry.h +++ b/src/Storages/MergeTree/MergeMutateSelectedEntry.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.h b/src/Storages/MergeTree/MergeTreeMutationEntry.h index 3aca744aa15..dbb17654ddd 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.h +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB From 36fb1cc3e79c9570ba43f81e4e47041100a63d0d Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 15 Jul 2024 13:15:14 +0000 Subject: [PATCH 104/265] temporarily disable the setting in taskcontext --- src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 666dbe7e61e..19aa63d90a2 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -140,7 +140,7 @@ ContextMutablePtr MutatePlainMergeTreeTask::createTaskContext() const auto queryId = getQueryId(); context->setCurrentQueryId(queryId); context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MUTATION); - context->setSetting("lightweight_mutation_projection_mode", merge_mutate_entry->lightweight_delete_projection_mode); + // context->setSetting("lightweight_mutation_projection_mode", merge_mutate_entry->lightweight_delete_projection_mode); return context; } From 4df94a0ef3f8af73328d0a8f45bb217cc70b2e45 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 15 Jul 2024 14:47:52 +0000 Subject: [PATCH 105/265] cleanup for setting in mergetree --- src/Core/Settings.h | 1 - src/Core/SettingsChangesHistory.cpp | 1 - src/Interpreters/InterpreterDeleteQuery.cpp | 66 +++++-------------- src/Storages/IStorage.h | 3 - .../MergeTree/MergeMutateSelectedEntry.h | 7 +- src/Storages/MergeTree/MergeTreeData.cpp | 15 ----- src/Storages/MergeTree/MergeTreeData.h | 2 - .../MergeTree/MergeTreeMutationEntry.cpp | 4 +- .../MergeTree/MergeTreeMutationEntry.h | 7 +- src/Storages/MergeTree/MergeTreeSettings.h | 1 + .../MergeTree/MutatePlainMergeTreeTask.cpp | 1 - src/Storages/MergeTree/MutateTask.cpp | 10 --- src/Storages/StorageMergeTree.cpp | 10 +-- 13 files changed, 25 insertions(+), 103 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f7b44ea775c..bafc3f93846 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -588,7 +588,6 @@ class IColumn; M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 
2 - wait all replicas if they exist.", 0) \ M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \ M(UInt64, lightweight_deletes_sync, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes", 0) \ - M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop projections of this table's relevant parts.", 0) \ M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \ M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 194292a467e..d6cc0112e0a 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -70,7 +70,6 @@ static std::initializer_listlockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); - bool hasProjection = table->hasProjection(); - auto lightweightDelete = [&]() + if (table->supportsDelete()) + { + /// Convert to MutationCommand + MutationCommands mutation_commands; + MutationCommand mut_command; + + mut_command.type = MutationCommand::Type::DELETE; + mut_command.predicate = delete_query.predicate; + + mutation_commands.emplace_back(mut_command); + + table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); + MutationsInterpreter::Settings settings(false); + MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate(); + table->mutate(mutation_commands, getContext()); + return {}; + } + else if (table->supportsLightweightDelete()) { if (!getContext()->getSettingsRef().enable_lightweight_delete) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, @@ -89,54 +104,9 @@ BlockIO InterpreterDeleteQuery::execute() context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync)); InterpreterAlterQuery alter_interpreter(alter_ast, context); return alter_interpreter.execute(); - }; - - if (table->supportsDelete()) - { - /// Convert to MutationCommand - MutationCommands mutation_commands; - MutationCommand mut_command; - - mut_command.type = MutationCommand::Type::DELETE; - mut_command.predicate = delete_query.predicate; - - mutation_commands.emplace_back(mut_command); - - table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); - MutationsInterpreter::Settings settings(false); - MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate(); - table->mutate(mutation_commands, getContext()); - return {}; - } - else if (!hasProjection && table->supportsLightweightDelete()) - { - return lightweightDelete(); } else { - if (hasProjection) - { - auto context = Context::createCopy(getContext()); - auto mode = context->getSettingsRef().lightweight_mutation_projection_mode; - - if (mode == LightweightMutationProjectionMode::THROW) 
- { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "DELETE query is not supported for table {} as it has projections. " - "User should drop all the projections manually before running the query", - table->getStorageID().getFullTableName()); - } - else if (mode == LightweightMutationProjectionMode::DROP) - { - return lightweightDelete(); - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Unrecognized lightweight_mutation_projection_mode, only throw and drop are allowed."); - } - } - throw Exception(ErrorCodes::BAD_ARGUMENTS, "DELETE query is not supported for table {}", table->getStorageID().getFullTableName()); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 6217470780d..991c8ff64af 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -262,9 +262,6 @@ public: /// Return true if storage can execute lightweight delete mutations. virtual bool supportsLightweightDelete() const { return false; } - /// Return true if storage has any projection. - virtual bool hasProjection() const { return false; } - /// Return true if storage can execute 'DELETE FROM' mutations. This is different from lightweight delete /// because those are internally translated into 'ALTER UDPATE' mutations. virtual bool supportsDelete() const { return false; } diff --git a/src/Storages/MergeTree/MergeMutateSelectedEntry.h b/src/Storages/MergeTree/MergeMutateSelectedEntry.h index f75d10d9ecb..e7efe00741c 100644 --- a/src/Storages/MergeTree/MergeMutateSelectedEntry.h +++ b/src/Storages/MergeTree/MergeMutateSelectedEntry.h @@ -2,7 +2,7 @@ #include #include -#include + namespace DB { @@ -41,15 +41,12 @@ struct MergeMutateSelectedEntry CurrentlyMergingPartsTaggerPtr tagger; MutationCommandsConstPtr commands; MergeTreeTransactionPtr txn; - LightweightMutationProjectionMode lightweight_delete_projection_mode; MergeMutateSelectedEntry(FutureMergedMutatedPartPtr future_part_, CurrentlyMergingPartsTaggerPtr tagger_, - MutationCommandsConstPtr commands_, const MergeTreeTransactionPtr & txn_ = NO_TRANSACTION_PTR, - const LightweightMutationProjectionMode & lightweight_delete_projection_mode_ = LightweightMutationProjectionMode::THROW) + MutationCommandsConstPtr commands_, const MergeTreeTransactionPtr & txn_ = NO_TRANSACTION_PTR) : future_part(future_part_) , tagger(std::move(tagger_)) , commands(commands_) , txn(txn_) - , lightweight_delete_projection_mode(lightweight_delete_projection_mode_) {} }; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e31f6db5409..5182147350e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6158,21 +6158,6 @@ bool MergeTreeData::supportsLightweightDelete() const return true; } -bool MergeTreeData::hasProjection() const -{ - auto lock = lockParts(); - for (const auto & part : data_parts_by_info) - { - if (part->getState() == MergeTreeDataPartState::Outdated - || part->getState() == MergeTreeDataPartState::Deleting) - continue; - - if (part->hasProjection()) - return true; - } - return false; -} - MergeTreeData::ProjectionPartsVector MergeTreeData::getAllProjectionPartsVector(MergeTreeData::DataPartStateVector * out_states) const { ProjectionPartsVector res; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index c8b721038c6..7d216f989c1 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -439,8 +439,6 @@ public: bool supportsLightweightDelete() const override; 
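This cleanup moves the projection decision out of the DELETE interpreter: the `hasProjection()` probes added earlier in the series are deleted again (the removal from `MergeTreeData` follows just below), and `lightweight_mutation_projection_mode` is re-declared in `MergeTreeSettings.h`, which makes it a per-table MergeTree knob rather than a query-level setting. A sketch of what that should look like from SQL after this commit; the table mirrors the one used in the series' own test, and treating the setting as table-level is an assumption based on where the declaration now lives:

``` sql
-- `lightweight_mutation_projection_mode` now lives in MergeTreeSettings.h,
-- so it can be attached to the table definition itself (assumption).
CREATE TABLE users
(
    uid Int16,
    name String,
    age Int16,
    PROJECTION p1 (SELECT count(), age GROUP BY age)
)
ENGINE = MergeTree
ORDER BY uid
SETTINGS lightweight_mutation_projection_mode = 'drop';

-- With 'drop', a lightweight DELETE is expected to succeed and discard the
-- affected projection parts; the default 'throw' keeps the NOT_IMPLEMENTED error.
DELETE FROM users WHERE uid = 1231;
```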
- bool hasProjection() const override; - bool areAsynchronousInsertsEnabled() const override { return getSettings()->async_insert; } bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override; diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index d1bd8efa7a5..4dbccb91620 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -48,8 +48,7 @@ UInt64 MergeTreeMutationEntry::parseFileName(const String & file_name_) } MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk_, const String & path_prefix_, UInt64 tmp_number, - const TransactionID & tid_, const WriteSettings & settings, - const LightweightMutationProjectionMode & lightweight_delete_projection_mode_) + const TransactionID & tid_, const WriteSettings & settings) : create_time(time(nullptr)) , commands(std::move(commands_)) , disk(std::move(disk_)) @@ -57,7 +56,6 @@ MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskP , file_name("tmp_mutation_" + toString(tmp_number) + ".txt") , is_temp(true) , tid(tid_) - , lightweight_delete_projection_mode(lightweight_delete_projection_mode_) { try { diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.h b/src/Storages/MergeTree/MergeTreeMutationEntry.h index dbb17654ddd..04297f2852a 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.h +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.h @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB @@ -37,13 +36,9 @@ struct MergeTreeMutationEntry /// or UnknownCSN if it's not committed (yet) or RolledBackCSN if it's rolled back or PrehistoricCSN if there is no transaction. CSN csn = Tx::UnknownCSN; - /// From query context. - LightweightMutationProjectionMode lightweight_delete_projection_mode; - /// Create a new entry and write it to a temporary file. MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk, const String & path_prefix_, UInt64 tmp_number, - const TransactionID & tid_, const WriteSettings & settings, - const LightweightMutationProjectionMode & lightweight_delete_projection_mode_); + const TransactionID & tid_, const WriteSettings & settings); MergeTreeMutationEntry(const MergeTreeMutationEntry &) = delete; MergeTreeMutationEntry(MergeTreeMutationEntry &&) = default; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index c0afd781c7e..a458a21ca1b 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -215,6 +215,7 @@ struct Settings; M(Float, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns, 0.9f, "If the value of a column of the primary key in data part changes at least in this ratio of times, skip loading next columns in memory. This allows to save memory usage by not loading useless columns of the primary key.", 0) \ /** Projection settings. 
*/ \ M(UInt64, max_projections, 25, "The maximum number of merge tree projections.", 0) \ + M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop projections of this table's relevant parts.", 0) \ #define MAKE_OBSOLETE_MERGE_TREE_SETTING(M, TYPE, NAME, DEFAULT) \ M(TYPE, NAME, DEFAULT, "Obsolete setting, does nothing.", BaseSettingsHelpers::Flags::OBSOLETE) diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 19aa63d90a2..20f387137e7 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -140,7 +140,6 @@ ContextMutablePtr MutatePlainMergeTreeTask::createTaskContext() const auto queryId = getQueryId(); context->setCurrentQueryId(queryId); context->setBackgroundOperationTypeForContext(ClientInfo::BackgroundOperationType::MUTATION); - // context->setSetting("lightweight_mutation_projection_mode", merge_mutate_entry->lightweight_delete_projection_mode); return context; } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index ed603abd9c3..0734174dbef 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2200,7 +2200,6 @@ bool MutateTask::prepare() context_for_reading->setSetting("allow_asynchronous_read_from_io_pool_for_merge_tree", false); context_for_reading->setSetting("max_streams_for_merge_tree_reading", Field(0)); context_for_reading->setSetting("read_from_filesystem_cache_if_exists_otherwise_bypass_cache", 1); - context_for_reading->setSetting("lightweight_mutation_projection_mode", Field(ctx->context->getSettingsRef().lightweight_mutation_projection_mode)); MutationHelpers::splitAndModifyMutationCommands( ctx->source_part, ctx->metadata_snapshot, @@ -2225,15 +2224,6 @@ bool MutateTask::prepare() ctx->mutating_pipeline_builder = ctx->interpreter->execute(); ctx->updated_header = ctx->interpreter->getUpdatedHeader(); ctx->progress_callback = MergeProgressCallback((*ctx->mutate_entry)->ptr(), ctx->watch_prev_elapsed, *ctx->stage_progress); - - // ctx->updated_header.has(RowExistsColumn::name); - // for (const auto & projection : ctx->metadata_snapshot->getProjections()) - // { - // if (!ctx->source_part->hasProjection(projection.name)) - // continue; - - // ctx->materialized_projections.insert(projection.name); - // } } auto single_disk_volume = std::make_shared("volume_" + ctx->future_part->name, ctx->space_reservation->getDisk(), 0); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 7f210779916..8404e5c9cd9 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -517,8 +517,7 @@ Int64 StorageMergeTree::startMutation(const MutationCommands & commands, Context { std::lock_guard lock(currently_processing_in_background_mutex); - MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings(), - query_context->getSettingsRef().lightweight_mutation_projection_mode); + MergeTreeMutationEntry entry(commands, disk, relative_data_path, insert_increment.get(), current_tid, getContext()->getWriteSettings()); version = increment.get(); entry.commit(version); String mutation_id = entry.file_name; @@ -1284,17 +1283,12 @@ 
MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( size_t current_ast_elements = 0; auto last_mutation_to_apply = mutations_end_it; - /// Trying to grab it from query context. - LightweightMutationProjectionMode lightweight_delete_projection_mode = LightweightMutationProjectionMode::THROW; - for (auto it = mutations_begin_it; it != mutations_end_it; ++it) { /// Do not squash mutations from different transactions to be able to commit/rollback them independently. if (first_mutation_tid != it->second.tid) break; - lightweight_delete_projection_mode = it->second.lightweight_delete_projection_mode; - size_t commands_size = 0; MutationCommands commands_for_size_validation; for (const auto & command : it->second.commands) @@ -1371,7 +1365,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( future_part->part_format = part->getFormat(); tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}, false), *this, metadata_snapshot, true); - return std::make_shared(future_part, std::move(tagger), commands, txn, lightweight_delete_projection_mode); + return std::make_shared(future_part, std::move(tagger), commands, txn); } } From 68ed5767d795e7b5792fed839198f53d43581c47 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 15 Jul 2024 15:31:17 +0000 Subject: [PATCH 106/265] fix merge problem --- src/Storages/MergeTree/MergeTreeData.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 125b2c8c513..38ca0aed9da 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6160,6 +6160,11 @@ bool MergeTreeData::supportsLightweightDelete() const return true; } +bool MergeTreeData::areAsynchronousInsertsEnabled() const +{ + return getSettings()->async_insert; +} + MergeTreeData::ProjectionPartsVector MergeTreeData::getAllProjectionPartsVector(MergeTreeData::DataPartStateVector * out_states) const { ProjectionPartsVector res; From 7841a8b401d0ddb3f9bce9e5dc03049a65a1067a Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 15 Jul 2024 16:27:38 +0000 Subject: [PATCH 107/265] fixes after review --- docs/en/operations/settings/settings.md | 2 +- .../table-functions/azureBlobStorage.md | 13 ++++ docs/en/sql-reference/table-functions/file.md | 8 +-- docs/en/sql-reference/table-functions/hdfs.md | 4 +- docs/en/sql-reference/table-functions/s3.md | 8 +-- docs/en/sql-reference/table-functions/url.md | 13 ++++ .../ObjectStorage/StorageObjectStorage.cpp | 16 +++-- .../StorageObjectStorageCluster.cpp | 4 +- .../StorageObjectStorageSource.cpp | 17 ++--- .../StorageObjectStorageSource.h | 4 +- src/Storages/StorageFile.cpp | 20 ++---- src/Storages/StorageURL.cpp | 11 +-- src/Storages/VirtualColumnUtils.cpp | 64 +++++++++--------- src/Storages/VirtualColumnUtils.h | 7 +- .../03203_hive_style_partitioning.reference | 5 ++ .../03203_hive_style_partitioning.sh | 13 ++++ .../column0=Elizabeth/sample.parquet | Bin 0 -> 1308 bytes .../partitioning/identifier=2070/email.csv | 5 ++ 18 files changed, 114 insertions(+), 100 deletions(-) create mode 100644 tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet create mode 100644 tests/queries/0_stateless/data_hive/partitioning/identifier=2070/email.csv diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 84912b4574f..fb076e76bdd 100644 --- a/docs/en/operations/settings/settings.md +++ 
b/docs/en/operations/settings/settings.md @@ -5611,6 +5611,6 @@ Default value: `1GiB`. ## use_hive_partitioning -Allows the usage of Hive-style partitioning in queries. When enabled, ClickHouse interprets and maintains table partitions in a way that is consistent with the Hive partitioning scheme, which is commonly used in Hadoop ecosystems. +When enabled, ClickHouse will detect Hive-style partitioning in path (`/name=value/`) in file-like table engines [File](../../engines/table-engines/special/file.md#hive-style-partitioning)/[S3](../../engines/table-engines/integrations/s3.md#hive-style-partitioning)/[URL](../../engines/table-engines/special/url.md#hive-style-partitioning)/[HDFS](../../engines/table-engines/integrations/hdfs.md#hive-style-partitioning)/[AzureBlobStorage](../../engines/table-engines/integrations/azureBlobStorage.md#hive-style-partitioning) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`. Default value: `0`. diff --git a/docs/en/sql-reference/table-functions/azureBlobStorage.md b/docs/en/sql-reference/table-functions/azureBlobStorage.md index f59fedeb3a2..104ac4e26df 100644 --- a/docs/en/sql-reference/table-functions/azureBlobStorage.md +++ b/docs/en/sql-reference/table-functions/azureBlobStorage.md @@ -77,3 +77,16 @@ SELECT count(*) FROM azureBlobStorage('DefaultEndpointsProtocol=https;AccountNam **See Also** - [AzureBlobStorage Table Engine](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) + +## Hive-style partitioning {#hive-style-partitioning} + +When setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`. + +**Example** + +Use virtual column, created with Hive-style partitioning + +``` sql +SET use_hive_partitioning = 1; +SELECT * from azureBlobStorage(config, storage_account_url='...', container='...', blob_path='http://data/path/date=*/country=*/code=*/*.parquet', format='Parquet', structure='Date DateTime64, Country String, Code UInt64') where _date > '2020-01-01' and _country = 'Netherlands' and code = 42; +``` diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 838a7ab61de..0669609a22a 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -208,7 +208,7 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3 ## Hive-style partitioning {#hive-style-partitioning} -When setting `use_hive_partitioning` is set to 1, ClickHouse can introduce virtual columns due to Hive partitioning style if the path has the specific structure. +When setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`. 
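One detail worth keeping in mind next to the rewritten wording: the values extracted from the path are not necessarily strings. The test reference updated earlier in this series shows `Array(Int64)` and `LowCardinality(Float64)` for path segments like `array=[1,2,3]` and `float=42.42`, so the virtual columns appear to go through type inference. A sketch mirroring that test, with an illustrative local path:

``` sql
SET use_hive_partitioning = 1;

-- For a file under .../array=[1,2,3]/float=42.42/sample.parquet, the inferred
-- types of the path-derived virtual columns can be checked directly:
SELECT toTypeName(_array), toTypeName(_float)
FROM file('data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet');
```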
**Example** @@ -216,11 +216,7 @@ Use virtual column, created with Hive-style partitioning ``` sql SET use_hive_partitioning = 1; -SELECT _specified_column from file('/specified_column=specified_data/file.txt'); -``` - -``` reference -specified_data +SELECT * from file('data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and code = 42; ``` ## Settings {#settings} diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index fc84c431066..6963d4e4b79 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -101,7 +101,7 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin ## Hive-style partitioning {#hive-style-partitioning} -When setting `use_hive_partitioning` is set to 1, ClickHouse can introduce virtual columns due to Hive partitioning style if the path has the specific structure. +When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow the use of partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`. **Example** @@ -109,7 +109,7 @@ Use virtual column, created with Hive-style partitioning ``` sql SET use_hive_partitioning = 1; -SELECT _specified_column from HDFS('hdfs://hdfs1:9000/specified_column=specified_data/file.txt'); +SELECT * from HDFS('hdfs://hdfs1:9000/data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and code = 42; ``` ``` reference diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 15074a77475..f3ee83afef4 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -276,7 +276,7 @@ FROM s3( ## Hive-style partitioning {#hive-style-partitioning} -When setting `use_hive_partitioning` is set to 1, ClickHouse can introduce virtual columns due to Hive partitioning style if the path has the specific structure. +When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow the use of partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`. **Example** @@ -284,11 +284,7 @@ Use virtual column, created with Hive-style partitioning ``` sql SET use_hive_partitioning = 1; -SELECT _specified_column from HDFS('hdfs://hdfs1:9000/specified_column=specified_data/file.txt'); -``` - -``` reference -specified_data +SELECT * from s3('s3://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and code = 42; ``` ## Storage Settings {#storage-settings} diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index 3bb7aff53a7..596355e2577 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -55,6 +55,19 @@ Character `|` inside patterns is used to specify failover addresses. They are it - `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. - `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
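For example (a sketch only; the URL below is hypothetical), these generic virtual columns can be selected alongside the data:

``` sql
-- _path, _file, _size and _time are supplied by the url() table function itself.
SELECT _path, _file, _size, _time
FROM url('https://example.com/data.csv', 'CSVWithNames')
LIMIT 1;
```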
+## Hive-style partitioning {#hive-style-partitioning} + +When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow the use of partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`. + +**Example** + +Use virtual column, created with Hive-style partitioning + +``` sql +SET use_hive_partitioning = 1; +SELECT * from url('http://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and code = 42; +``` + ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 48e9118e321..35cd1492642 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -35,16 +35,19 @@ namespace ErrorCodes std::string StorageObjectStorage::getPathSample(StorageInMemoryMetadata metadata, ContextPtr context) { + auto query_settings = configuration->getQuerySettings(context); + /// We don't want to throw an exception if there are no files with specified path. + query_settings.throw_on_zero_files_match = false; auto file_iterator = StorageObjectStorageSource::createFileIterator( configuration, + query_settings, object_storage, distributed_processing, context, {}, // predicate metadata.getColumns().getAll(), // virtual_columns nullptr, // read_keys - {}, // file_progress_callback - true // override_settings_for_hive_partitioning + {} // file_progress_callback ); if (auto file = file_iterator->next(0)) @@ -82,12 +85,10 @@ StorageObjectStorage::StorageObjectStorage( metadata.setConstraints(constraints_); metadata.setComment(comment); - if (sample_path.empty() && context->getSettings().use_hive_partitioning) + if (sample_path.empty()) sample_path = getPathSample(metadata, context); - else if (!context->getSettings().use_hive_partitioning) - sample_path = ""; - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context, sample_path)); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context, sample_path, format_settings)); setInMemoryMetadata(metadata); } @@ -224,7 +225,7 @@ private: return; auto context = getContext(); iterator_wrapper = StorageObjectStorageSource::createFileIterator( - configuration, object_storage, distributed_processing, + configuration, configuration->getQuerySettings(context), object_storage, distributed_processing, context, predicate, virtual_columns, nullptr, context->getFileProgressCallback()); } }; @@ -376,6 +377,7 @@ std::unique_ptr StorageObjectStorage::createReadBufferIterat { auto file_iterator = StorageObjectStorageSource::createFileIterator( configuration, + configuration->getQuerySettings(context), object_storage, false/* distributed_processing */, context, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 92327b4cde0..a88532e1ea9 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -84,8 +84,8 @@ RemoteQueryExecutor::Extension StorageObjectStorageCluster::getTaskIteratorExten const ActionsDAG::Node *
predicate, const ContextPtr & local_context) const { auto iterator = StorageObjectStorageSource::createFileIterator( - configuration, object_storage, /* distributed_processing */false, local_context, - predicate, virtual_columns, nullptr, local_context->getFileProgressCallback()); + configuration, configuration->getQuerySettings(local_context), object_storage, /* distributed_processing */false, + local_context, predicate, virtual_columns, nullptr, local_context->getFileProgressCallback()); auto callback = std::make_shared>([iterator]() mutable -> String { diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 2e5416d1ffd..707e7603368 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -99,14 +99,14 @@ std::string StorageObjectStorageSource::getUniqueStoragePathIdentifier( std::shared_ptr StorageObjectStorageSource::createFileIterator( ConfigurationPtr configuration, + const StorageObjectStorage::QuerySettings & query_settings, ObjectStoragePtr object_storage, bool distributed_processing, const ContextPtr & local_context, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, - std::function file_progress_callback, - bool override_settings_for_hive_partitioning) + std::function file_progress_callback) { if (distributed_processing) return std::make_shared( @@ -117,20 +117,16 @@ std::shared_ptr StorageObjectStorageSourc throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside {} name", configuration->getNamespaceType()); - auto settings = configuration->getQuerySettings(local_context); const bool is_archive = configuration->isArchive(); std::unique_ptr iterator; if (configuration->isPathWithGlobs()) { - bool throw_on_zero_files_match = settings.throw_on_zero_files_match; - if (override_settings_for_hive_partitioning) - throw_on_zero_files_match = false; /// Iterate through disclosed globs and make a source for each file iterator = std::make_unique( object_storage, configuration, predicate, virtual_columns, - local_context, is_archive ? nullptr : read_keys, settings.list_object_keys_size, - throw_on_zero_files_match, file_progress_callback); + local_context, is_archive ? nullptr : read_keys, query_settings.list_object_keys_size, + query_settings.throw_on_zero_files_match, file_progress_callback); } else { @@ -149,7 +145,7 @@ std::shared_ptr StorageObjectStorageSourc iterator = std::make_unique( object_storage, copy_configuration, virtual_columns, is_archive ? nullptr : read_keys, - settings.ignore_non_existent_file, file_progress_callback); + query_settings.ignore_non_existent_file, file_progress_callback); } if (is_archive) @@ -208,8 +204,7 @@ Chunk StorageObjectStorageSource::generate() .size = object_info->isArchive() ? 
object_info->fileSizeInArchive() : object_info->metadata->size_bytes, .filename = &filename, .last_modified = object_info->metadata->last_modified, - .hive_partitioning_path = object_info->getPath(), - }); + }, read_from_format_info.columns_description, getContext()); const auto & partition_columns = configuration->getPartitionColumns(); if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index a99bb068372..ff6d588b364 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -52,14 +52,14 @@ public: static std::shared_ptr createFileIterator( ConfigurationPtr configuration, + const StorageObjectStorage::QuerySettings & query_settings, ObjectStoragePtr object_storage, bool distributed_processing, const ContextPtr & local_context, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, - std::function file_progress_callback = {}, - bool override_settings_for_hive_partitioning = false); + std::function file_progress_callback = {}); static std::string getUniqueStoragePathIdentifier( const Configuration & configuration, diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 047631cbc54..42e27a13ca9 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1108,9 +1108,9 @@ void StorageFile::setStorageMetadata(CommonArguments args) setInMemoryMetadata(storage_metadata); std::string path_for_virtuals; - if (args.getContext()->getSettingsRef().use_hive_partitioning && !paths.empty()) + if (!paths.empty()) path_for_virtuals = paths[0]; - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), args.getContext(), path_for_virtuals, format_settings.value_or(FormatSettings{}))); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), args.getContext(), path_for_virtuals, format_settings)); } @@ -1452,10 +1452,6 @@ Chunk StorageFileSource::generate() chunk_size = input_format->getApproxBytesReadForChunk(); progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - std::string hive_partitioning_path; - if (getContext()->getSettingsRef().use_hive_partitioning) - hive_partitioning_path = current_path; - /// Enrich with virtual columns. VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( chunk, requested_virtual_columns, @@ -1463,9 +1459,8 @@ Chunk StorageFileSource::generate() .path = current_path, .size = current_file_size, .filename = (filename_override.has_value() ? 
&filename_override.value() : nullptr), - .last_modified = current_file_last_modified, - .hive_partitioning_path = hive_partitioning_path, - }); + .last_modified = current_file_last_modified + }, columns_description, getContext()); return chunk; } @@ -1648,17 +1643,10 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui size_t num_streams = max_num_streams; size_t files_to_read = 0; - Strings paths; if (storage->archive_info) - { files_to_read = storage->archive_info->paths_to_archives.size(); - paths = storage->archive_info->paths_to_archives; - } else - { files_to_read = storage->paths.size(); - paths = storage->paths; - } if (max_num_streams > files_to_read) num_streams = files_to_read; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 5da42638b87..f7560fa7910 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -164,10 +164,7 @@ IStorageURLBase::IStorageURLBase( storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); - std::string uri_for_partitioning; - if (context_->getSettingsRef().use_hive_partitioning) - uri_for_partitioning = getSampleURI(uri, context_); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_, uri_for_partitioning, format_settings.value_or(FormatSettings{}))); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_, getSampleURI(uri, context_), format_settings)); } @@ -426,9 +423,6 @@ Chunk StorageURLSource::generate() size_t chunk_size = 0; if (input_format) chunk_size = input_format->getApproxBytesReadForChunk(); - std::string hive_partitioning_path; - if (getContext()->getSettingsRef().use_hive_partitioning) - hive_partitioning_path = curr_uri.getPath(); progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( @@ -436,8 +430,7 @@ Chunk StorageURLSource::generate() { .path = curr_uri.getPath(), .size = current_file_size, - .hive_partitioning_path = hive_partitioning_path, - }); + }, columns_description, getContext()); return chunk; } diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 0efa9522ac6..fb5a345f424 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -119,15 +119,25 @@ NameSet getVirtualNamesForFileLikeStorage() return {"_path", "_file", "_size", "_time"}; } -std::map parseHivePartitioningKeysAndValues(const std::string& path) +std::unordered_map parseHivePartitioningKeysAndValues(const std::string& path, const ColumnsDescription & storage_columns) { std::string pattern = "/([^/]+)=([^/]+)"; re2::StringPiece input_piece(path); - std::map key_values; + std::unordered_map key_values; std::string key, value; + std::set used_keys; while (RE2::FindAndConsume(&input_piece, pattern, &key, &value)) - key_values["_" + key] = value; + { + if (used_keys.contains(key)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Link to file with enabled hive-style partitioning contains duplicated key {}, only unique keys required", key); + used_keys.insert(key); + + auto col_name = "_" + key; + while (storage_columns.has(col_name)) + col_name = "_" + col_name; + key_values[col_name] = value; + } return key_values; } @@ -148,17 +158,20 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription add_virtual("_size", makeNullable(std::make_shared())); add_virtual("_time", makeNullable(std::make_shared())); - auto map = parseHivePartitioningKeysAndValues(path); - for (auto& item : map) + if (context->getSettingsRef().use_hive_partitioning) { + auto map = parseHivePartitioningKeysAndValues(path, storage_columns); auto format_settings = format_settings_ ? 
*format_settings_ : getFormatSettings(context); - auto type = tryInferDataTypeByEscapingRule(item.second, format_settings, FormatSettings::EscapingRule::Raw); - if (type == nullptr) - type = std::make_shared(); - if (type->canBeInsideLowCardinality()) - add_virtual(item.first, std::make_shared(type)); - else - add_virtual(item.first, type); + for (auto & item : map) + { + auto type = tryInferDataTypeByEscapingRule(item.second, format_settings, FormatSettings::EscapingRule::Raw); + if (type == nullptr) + type = std::make_shared(); + if (type->canBeInsideLowCardinality()) + add_virtual(item.first, std::make_shared(type)); + else + add_virtual(item.first, type); + } } return desc; @@ -207,8 +220,6 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const { if (column.name == "_file" || column.name == "_path") block.insert({column.type->createColumn(), column.type, column.name}); - if (!getVirtualNamesForFileLikeStorage().contains(column.name)) - block.insert({column.type->createColumn(), column.type, column.name}); } block.insert({ColumnUInt64::create(), std::make_shared(), "_idx"}); @@ -222,9 +233,12 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, - VirtualsForFileLikeStorage virtual_values) + VirtualsForFileLikeStorage virtual_values, ColumnsDescription columns, ContextPtr context) { - auto hive_map = parseHivePartitioningKeysAndValues(virtual_values.hive_partitioning_path); + std::unordered_map hive_map; + if (context->getSettingsRef().use_hive_partitioning) + hive_map = parseHivePartitioningKeysAndValues(virtual_values.path, columns); + for (const auto & virtual_column : requested_virtual_columns) { if (virtual_column.name == "_path") @@ -258,23 +272,9 @@ void addRequestedFileLikeStorageVirtualsToChunk( else chunk.addColumn(virtual_column.type->createColumnConstWithDefaultValue(chunk.getNumRows())->convertToFullColumnIfConst()); } - else if (!hive_map.empty()) + else if (auto it = hive_map.find(virtual_column.getNameInStorage()); it != hive_map.end()) { - bool contains_virtual_column = std::any_of(hive_map.begin(), hive_map.end(), - [&](const auto& pair) - { - return requested_virtual_columns.contains(pair.first); - }); - - if (!contains_virtual_column) - hive_map.clear(); // If we cannot find any virtual column in requested, we don't add any of them to chunk - - auto it = hive_map.find(virtual_column.getNameInStorage()); - if (it != hive_map.end()) - { - chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), convertFieldToType(Field(it->second), *virtual_column.type))->convertToFullColumnIfConst()); - hive_map.erase(it); - } + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), convertFieldToType(Field(it->second), *virtual_column.type))->convertToFullColumnIfConst()); } } } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index aa7d4c4605b..29ec32ab375 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -7,8 +7,6 @@ #include #include -#include -#include #include @@ -81,14 +79,11 @@ struct VirtualsForFileLikeStorage std::optional size { std::nullopt }; const String * filename { nullptr }; std::optional last_modified { std::nullopt }; - const String & hive_partitioning_path = ""; }; -std::map parseFromPath(const std::string& path); - void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const 
NamesAndTypesList & requested_virtual_columns, - VirtualsForFileLikeStorage virtual_values); + VirtualsForFileLikeStorage virtual_values, ColumnsDescription columns = {}, ContextPtr context = {}); } } diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index be43048dd01..fc6da3a55c1 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -33,6 +33,11 @@ Elizabeth Gordon Elizabeth [1,2,3] 42.42 Array(Int64) LowCardinality(Float64) 101 +2070 +4081 +2070 +2070 +1 1 TESTING THE URL PARTITIONING first last Elizabeth diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 58a74a3ca8f..a5f3e36763d 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -36,6 +36,19 @@ SELECT toTypeName(_array), toTypeName(_float) FROM file('$CURDIR/data_hive/parti SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') WHERE _number = 42; """ +$CLICKHOUSE_LOCAL -n -q """ +set use_hive_partitioning = 1; + +SELECT _identifier FROM file('$CURDIR/data_hive/partitioning/identitier=*/email.csv') LIMIT 2; +SELECT __identifier FROM file('$CURDIR/data_hive/partitioning/identitier=*/email.csv') LIMIT 2; +""" + +$CLICKHOUSE_LOCAL -n -q """ +set use_hive_partitioning = 1; + +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet') LIMIT 10; +""" 2>&1 | grep -c "INCORRECT_DATA" + $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; diff --git a/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9b6a78cf8cc7cd3ece15e13c9b2f222c8f09b81e GIT binary patch literal 1308 zcmWG=3^EjD5Z%Hr`iosh^b{kI%_hpmz#!kv!2kyTLxU6Z9~tnZHa*@opEc(Au?K1g zOD4aYo#~scS*oJ`_R8gD$~^!1^Jl8v?6d#uZT@&1Z&h*OEsK+iS@vM( z^NvMD|`UJH2qG^xTWJ-dT6$7G6DVTky7Woy5#*nvWVEJpR{CJ{Fy0- zE8ux@_5^8x!?dEIRau&2MyW=j!5h*xtj<|H$T%nI_ zrsjz?W}YW@dt8{DRBI|`*(jU(m2ZmM@u#NQ!s{)z%{yLgtZF$)cAddC?xOT5D^_mz z-x7J9sr1v$v$K{(^`5h;Sz-1gc2*AGUh7}8F0R?}-B&E(IrH;G`GUhY z?q@1K*wQW0otd;iYI&}N?~AIE{%tkCroWN7t$#4bGw~KP0|PJ-eBc+|z=1tM#0JF{ zU3TEfTh6OHr)jl`=8?k_CV5&tiR=x1?{{sI`|Af*?oUEIqS_tiuleY8e||}EY3bMB zzp9qaKhIf|e>9xYs^&t{(WWC|y8X+=Uc{}=?T>Xh_5JxVk(1Vsywf&)T&i$tu2}yJ zsTDW>>9!Q_yZT7oEaCof4t43QdkFv1JFG`q9?h6g zxTpBgk6%&qwlli6{)!hkc#l_C=)}P;-Ys+NvjP>bYG~cCGCw}YQ1x-0z@w1)u@}^n zTV#|>Z7-{GtbTT=rr=<)~?``+iTxh4l+3|MS-tdVRHm+9w`h0!z=3knV zrSnX_{WmK}KJ?@4(a#30zmF(AmC{eNN7s8Lx}H>x1pMHFk2oys;%$ zvXN_R)m$dd8M|y^7q?Bh-x;&%icdYm3!CL}KR{`PNz%rYL4r4>G&wsZDZV&4BQ-Zs zl!ZZ*N0mu}Jvl$8G&j!xn4o|vkwidc4g-VODMm>dNgXu?8BrcdQ3gqbdKRFR7=zd% z4mA!N3D&gCqT&(>R>!2I%v3Q34HQ1GkiyV!C<@hogF|f<&;XY3{QMLNR)w6z;u4^K eWG+xU(4JF_Y8(t2Y%V}QxHvIf1_}lM%S8a*|2_@? 
literal 0 HcmV?d00001 diff --git a/tests/queries/0_stateless/data_hive/partitioning/identifier=2070/email.csv b/tests/queries/0_stateless/data_hive/partitioning/identifier=2070/email.csv new file mode 100644 index 00000000000..936d995cc64 --- /dev/null +++ b/tests/queries/0_stateless/data_hive/partitioning/identifier=2070/email.csv @@ -0,0 +1,5 @@ +_login_email,_identifier,_first_name,_last_name +laura@example.com,2070,Laura,Grey +craig@example.com,4081,Craig,Johnson +mary@example.com,9346,Mary,Jenkins +jamie@example.com,5079,Jamie,Smith From 899c5a64e078820caf2e68cbaf892d5d39e0af06 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 15 Jul 2024 17:14:11 +0000 Subject: [PATCH 108/265] some more fixes (docs + storageObjectStorage) --- docs/en/sql-reference/table-functions/azureBlobStorage.md | 2 +- docs/en/sql-reference/table-functions/file.md | 2 +- docs/en/sql-reference/table-functions/hdfs.md | 2 +- docs/en/sql-reference/table-functions/s3.md | 2 +- docs/en/sql-reference/table-functions/url.md | 2 +- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 7 ++++--- 6 files changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/table-functions/azureBlobStorage.md b/docs/en/sql-reference/table-functions/azureBlobStorage.md index 104ac4e26df..6936c807f96 100644 --- a/docs/en/sql-reference/table-functions/azureBlobStorage.md +++ b/docs/en/sql-reference/table-functions/azureBlobStorage.md @@ -88,5 +88,5 @@ Use virtual column, created with Hive-style partitioning ``` sql SET use_hive_partitioning = 1; -SELECT * from azureBlobStorage(config, storage_account_url='...', container='...', blob_path='http://data/path/date=*/country=*/code=*/*.parquet', format='Parquet', structure='Date DateTime64, Country String, Code UInt64') where _date > '2020-01-01' and _country = 'Netherlands' and code = 42; +SELECT * from azureBlobStorage(config, storage_account_url='...', container='...', blob_path='http://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42; ``` diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 0669609a22a..7908a3cb934 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -216,7 +216,7 @@ Use virtual column, created with Hive-style partitioning ``` sql SET use_hive_partitioning = 1; -SELECT * from file('data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and code = 42; +SELECT * from file('data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42; ``` ## Settings {#settings} diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index 6963d4e4b79..73fdc263d68 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -109,7 +109,7 @@ Use virtual column, created with Hive-style partitioning ``` sql SET use_hive_partitioning = 1; -SELECT * from HDFS('hdfs://hdfs1:9000/data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and code = 42; +SELECT * from HDFS('hdfs://hdfs1:9000/data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42; ``` ``` reference diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 
f3ee83afef4..1bd9f38517e 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -284,7 +284,7 @@ Use virtual column, created with Hive-style partitioning ``` sql SET use_hive_partitioning = 1; -SELECT * from s3('s3://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and code = 42; +SELECT * from s3('s3://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42; ``` ## Storage Settings {#storage-settings} diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index 596355e2577..b4027594e7c 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -65,7 +65,7 @@ Use virtual column, created with Hive-style partitioning ``` sql SET use_hive_partitioning = 1; -SELECT * from url('http://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and code = 42; +SELECT * from url('http://data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42; ``` ## Storage Settings {#storage-settings} diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 35cd1492642..d2cc73f14d7 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -85,7 +85,7 @@ StorageObjectStorage::StorageObjectStorage( metadata.setConstraints(constraints_); metadata.setComment(comment); - if (sample_path.empty()) + if (sample_path.empty() && context->getSettingsRef().use_hive_partitioning) sample_path = getPathSample(metadata, context); setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context, sample_path, format_settings)); @@ -412,8 +412,9 @@ std::string StorageObjectStorage::resolveFormatFromData( { ObjectInfos read_keys; auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + auto format_and_schema = detectFormatAndReadSchema(format_settings, *iterator, context).second; sample_path = iterator->getLastFilePath(); - return detectFormatAndReadSchema(format_settings, *iterator, context).second; + return format_and_schema; } std::pair StorageObjectStorage::resolveSchemaAndFormatFromData( @@ -425,8 +426,8 @@ std::pair StorageObjectStorage::resolveSchemaAn { ObjectInfos read_keys; auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); - sample_path = iterator->getLastFilePath(); auto [columns, format] = detectFormatAndReadSchema(format_settings, *iterator, context); + sample_path = iterator->getLastFilePath(); configuration->format = format; return std::pair(columns, format); } From 7d39535c989e16d818370cbbd9cbea891b21d07a Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 15 Jul 2024 17:20:46 +0000 Subject: [PATCH 109/265] storageObjectStorage small fix --- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 8291327992c..ee1169d2c5c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -400,8 +400,9 @@ ColumnsDescription 
StorageObjectStorage::resolveSchemaFromData( { ObjectInfos read_keys; auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + auto schema = readSchemaFromFormat(configuration->format, format_settings, *iterator, context); sample_path = iterator->getLastFilePath(); - return readSchemaFromFormat(configuration->format, format_settings, *iterator, context); + return schema; } std::string StorageObjectStorage::resolveFormatFromData( From 1bd9a1623f246dbf2a3098a4a022b6764aa3094d Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 15 Jul 2024 19:23:11 +0000 Subject: [PATCH 110/265] add throw option in low level --- src/Interpreters/MutationsInterpreter.cpp | 15 +++++++++++++++ src/Interpreters/MutationsInterpreter.h | 1 + src/Storages/IStorage.h | 3 +++ src/Storages/MergeTree/MergeTreeData.cpp | 15 +++++++++++++++ src/Storages/MergeTree/MergeTreeData.h | 2 ++ .../03161_lightweight_delete_projection.sql | 16 ++++++++++------ 6 files changed, 46 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 480c6736bc5..b61f7f78885 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -344,6 +344,11 @@ bool MutationsInterpreter::Source::hasProjection(const String & name) const return part && part->hasProjection(name); } +bool MutationsInterpreter::Source::hasProjection() const +{ + return part && part->hasProjection(); +} + bool MutationsInterpreter::Source::hasBrokenProjection(const String & name) const { return part && part->hasBrokenProjection(name); @@ -491,6 +496,16 @@ static void validateUpdateColumns( { if (!source.supportsLightweightDelete()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); + + if (const MergeTreeData * merge_tree_data = source.getMergeTreeData(); merge_tree_data != nullptr) + { + if (merge_tree_data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW + && merge_tree_data->hasProjection()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "DELETE query is not supported for table {} as it has projections. " + "User should drop all the projections manually before running the query", + source.getStorage()->getStorageID().getFullTableName()); + } } else if (virtual_columns.tryGet(column_name)) { diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 6aaa233cda3..b792a33f904 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -126,6 +126,7 @@ public: bool materializeTTLRecalculateOnly() const; bool hasSecondaryIndex(const String & name) const; bool hasProjection(const String & name) const; + bool hasProjection() const; bool hasBrokenProjection(const String & name) const; bool isCompactPart() const; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 991c8ff64af..d302fcb26a7 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -259,6 +259,9 @@ public: /// Return true if there is at least one part containing lightweight deleted mask. virtual bool hasLightweightDeletedMask() const { return false; } + /// Return true if storage has any projection. + virtual bool hasProjection() const { return false; } + /// Return true if storage can execute lightweight delete mutations. 
virtual bool supportsLightweightDelete() const { return false; } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 38ca0aed9da..78a551591a6 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6160,6 +6160,21 @@ bool MergeTreeData::supportsLightweightDelete() const return true; } +bool MergeTreeData::hasProjection() const +{ + auto lock = lockParts(); + for (const auto & part : data_parts_by_info) + { + if (part->getState() == MergeTreeDataPartState::Outdated + || part->getState() == MergeTreeDataPartState::Deleting) + continue; + + if (part->hasProjection()) + return true; + } + return false; +} + bool MergeTreeData::areAsynchronousInsertsEnabled() const { return getSettings()->async_insert; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index d880928098b..7076b680521 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -439,6 +439,8 @@ public: bool supportsLightweightDelete() const override; + bool hasProjection() const override; + bool areAsynchronousInsertsEnabled() const override; bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override; diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index bfeb0127fa4..16a7468234b 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -13,11 +13,13 @@ SETTINGS min_bytes_for_wide_part = 10485760; INSERT INTO users VALUES (1231, 'John', 33); -DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } +ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; -DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'throw'; -- { serverError NOT_IMPLEMENTED } +DELETE FROM users WHERE uid = 1231; -- { serverError NOT_IMPLEMENTED } -DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'drop'; +ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; + +DELETE FROM users WHERE uid = 1231; SYSTEM FLUSH LOGS; @@ -45,11 +47,13 @@ SETTINGS min_bytes_for_wide_part = 0; INSERT INTO users VALUES (1231, 'John', 33); -DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } +ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; -DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'throw'; -- { serverError NOT_IMPLEMENTED } +DELETE FROM users WHERE uid = 1231; -- { serverError NOT_IMPLEMENTED } -DELETE FROM users WHERE uid = 1231 SETTINGS lightweight_mutation_projection_mode = 'drop'; +ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; + +DELETE FROM users WHERE uid = 1231; SYSTEM FLUSH LOGS; From 12794601921e2d465b27e665b072267b658b8e4c Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 15 Jul 2024 22:09:30 +0000 Subject: [PATCH 111/265] fix after review --- .../ObjectStorage/StorageObjectStorage.cpp | 2 +- .../ObjectStorage/StorageObjectStorage.h | 2 +- .../StorageObjectStorageCluster.cpp | 29 ++++++++++++++++++- .../StorageObjectStorageCluster.h | 2 ++ src/Storages/StorageFile.cpp | 5 +--- src/Storages/VirtualColumnUtils.cpp | 6 ++-- src/Storages/VirtualColumnUtils.h | 2 +- .../03203_hive_style_partitioning.reference | 17 +++++++++++ 
.../03203_hive_style_partitioning.sh | 16 ++++++++++ 9 files changed, 70 insertions(+), 11 deletions(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index ee1169d2c5c..ca0ced8dcd3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -34,7 +34,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -std::string StorageObjectStorage::getPathSample(StorageInMemoryMetadata metadata, ContextPtr context) +String StorageObjectStorage::getPathSample(StorageInMemoryMetadata metadata, ContextPtr context) { auto query_settings = configuration->getQuerySettings(context); /// We don't want to throw an exception if there are no files with specified path. diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 6ee4ce0c16f..cae0db48f31 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -122,7 +122,7 @@ public: protected: virtual void updateConfiguration(ContextPtr local_context); - std::string getPathSample(StorageInMemoryMetadata metadata, ContextPtr context); + String getPathSample(StorageInMemoryMetadata metadata, ContextPtr context); virtual ReadFromFormatInfo prepareReadingFromFormat( const Strings & requested_columns, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index a88532e1ea9..7f6b3338f9b 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -1,6 +1,8 @@ #include "Storages/ObjectStorage/StorageObjectStorageCluster.h" #include +#include +#include #include #include #include @@ -19,6 +21,28 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +String StorageObjectStorageCluster::getPathSample(StorageInMemoryMetadata metadata, ContextPtr context) +{ + auto query_settings = configuration->getQuerySettings(context); + /// We don't want to throw an exception if there are no files with specified path. 
+ query_settings.throw_on_zero_files_match = false; + auto file_iterator = StorageObjectStorageSource::createFileIterator( + configuration, + query_settings, + object_storage, + false, // distributed_processing + context, + {}, // predicate + metadata.getColumns().getAll(), // virtual_columns + nullptr, // read_keys + {} // file_progress_callback + ); + + if (auto file = file_iterator->next(0)) + return file->getPath(); + return ""; +} + StorageObjectStorageCluster::StorageObjectStorageCluster( const String & cluster_name_, ConfigurationPtr configuration_, @@ -41,7 +65,10 @@ StorageObjectStorageCluster::StorageObjectStorageCluster( metadata.setColumns(columns); metadata.setConstraints(constraints_); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context_)); + if (sample_path.empty() && context_->getSettingsRef().use_hive_partitioning) + sample_path = getPathSample(metadata, context_); + + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context_, sample_path, getFormatSettings(context_))); setInMemoryMetadata(metadata); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index 108aa109616..0088ff28fc2 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -27,6 +27,8 @@ public: RemoteQueryExecutor::Extension getTaskIteratorExtension( const ActionsDAG::Node * predicate, const ContextPtr & context) const override; + String getPathSample(StorageInMemoryMetadata metadata, ContextPtr context); + private: void updateQueryToSendIfNeeded( ASTPtr & query, diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index e2b010bf48f..b43fce370a1 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1109,10 +1109,7 @@ void StorageFile::setStorageMetadata(CommonArguments args) storage_metadata.setComment(args.comment); setInMemoryMetadata(storage_metadata); - std::string path_for_virtuals; - if (!paths.empty()) - path_for_virtuals = paths[0]; - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), args.getContext(), path_for_virtuals, format_settings)); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), args.getContext(), paths.empty() ? 
"" : paths[0], format_settings)); } diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index fb5a345f424..352fd0d7a76 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -119,18 +119,18 @@ NameSet getVirtualNamesForFileLikeStorage() return {"_path", "_file", "_size", "_time"}; } -std::unordered_map parseHivePartitioningKeysAndValues(const std::string& path, const ColumnsDescription & storage_columns) +std::unordered_map parseHivePartitioningKeysAndValues(const String & path, const ColumnsDescription & storage_columns) { std::string pattern = "/([^/]+)=([^/]+)"; re2::StringPiece input_piece(path); std::unordered_map key_values; std::string key, value; - std::set used_keys; + std::unordered_set used_keys; while (RE2::FindAndConsume(&input_piece, pattern, &key, &value)) { if (used_keys.contains(key)) - throw Exception(ErrorCodes::INCORRECT_DATA, "Link to file with enabled hive-style partitioning contains duplicated key {}, only unique keys required", key); + throw Exception(ErrorCodes::INCORRECT_DATA, "Path '{}' to file with enabled hive-style partitioning contains duplicated partition key {}, only unique keys are allowed", path, key); used_keys.insert(key); auto col_name = "_" + key; diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 29ec32ab375..fef32b149ec 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -51,7 +51,7 @@ NameSet getVirtualNamesForFileLikeStorage(); VirtualColumnsDescription getVirtualsForFileLikeStorage( const ColumnsDescription & storage_columns, const ContextPtr & context, - std::string sample_path = "", + const std::string & sample_path = "", std::optional format_settings_ = std::nullopt); ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns); diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index fc6da3a55c1..430a3582f65 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -102,3 +102,20 @@ Jeffery Delgado Elizabeth Clara Cross Elizabeth Elizabeth Gordon Elizabeth OK +TESTING THE S3CLUSTER PARTITIONING +first last Elizabeth +Jorge Frank Elizabeth +Hunter Moreno Elizabeth +Esther Guzman Elizabeth +Dennis Stephens Elizabeth +Nettie Franklin Elizabeth +Stanley Gibson Elizabeth +Eugenia Greer Elizabeth +Jeffery Delgado Elizabeth +Clara Cross Elizabeth +Elizabeth Gordon Elizabeth +Eva Schmidt Elizabeth Schmidt +Samuel Schmidt Elizabeth Schmidt +Eva Schmidt Elizabeth +Samuel Schmidt Elizabeth +Elizabeth Gordon Elizabeth Gordon diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index a5f3e36763d..d2b1f31c85f 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -118,3 +118,19 @@ set use_hive_partitioning = 0; SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -F -q "UNKNOWN_IDENTIFIER" && echo "OK" || echo "FAIL"; + +$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3CLUSTER PARTITIONING'" + +$CLICKHOUSE_CLIENT -n -q """ +set use_hive_partitioning = 1; + +SELECT *, _column0 FROM 
s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; + +SELECT *, _column0 FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; + +SELECT *, _column0, _column1 FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0 FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; + +SELECT *, _column0, _column1 FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0 FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +""" From 4305a38e7394d7bbf3c3455c3b52b1dc9b86f3c9 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 15 Jul 2024 22:11:08 +0000 Subject: [PATCH 112/265] add include --- src/Storages/VirtualColumnUtils.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 352fd0d7a76..938972cffca 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -41,6 +41,7 @@ #include #include #include +#include #include "Functions/FunctionsLogical.h" #include "Functions/IFunction.h" #include "Functions/IFunctionAdaptors.h" From dcf14e68afbc741fdbf830fc7d01ba84817c0760 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 15 Jul 2024 22:45:21 +0000 Subject: [PATCH 113/265] small fix --- src/Storages/VirtualColumnUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 938972cffca..e84979833ab 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -142,7 +142,7 @@ std::unordered_map parseHivePartitioningKeysAndValues( return key_values; } -VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, const ContextPtr & context, std::string path, std::optional format_settings_) +VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & path, std::optional format_settings_) { VirtualColumnsDescription desc; From 177d006307515e62bbe60082c293c03de4d4cc7c Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 15 Jul 2024 23:02:24 +0000 Subject: [PATCH 114/265] add errorcodes --- src/Storages/VirtualColumnUtils.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index e84979833ab..24d0b7160b2 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -56,6 +56,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; +} + namespace VirtualColumnUtils { From 82d283357755e3b667074596ec254ca54598ee5d Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 15 Jul 2024 23:29:25 +0000 Subject: [PATCH 115/265] clang tidy fix --- src/Interpreters/Context.cpp | 1 - 1 
file changed, 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index fc1e87e7b7e..2602afd8b78 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -51,7 +51,6 @@ #include #include #include -#include #include #include #include From 9690a5a334b4991eaa9dfa58ce804c91bbff4385 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 16 Jul 2024 13:37:59 +0000 Subject: [PATCH 116/265] fix --- tests/ci/libfuzzer_test_check.py | 2 +- {utils/libfuzzer => tests/fuzz}/runner.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename {utils/libfuzzer => tests/fuzz}/runner.py (100%) diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py index d9e33229932..8f19dd7d023 100644 --- a/tests/ci/libfuzzer_test_check.py +++ b/tests/ci/libfuzzer_test_check.py @@ -75,7 +75,7 @@ def get_run_command( f"--volume={result_path}:/test_output " "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image} " - "python3 ./utils/runner.py" + "python3 /usr/share/clickhouse-test/fuzz/runner.py" ) diff --git a/utils/libfuzzer/runner.py b/tests/fuzz/runner.py similarity index 100% rename from utils/libfuzzer/runner.py rename to tests/fuzz/runner.py From c9e02eee7a1d7b2e7aa85bfc87d6a54a3bcaedfa Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 16 Jul 2024 13:48:52 +0000 Subject: [PATCH 117/265] fix after review --- docs/en/sql-reference/table-functions/hdfs.md | 4 ---- .../StorageObjectStorageSource.cpp | 2 +- .../ObjectStorageQueueSource.cpp | 2 +- src/Storages/StorageFile.cpp | 2 +- src/Storages/StorageFileCluster.cpp | 2 +- src/Storages/StorageURL.cpp | 19 +------------------ src/Storages/StorageURL.h | 18 ++++++++++++++++++ src/Storages/StorageURLCluster.cpp | 3 +-- src/Storages/VirtualColumnUtils.cpp | 2 +- src/Storages/VirtualColumnUtils.h | 2 +- 10 files changed, 26 insertions(+), 30 deletions(-) diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index 73fdc263d68..60c2fd40e6a 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -112,10 +112,6 @@ SET use_hive_partitioning = 1; SELECT * from HDFS('hdfs://hdfs1:9000/data/path/date=*/country=*/code=*/*.parquet') where _date > '2020-01-01' and _country = 'Netherlands' and _code = 42; ``` -``` reference -specified_data -``` - ## Storage Settings {#storage-settings} - [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 93f8eaacbc0..d29e33444b0 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -204,7 +204,7 @@ Chunk StorageObjectStorageSource::generate() .size = object_info->isArchive() ? 
object_info->fileSizeInArchive() : object_info->metadata->size_bytes, .filename = &filename, .last_modified = object_info->metadata->last_modified, - }, read_from_format_info.columns_description, getContext()); + }, getContext(), read_from_format_info.columns_description); const auto & partition_columns = configuration->getPartitionColumns(); if (!partition_columns.empty() && chunk_size && chunk.hasColumns()) diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index 4d921003e04..2634a7b2f1e 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -524,7 +524,7 @@ Chunk ObjectStorageQueueSource::generateImpl() { .path = path, .size = reader.getObjectInfo()->metadata->size_bytes - }); + }, getContext(), read_from_format_info.columns_description); return chunk; } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index b43fce370a1..5cbc2b38887 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1459,7 +1459,7 @@ Chunk StorageFileSource::generate() .size = current_file_size, .filename = (filename_override.has_value() ? &filename_override.value() : nullptr), .last_modified = current_file_last_modified - }, columns_description, getContext()); + }, getContext(), columns_description); return chunk; } diff --git a/src/Storages/StorageFileCluster.cpp b/src/Storages/StorageFileCluster.cpp index f7684182e79..82ae0b761ae 100644 --- a/src/Storages/StorageFileCluster.cpp +++ b/src/Storages/StorageFileCluster.cpp @@ -61,7 +61,7 @@ StorageFileCluster::StorageFileCluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context)); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, paths.empty() ? 
"" : paths[0])); } void StorageFileCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 87911230819..6e7788cfc1d 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -39,7 +39,6 @@ #include #include -#include #include #include @@ -92,27 +91,11 @@ static const std::vector> optional_regex_keys = { std::make_shared(R"(headers.header\[[0-9]*\].value)"), }; -static bool urlWithGlobs(const String & uri) -{ - return (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos) || uri.find('|') != std::string::npos; -} - static ConnectionTimeouts getHTTPTimeouts(ContextPtr context) { return ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), context->getServerSettings().keep_alive_timeout); } -String getSampleURI(String uri, ContextPtr context) -{ - if (urlWithGlobs(uri)) - { - auto uris = parseRemoteDescription(uri, 0, uri.size(), ',', context->getSettingsRef().glob_expansion_max_elements); - if (!uris.empty()) - return uris[0]; - } - return uri; -} - IStorageURLBase::IStorageURLBase( const String & uri_, const ContextPtr & context_, @@ -433,7 +416,7 @@ Chunk StorageURLSource::generate() { .path = curr_uri.getPath(), .size = current_file_size, - }, columns_description, getContext()); + }, getContext(), columns_description); return chunk; } diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index fa7cc6eeeef..a874ca9147c 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -12,6 +12,8 @@ #include #include #include +#include +#include namespace DB @@ -267,6 +269,22 @@ private: bool cancelled = false; }; +static bool urlWithGlobs(const String & uri) +{ + return (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos) || uri.find('|') != std::string::npos; +} + +inline String getSampleURI(String uri, ContextPtr context) +{ + if (urlWithGlobs(uri)) + { + auto uris = parseRemoteDescription(uri, 0, uri.size(), ',', context->getSettingsRef().glob_expansion_max_elements); + if (!uris.empty()) + return uris[0]; + } + return uri; +} + class StorageURL : public IStorageURLBase { public: diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index 664f170c17e..e80f4ebcd06 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -76,7 +75,7 @@ StorageURLCluster::StorageURLCluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context)); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, getSampleURI(uri, context), getFormatSettings(context))); } void StorageURLCluster::updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 24d0b7160b2..31cee485dde 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -239,7 +239,7 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, - VirtualsForFileLikeStorage virtual_values, 
ColumnsDescription columns, ContextPtr context) + VirtualsForFileLikeStorage virtual_values, ContextPtr context, const ColumnsDescription & columns) { std::unordered_map hive_map; if (context->getSettingsRef().use_hive_partitioning) diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index fef32b149ec..1bd74189559 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -83,7 +83,7 @@ struct VirtualsForFileLikeStorage void addRequestedFileLikeStorageVirtualsToChunk( Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, - VirtualsForFileLikeStorage virtual_values, ColumnsDescription columns = {}, ContextPtr context = {}); + VirtualsForFileLikeStorage virtual_values, ContextPtr context, const ColumnsDescription & columns); } } From e7e62b358360083eda6d2ec983fb5a1b733d1eba Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 16 Jul 2024 14:17:51 +0000 Subject: [PATCH 118/265] fix style --- tests/fuzz/runner.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/fuzz/runner.py b/tests/fuzz/runner.py index bbe648dbbc2..0862ea29e42 100644 --- a/tests/fuzz/runner.py +++ b/tests/fuzz/runner.py @@ -11,7 +11,7 @@ FUZZER_ARGS = os.getenv("FUZZER_ARGS", "") def run_fuzzer(fuzzer: str): - logging.info(f"Running fuzzer {fuzzer}...") + logging.info("Running fuzzer %s...", fuzzer) corpus_dir = f"{fuzzer}.in" with Path(corpus_dir) as path: @@ -29,28 +29,28 @@ def run_fuzzer(fuzzer: str): if parser.has_section("asan"): os.environ["ASAN_OPTIONS"] = ( - f"{os.environ['ASAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['asan'].items())}" + f"{os.environ['ASAN_OPTIONS']}:{':'.join(f"{key}={value}" for key, value in parser['asan'].items())}" ) if parser.has_section("msan"): os.environ["MSAN_OPTIONS"] = ( - f"{os.environ['MSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['msan'].items())}" + f"{os.environ['MSAN_OPTIONS']}:{':'.join(f"{key}={value}" for key, value in parser['msan'].items())}" ) if parser.has_section("ubsan"): os.environ["UBSAN_OPTIONS"] = ( - f"{os.environ['UBSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['ubsan'].items())}" + f"{os.environ['UBSAN_OPTIONS']}:{':'.join(f"{key}={value}" for key, value in parser['ubsan'].items())}" ) if parser.has_section("libfuzzer"): custom_libfuzzer_options = " ".join( - "-%s=%s" % (key, value) + f"-{key}={value}" for key, value in parser["libfuzzer"].items() ) if parser.has_section("fuzzer_arguments"): fuzzer_arguments = " ".join( - ("%s" % key) if value == "" else ("%s=%s" % (key, value)) + (f"{key}") if value == "" else (f"{key}={value}") for key, value in parser["fuzzer_arguments"].items() ) @@ -65,7 +65,7 @@ def run_fuzzer(fuzzer: str): cmd_line += " < /dev/null" - logging.info(f"...will execute: {cmd_line}") + logging.info("...will execute: %s", cmd_line) subprocess.check_call(cmd_line, shell=True) From c974430e68bff97986379410e9a94c1ea641d1bd Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 16 Jul 2024 15:01:43 +0000 Subject: [PATCH 119/265] fix --- tests/fuzz/runner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/fuzz/runner.py b/tests/fuzz/runner.py index 0862ea29e42..047a2245bfa 100644 --- a/tests/fuzz/runner.py +++ b/tests/fuzz/runner.py @@ -29,17 +29,17 @@ def run_fuzzer(fuzzer: str): if parser.has_section("asan"): os.environ["ASAN_OPTIONS"] = ( - f"{os.environ['ASAN_OPTIONS']}:{':'.join(f"{key}={value}" for key, value in 
parser['asan'].items())}" + f"{os.environ['ASAN_OPTIONS']}:{':'.join(f'{key}={value}' for key, value in parser['asan'].items())}" ) if parser.has_section("msan"): os.environ["MSAN_OPTIONS"] = ( - f"{os.environ['MSAN_OPTIONS']}:{':'.join(f"{key}={value}" for key, value in parser['msan'].items())}" + f"{os.environ['MSAN_OPTIONS']}:{':'.join(f'{key}={value}' for key, value in parser['msan'].items())}" ) if parser.has_section("ubsan"): os.environ["UBSAN_OPTIONS"] = ( - f"{os.environ['UBSAN_OPTIONS']}:{':'.join(f"{key}={value}" for key, value in parser['ubsan'].items())}" + f"{os.environ['UBSAN_OPTIONS']}:{':'.join(f'{key}={value}' for key, value in parser['ubsan'].items())}" ) if parser.has_section("libfuzzer"): From 8660aec5d79f7a16ab3bcac2aaab291e4bcf0c2d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 16 Jul 2024 15:16:11 +0000 Subject: [PATCH 120/265] Automatic style fix --- tests/fuzz/runner.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/fuzz/runner.py b/tests/fuzz/runner.py index 047a2245bfa..44259228f60 100644 --- a/tests/fuzz/runner.py +++ b/tests/fuzz/runner.py @@ -44,8 +44,7 @@ def run_fuzzer(fuzzer: str): if parser.has_section("libfuzzer"): custom_libfuzzer_options = " ".join( - f"-{key}={value}" - for key, value in parser["libfuzzer"].items() + f"-{key}={value}" for key, value in parser["libfuzzer"].items() ) if parser.has_section("fuzzer_arguments"): From b6672b9952caeff523b2836a710dd3be3d6ed4e8 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Tue, 16 Jul 2024 15:20:01 +0000 Subject: [PATCH 121/265] add rebuild for compact part --- src/Core/SettingsEnums.cpp | 3 +- src/Core/SettingsEnums.h | 1 + src/Interpreters/MutationsInterpreter.cpp | 5 -- src/Interpreters/MutationsInterpreter.h | 1 - .../MergeTree/MergeMutateSelectedEntry.h | 1 - src/Storages/MergeTree/MergeTreeSettings.h | 2 +- src/Storages/MergeTree/MutateTask.cpp | 24 ++++++++-- src/Storages/StorageMergeTree.cpp | 1 - ...61_lightweight_delete_projection.reference | 5 ++ .../03161_lightweight_delete_projection.sql | 46 +++++++++++++++++-- 10 files changed, 69 insertions(+), 20 deletions(-) diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 82e7d6db410..6c000d83254 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -175,7 +175,8 @@ IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGU IMPLEMENT_SETTING_ENUM(LightweightMutationProjectionMode, ErrorCodes::BAD_ARGUMENTS, {{"throw", LightweightMutationProjectionMode::THROW}, - {"drop", LightweightMutationProjectionMode::DROP}}) + {"drop", LightweightMutationProjectionMode::DROP}, + {"rebuild", LightweightMutationProjectionMode::REBUILD}}) IMPLEMENT_SETTING_AUTO_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS) diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index f6d9593ca56..0281176417a 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -311,6 +311,7 @@ enum class LightweightMutationProjectionMode : uint8_t { THROW, DROP, + REBUILD, }; DECLARE_SETTING_ENUM(LightweightMutationProjectionMode) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index b61f7f78885..db4ea9c0754 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -344,11 +344,6 @@ bool MutationsInterpreter::Source::hasProjection(const String & name) const return part && part->hasProjection(name); } -bool MutationsInterpreter::Source::hasProjection() const -{ - return 
part && part->hasProjection();
-}
-
 bool MutationsInterpreter::Source::hasBrokenProjection(const String & name) const
 {
     return part && part->hasBrokenProjection(name);
 }
diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h
index b792a33f904..6aaa233cda3 100644
--- a/src/Interpreters/MutationsInterpreter.h
+++ b/src/Interpreters/MutationsInterpreter.h
@@ -126,7 +126,6 @@ public:
     bool materializeTTLRecalculateOnly() const;
     bool hasSecondaryIndex(const String & name) const;
     bool hasProjection(const String & name) const;
-    bool hasProjection() const;
     bool hasBrokenProjection(const String & name) const;
     bool isCompactPart() const;
diff --git a/src/Storages/MergeTree/MergeMutateSelectedEntry.h b/src/Storages/MergeTree/MergeMutateSelectedEntry.h
index e7efe00741c..c420cbca12b 100644
--- a/src/Storages/MergeTree/MergeMutateSelectedEntry.h
+++ b/src/Storages/MergeTree/MergeMutateSelectedEntry.h
@@ -3,7 +3,6 @@
 #include
 #include
-
 namespace DB
 {
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index c84ca9956fc..74e7a7f43bc 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -215,7 +215,7 @@ struct Settings;
     M(Float, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns, 0.9f, "If the value of a column of the primary key in data part changes at least in this ratio of times, skip loading next columns in memory. This allows to save memory usage by not loading useless columns of the primary key.", 0) \
     /** Projection settings. */ \
     M(UInt64, max_projections, 25, "The maximum number of merge tree projections.", 0) \
-    M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop projections of this table's relevant parts.", 0) \
+    M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When a lightweight delete happens on a table with projection(s), the possible operations include throwing an exception because a projection exists, dropping projections of this table's relevant parts, or rebuilding the projections.", 0) \

 #define MAKE_OBSOLETE_MERGE_TREE_SETTING(M, TYPE, NAME, DEFAULT) \
     M(TYPE, NAME, DEFAULT, "Obsolete setting, does nothing.", BaseSettingsHelpers::Flags::OBSOLETE)
diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp
index 8790ce6628e..092a6d0d6ed 100644
--- a/src/Storages/MergeTree/MutateTask.cpp
+++ b/src/Storages/MergeTree/MutateTask.cpp
@@ -662,7 +662,7 @@ static NameSet collectFilesToSkip(
     const std::set & projections_to_recalc,
     const std::set & stats_to_recalc,
     const StorageMetadataPtr & metadata_snapshot,
-    bool lightweight_delete_mode)
+    bool skip_all_projections)
 {
     NameSet files_to_skip = source_part->getFileNamesWithoutChecksums();
@@ -686,7 +686,7 @@ static NameSet collectFilesToSkip(
         }
     }
-    if (lightweight_delete_mode)
+    if (skip_all_projections)
     {
         for (const auto & projection : metadata_snapshot->getProjections())
             files_to_skip.insert(projection.getDirectoryName());
@@ -2211,6 +2211,8 @@ bool MutateTask::prepare()
     ctx->stage_progress = std::make_unique(1.0);
+    bool lightweight_delete_mode = false;
+
     if (!ctx->for_interpreter.empty())
     {
         /// Always disable filtering in mutations: we want to read and write all rows because for updates we rewrite only some of the
@@ -2228,6 +2230,16 @@ bool MutateTask::prepare()
         ctx->mutating_pipeline_builder = ctx->interpreter->execute();
         ctx->updated_header = ctx->interpreter->getUpdatedHeader();
         ctx->progress_callback = MergeProgressCallback((*ctx->mutate_entry)->ptr(), ctx->watch_prev_elapsed, *ctx->stage_progress);
+
+        lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name);
+        /// In lightweight delete mode with the rebuild option, add the projections back here,
+        /// because this is the earliest point at which the condition is known.
+        if (lightweight_delete_mode
+            && ctx->data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::REBUILD)
+        {
+            for (const auto & projection : ctx->metadata_snapshot->getProjections())
+                ctx->materialized_projections.insert(projection.name);
+        }
     }
     auto single_disk_volume = std::make_shared("volume_" + ctx->future_part->name, ctx->space_reservation->getDisk(), 0);
@@ -2269,7 +2281,6 @@ bool MutateTask::prepare()
     if (ctx->mutating_pipeline_builder.initialized())
         ctx->execute_ttl_type = MutationHelpers::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies());
-    bool lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name);
     if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && lightweight_delete_mode)
     {
         /// This mutation contains lightweight delete and we need to count the deleted rows,
@@ -2307,7 +2318,10 @@ bool MutateTask::prepare()
         ctx->context,
         ctx->materialized_indices);
-    if (!lightweight_delete_mode)
+    bool lightweight_delete_projection_drop = lightweight_delete_mode
+        && ctx->data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::DROP;
+    /// In lightweight delete mode with the drop option, projections_to_recalc must stay empty.
+    if (!lightweight_delete_projection_drop)
     {
         ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate(
             ctx->source_part,
@@ -2326,7 +2340,7 @@ bool MutateTask::prepare()
         ctx->projections_to_recalc,
         ctx->stats_to_recalc,
         ctx->metadata_snapshot,
-        lightweight_delete_mode);
+        lightweight_delete_projection_drop);
     ctx->files_to_rename = MutationHelpers::collectFilesForRenames(
         ctx->source_part,
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index d8c61da2a98..40b3a12297b 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -1285,7 +1285,6 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate(
     auto commands = std::make_shared();
     size_t current_ast_elements = 0;
     auto last_mutation_to_apply = mutations_end_it;
-
     for (auto it = mutations_begin_it; it != mutations_end_it; ++it)
     {
         /// Do not squash mutations from different transactions to be able to commit/rollback them independently.
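Note: the two hunks above carry the core of the rebuild feature. As a rough, self-contained sketch of the dispatch they implement — hypothetical stand-in types and names only, not the real MutateTask/StorageMetadata API — the projection decision for a lightweight delete reduces to:

```cpp
#include <iostream>
#include <set>
#include <string>
#include <vector>

// Illustrative stand-in for ClickHouse's setting enum.
enum class LightweightMutationProjectionMode { THROW, DROP, REBUILD };

// Given the projections present in the source part and the configured mode,
// return the set of projections a lightweight delete has to rebuild.
// With DROP the set stays empty (projections are left out of the new part);
// with REBUILD every projection that exists in the part is re-materialized.
std::set<std::string> projectionsToMaterialize(
    const std::vector<std::string> & part_projections,
    bool lightweight_delete_mode,
    LightweightMutationProjectionMode mode)
{
    std::set<std::string> result;
    if (lightweight_delete_mode && mode == LightweightMutationProjectionMode::REBUILD)
        result.insert(part_projections.begin(), part_projections.end());
    return result;
}

int main()
{
    for (const auto & name : projectionsToMaterialize(
             {"p1", "p2"}, /*lightweight_delete_mode=*/true, LightweightMutationProjectionMode::REBUILD))
        std::cout << "rebuild " << name << '\n'; // prints p1 and p2
}
```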
diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index e69de29bb2d..bc7e1faecff 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -0,0 +1,5 @@ +8888 Alice 50 +p1 +p2 +p1 +p2 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 16a7468234b..b63341f5371 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -13,25 +13,43 @@ SETTINGS min_bytes_for_wide_part = 10485760; INSERT INTO users VALUES (1231, 'John', 33); +-- testing throw default mode ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; DELETE FROM users WHERE uid = 1231; -- { serverError NOT_IMPLEMENTED } +-- testing drop mode ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; DELETE FROM users WHERE uid = 1231; +SELECT * FROM users ORDER BY uid; + SYSTEM FLUSH LOGS; -- expecting no projection SELECT - name, - `table` + name FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +-- testing rebuild mode +INSERT INTO users VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); + +ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; + +DELETE FROM users WHERE uid = 6666; + SELECT * FROM users ORDER BY uid; +SYSTEM FLUSH LOGS; + +-- expecting projection p1, p2 in 2 parts +SELECT + name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); + DROP TABLE users; @@ -47,23 +65,41 @@ SETTINGS min_bytes_for_wide_part = 0; INSERT INTO users VALUES (1231, 'John', 33); +-- testing throw default mode ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; DELETE FROM users WHERE uid = 1231; -- { serverError NOT_IMPLEMENTED } +-- testing drop mode ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; DELETE FROM users WHERE uid = 1231; +SELECT * FROM users ORDER BY uid; + SYSTEM FLUSH LOGS; -- expecting no projection SELECT - name, - `table` + name FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); -SELECT * FROM users ORDER BY uid; +-- -- testing rebuild mode +-- INSERT INTO users VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); + +-- ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; + +-- DELETE FROM users WHERE uid = 6666; + +-- SELECT * FROM users ORDER BY uid; + +-- SYSTEM FLUSH LOGS; + +-- -- expecting projection p1, p2 in 2 parts +-- SELECT +-- name +-- FROM system.projection_parts +-- WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); DROP TABLE users; \ No newline at end of file From c3507979cfc0359ab38762525ab0306904a387b8 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Tue, 16 Jul 2024 15:41:54 +0000 Subject: [PATCH 122/265] fix --- src/Storages/MergeTree/MutateTask.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 092a6d0d6ed..489c8863a8a 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2234,7 +2234,7 @@ bool 
MutateTask::prepare() lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name); /// If under the condition of lightweight delete mode with rebuild option, add projections again here as we can only know /// the condition as early as from here. - if (lightweight_delete_mode + if (lightweight_delete_mode && ctx->data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::REBUILD) { for (const auto & projection : ctx->metadata_snapshot->getProjections()) From 3ae4211b3af575cf8d7186a4cc915f9ecb6b4182 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 16 Jul 2024 17:59:57 +0200 Subject: [PATCH 123/265] fix tests --- tests/queries/0_stateless/03203_hive_style_partitioning.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index d2b1f31c85f..0f687d532b0 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -39,8 +39,8 @@ SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01 $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT _identifier FROM file('$CURDIR/data_hive/partitioning/identitier=*/email.csv') LIMIT 2; -SELECT __identifier FROM file('$CURDIR/data_hive/partitioning/identitier=*/email.csv') LIMIT 2; +SELECT _identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; +SELECT __identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; """ $CLICKHOUSE_LOCAL -n -q """ From d91cb40bbdbd18a2bef811002033d0c99fe693d3 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 16 Jul 2024 16:15:24 +0000 Subject: [PATCH 124/265] fix include and remove unused getFormatSettings --- src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp | 2 +- src/Storages/StorageFile.cpp | 1 - src/Storages/StorageURLCluster.cpp | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 7f6b3338f9b..c214665f7e0 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -68,7 +68,7 @@ StorageObjectStorageCluster::StorageObjectStorageCluster( if (sample_path.empty() && context_->getSettingsRef().use_hive_partitioning) sample_path = getPathSample(metadata, context_); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context_, sample_path, getFormatSettings(context_))); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context_, sample_path)); setInMemoryMetadata(metadata); } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 5cbc2b38887..ed05f57b418 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -52,7 +52,6 @@ #include #include #include -#include "Formats/FormatSettings.h" #include #include diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp index e80f4ebcd06..1522a18a083 100644 --- a/src/Storages/StorageURLCluster.cpp +++ b/src/Storages/StorageURLCluster.cpp @@ -75,7 +75,7 @@ StorageURLCluster::StorageURLCluster( storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - 
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, getSampleURI(uri, context), getFormatSettings(context))); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, getSampleURI(uri, context))); } void StorageURLCluster::updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) From d11d1a42bc7101993e5c85f1c1c3298e6334dbf9 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 16 Jul 2024 16:49:33 +0000 Subject: [PATCH 125/265] fix for storageURL functions --- src/Storages/StorageURL.cpp | 16 ++++++++++++++++ src/Storages/StorageURL.h | 19 +++---------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 6e7788cfc1d..1d1deebf9f5 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -91,6 +91,22 @@ static const std::vector> optional_regex_keys = { std::make_shared(R"(headers.header\[[0-9]*\].value)"), }; +bool urlWithGlobs(const String & uri) +{ + return (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos) || uri.find('|') != std::string::npos; +} + +String getSampleURI(String uri, ContextPtr context) +{ + if (urlWithGlobs(uri)) + { + auto uris = parseRemoteDescription(uri, 0, uri.size(), ',', context->getSettingsRef().glob_expansion_max_elements); + if (!uris.empty()) + return uris[0]; + } + return uri; +} + static ConnectionTimeouts getHTTPTimeouts(ContextPtr context) { return ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), context->getServerSettings().keep_alive_timeout); diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index a874ca9147c..cd48ecb767b 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -143,6 +143,9 @@ private: virtual Block getHeaderBlock(const Names & column_names, const StorageSnapshotPtr & storage_snapshot) const = 0; }; +bool urlWithGlobs(const String & uri); + +String getSampleURI(String uri, ContextPtr context); class StorageURLSource : public SourceWithKeyCondition, WithContext { @@ -269,22 +272,6 @@ private: bool cancelled = false; }; -static bool urlWithGlobs(const String & uri) -{ - return (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos) || uri.find('|') != std::string::npos; -} - -inline String getSampleURI(String uri, ContextPtr context) -{ - if (urlWithGlobs(uri)) - { - auto uris = parseRemoteDescription(uri, 0, uri.size(), ',', context->getSettingsRef().glob_expansion_max_elements); - if (!uris.empty()) - return uris[0]; - } - return uri; -} - class StorageURL : public IStorageURLBase { public: From 14dcb97e353fb4739c7f7d37b9c3c11c9ad40923 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 16 Jul 2024 19:09:18 +0200 Subject: [PATCH 126/265] Update src/Storages/StorageURL.h Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/Storages/StorageURL.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index cd48ecb767b..1f3d63b4c85 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -12,8 +12,6 @@ #include #include #include -#include -#include namespace DB From 771b39fa2179a9a548580c41859bbecf0165000d Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 16 Jul 2024 19:44:10 +0200 Subject: 
[PATCH 127/265] Update StorageURLCluster.cpp

---
 src/Storages/StorageURLCluster.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp
index 1522a18a083..7c7a299c64e 100644
--- a/src/Storages/StorageURLCluster.cpp
+++ b/src/Storages/StorageURLCluster.cpp
@@ -3,6 +3,7 @@
 #include
 #include
+#include
 #include
 #include
 #include

From 9d332911fb0f4b25bbdb67a9c10c5f3b42db4ea6 Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Tue, 16 Jul 2024 20:10:19 +0200
Subject: [PATCH 128/265] Update StorageURL.cpp

---
 src/Storages/StorageURL.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index 1d1deebf9f5..4cf191f7e8a 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -39,6 +39,7 @@
 #include
 #include
+#include
 #include
 #include

From 949e69c0573354417e21bd83d446e1ea085db04d Mon Sep 17 00:00:00 2001
From: Blargian
Date: Tue, 16 Jul 2024 21:58:48 +0200
Subject: [PATCH 129/265] add documentation for getSubcolumn and getTypeSerializationStreams

---
 .../functions/other-functions.md | 91 +++++++++++++++++++
 1 file changed, 91 insertions(+)

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 260457b3be1..40f1b82562d 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -4055,3 +4055,94 @@ Result:
 │ 32 │
 └─────────────────────────────┘
 ```
+
+## getSubcolumn
+
+Takes a table expression or identifier and a constant string with the name of the sub-column, and returns the requested sub-column extracted from the expression.
+
+**Syntax**
+
+```sql
+getSubcolumn(col_name, subcol_name)
+```
+
+**Arguments**
+
+- `col_name` — Table expression or identifier. [Expression](../syntax.md/#expressions), [Identifier](../syntax.md/#identifiers).
+- `subcol_name` — The name of the sub-column. [String](../data-types/string.md).
+
+**Returned value**
+
+- Returns the extracted sub-column.
+
+**Example**
+
+Query:
+
+```sql
+CREATE TABLE t_arr (arr Array(Tuple(subcolumn1 UInt32, subcolumn2 String))) ENGINE = MergeTree ORDER BY tuple();
+INSERT INTO t_arr VALUES ([(1, 'Hello'), (2, 'World')]), ([(3, 'This'), (4, 'is'), (5, 'subcolumn')]);
+SELECT getSubcolumn(arr, 'subcolumn1'), getSubcolumn(arr, 'subcolumn2') FROM t_arr;
+```
+
+Result:
+
+```response
+ ┌─getSubcolumn(arr, 'subcolumn1')─┬─getSubcolumn(arr, 'subcolumn2')─┐
+1. │ [1,2] │ ['Hello','World'] │
+2. │ [3,4,5] │ ['This','is','subcolumn'] │
+ └─────────────────────────────────┴─────────────────────────────────┘
+```
+
+## getTypeSerializationStreams
+
+Enumerates stream paths of a data type.
+
+:::note
+This function is intended for use by developers.
+:::
+
+**Syntax**
+
+```sql
+getTypeSerializationStreams(col)
+```
+
+**Arguments**
+
+- `col` — Column, or the string representation of a data type, from which the data type will be detected.
+
+**Returned value**
+
+- Returns an array with all the serialization sub-stream paths. [Array](../data-types/array.md)([String](../data-types/string.md)).
+
+**Examples**
+
+Query:
+
+```sql
+SELECT getTypeSerializationStreams(tuple('a', 1, 'b', 2));
+```
+
+Result:
+
+```response
+ ┌─getTypeSerializationStreams(('a', 1, 'b', 2))─────────────────────────────────────────────────────────────────────────┐
+1. 
│ ['{TupleElement(1), Regular}','{TupleElement(2), Regular}','{TupleElement(3), Regular}','{TupleElement(4), Regular}'] │ + └───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +Query: + +```sql +SELECT getTypeSerializationStreams('Map(String, Int64)'); +``` + +Result: + +```response + ┌─getTypeSerializationStreams('Map(String, Int64)')────────────────────────────────────────────────────────────────┐ +1. │ ['{ArraySizes}','{ArrayElements, TupleElement(keys), Regular}','{ArrayElements, TupleElement(values), Regular}'] │ + └──────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + From 122673592b21a2a0e60d1cedc9f9337b471ebcb8 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Wed, 17 Jul 2024 02:37:48 +0000 Subject: [PATCH 130/265] add rebuild for wide part --- src/Storages/MergeTree/MutateTask.cpp | 5 ++++ src/Storages/StorageInMemoryMetadata.cpp | 12 +++++++-- ...61_lightweight_delete_projection.reference | 1 + .../03161_lightweight_delete_projection.sql | 25 ++++++++++--------- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 489c8863a8a..fe14c5a4f05 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2238,7 +2238,12 @@ bool MutateTask::prepare() && ctx->data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::REBUILD) { for (const auto & projection : ctx->metadata_snapshot->getProjections()) + { + if (!ctx->source_part->hasProjection(projection.name)) + continue; + ctx->materialized_projections.insert(projection.name); + } } } diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 2226de3e64f..4a655cac566 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace DB @@ -334,10 +335,17 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies( NameSet required_ttl_columns; NameSet updated_ttl_columns; - auto add_dependent_columns = [&updated_columns](const Names & required_columns, auto & to_set) + auto add_dependent_columns = [&updated_columns](const Names & required_columns, auto & to_set, bool is_projection = false) { for (const auto & dependency : required_columns) { + /// useful in the case of lightweight delete with wide part and option of rebuild projection + if (is_projection && updated_columns.contains(RowExistsColumn::name)) + { + to_set.insert(required_columns.begin(), required_columns.end()); + return true; + } + if (updated_columns.contains(dependency)) { to_set.insert(required_columns.begin(), required_columns.end()); @@ -357,7 +365,7 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies( for (const auto & projection : getProjections()) { if (has_dependency(projection.name, ColumnDependency::PROJECTION)) - add_dependent_columns(projection.getRequiredColumns(), projections_columns); + add_dependent_columns(projection.getRequiredColumns(), projections_columns, true); } auto add_for_rows_ttl = [&](const auto & expression, auto & to_set) diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index bc7e1faecff..3401eaf6162 100644 --- 
a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -1,5 +1,6 @@ 8888 Alice 50 p1 p2 +8888 Alice 50 p1 p2 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index b63341f5371..2c60d83d74d 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -44,7 +44,7 @@ SELECT * FROM users ORDER BY uid; SYSTEM FLUSH LOGS; --- expecting projection p1, p2 in 2 parts +-- expecting projection p1, p2 SELECT name FROM system.projection_parts @@ -85,21 +85,22 @@ SELECT FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); --- -- testing rebuild mode --- INSERT INTO users VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); +-- testing rebuild mode +INSERT INTO users VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); --- ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; +ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; --- DELETE FROM users WHERE uid = 6666; +DELETE FROM users WHERE uid = 6666; --- SELECT * FROM users ORDER BY uid; +SELECT * FROM users ORDER BY uid; --- SYSTEM FLUSH LOGS; +SYSTEM FLUSH LOGS; + +-- expecting projection p1, p2 +SELECT + name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); --- -- expecting projection p1, p2 in 2 parts --- SELECT --- name --- FROM system.projection_parts --- WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); DROP TABLE users; \ No newline at end of file From 63936364b1abf345349403e656c4cf58c44715bc Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 17 Jul 2024 16:53:35 +0200 Subject: [PATCH 131/265] fixes of tests --- src/Storages/VirtualColumnUtils.cpp | 2 +- .../0_stateless/03203_hive_style_partitioning.sh | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 31cee485dde..87c1aecc3a7 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -127,7 +127,7 @@ NameSet getVirtualNamesForFileLikeStorage() std::unordered_map parseHivePartitioningKeysAndValues(const String & path, const ColumnsDescription & storage_columns) { - std::string pattern = "/([^/]+)=([^/]+)"; + std::string pattern = "([^/]+)=([^/]+)/"; re2::StringPiece input_piece(path); std::unordered_map key_values; diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 0f687d532b0..db1f073d736 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -124,13 +124,13 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3CLUSTER PARTITIONING'" $CLICKHOUSE_CLIENT -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM 
s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; +SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; -SELECT *, _column0, _column1 FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0, _column1 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; +SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0, _column1 FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM s3Cluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0, _column1 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; """ From 938071cd55913c3bb2b8781750ef37bf6307acab Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 17 Jul 2024 14:56:00 +0000 Subject: [PATCH 132/265] add ci_include_fuzzer to PR body template --- .github/PULL_REQUEST_TEMPLATE.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index e045170561d..146542e980c 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -59,6 +59,8 @@ At a minimum, the following information should be added (but add more as needed) - [ ] Exclude: All with TSAN, MSAN, UBSAN, Coverage - [ ] Exclude: All with aarch64, release, debug --- +- [ ] Run only libFuzzer related jobs +--- - [ ] Do not test - [ ] Woolen Wolfdog - [ ] Upload binaries for special builds From 542542b44d4688bc125887f811843249a4024379 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Wed, 17 Jul 2024 14:58:58 +0000 Subject: [PATCH 133/265] fix test --- .../queries/0_stateless/03161_lightweight_delete_projection.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 2c60d83d74d..3bf459cc32d 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -1,4 +1,6 @@ +SET lightweight_deletes_sync = 2; + DROP TABLE 
IF EXISTS users; -- compact part From bb01920370e1dd5faa7b17694f74175190537445 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 17 Jul 2024 16:19:02 +0000 Subject: [PATCH 134/265] add ci_exclude_ast to PR body template --- .github/PULL_REQUEST_TEMPLATE.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 146542e980c..8b6e957e1d8 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -59,7 +59,8 @@ At a minimum, the following information should be added (but add more as needed) - [ ] Exclude: All with TSAN, MSAN, UBSAN, Coverage - [ ] Exclude: All with aarch64, release, debug --- -- [ ] Run only libFuzzer related jobs +- [ ] Run only fuzzers related jobs (libFuzzer fuzzers, AST fuzzers, etc.) +- [ ] Exclude AST fuzzers --- - [ ] Do not test - [ ] Woolen Wolfdog From 275b3666dadece731e368dd672e8d6e83ec22d8f Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Thu, 18 Jul 2024 01:18:34 +0000 Subject: [PATCH 135/265] try to fix the test --- .../03161_lightweight_delete_projection.sql | 114 +++++++++++++++++- 1 file changed, 111 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 3bf459cc32d..9d577f8a701 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -1,5 +1,73 @@ -SET lightweight_deletes_sync = 2; +SET lightweight_deletes_sync = 2, alter_sync = 2; + +Set max_insert_threads = 2, +group_by_two_level_threshold = 704642, +group_by_two_level_threshold_bytes = 49659607, +distributed_aggregation_memory_efficient = 0, +fsync_metadata = 0, +output_format_parallel_formatting = 0, +input_format_parallel_parsing = 1, +min_chunk_bytes_for_parallel_parsing = 14437539, +max_read_buffer_size = 887507, +prefer_localhost_replica = 0, +max_block_size = 73908, +max_joined_block_size_rows = 21162, +max_threads = 2, +optimize_append_index = 0, +optimize_if_chain_to_multiif = 1, +optimize_if_transform_strings_to_enum = 0, +optimize_read_in_order = 0, +optimize_or_like_chain = 1, +optimize_substitute_columns = 1, +enable_multiple_prewhere_read_steps = 1, +read_in_order_two_level_merge_threshold = 13, +optimize_aggregation_in_order = 1, +aggregation_in_order_max_block_bytes = 37110261, +use_uncompressed_cache = 0, +min_bytes_to_use_direct_io = 10737418240, +min_bytes_to_use_mmap_io = 1, +local_filesystem_read_method ='pread', +remote_filesystem_read_method ='threadpool', +local_filesystem_read_prefetch = 0, +filesystem_cache_segments_batch_size = 3, +read_from_filesystem_cache_if_exists_otherwise_bypass_cache = 1, +throw_on_error_from_cache_on_write_operations = 0, +remote_filesystem_read_prefetch = 1, +allow_prefetched_read_pool_for_remote_filesystem = 0, +filesystem_prefetch_max_memory_usage = '32Mi', +filesystem_prefetches_limit = 0, +filesystem_prefetch_min_bytes_for_single_read_task ='16Mi', +filesystem_prefetch_step_marks = 50, +filesystem_prefetch_step_bytes = 0, +compile_aggregate_expressions = 0, +compile_sort_description = 1, +merge_tree_coarse_index_granularity = 16, +optimize_distinct_in_order = 0, +max_bytes_before_external_sort = 0, +max_bytes_before_external_group_by = 0, +max_bytes_before_remerge_sort = 820113150, +min_compress_block_size = 1262249, +max_compress_block_size = 1472188, +merge_tree_compact_parts_min_granules_to_multibuffer_read = 
56, +optimize_sorting_by_input_stream_properties = 1, +http_response_buffer_size = 1883022, +http_wait_end_of_query = False, +enable_memory_bound_merging_of_aggregation_results = 1, +min_count_to_compile_expression = 0, +min_count_to_compile_aggregate_expression = 0, +min_count_to_compile_sort_description = 0, +session_timezone ='Africa/Khartoum', +prefer_warmed_unmerged_parts_seconds = 10, +use_page_cache_for_disks_without_file_cache = True, +page_cache_inject_eviction = False, +merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.02, +prefer_external_sort_block_bytes = 100000000, +cross_join_min_rows_to_compress = 100000000, +cross_join_min_bytes_to_compress = 1, +min_external_table_block_size_bytes = 100000000, +max_parsing_threads = 0; + DROP TABLE IF EXISTS users; @@ -11,7 +79,27 @@ CREATE TABLE users ( projection p1 (select count(), age group by age), projection p2 (select age, name group by age, name) ) ENGINE = MergeTree order by uid -SETTINGS min_bytes_for_wide_part = 10485760; +SETTINGS min_bytes_for_wide_part = 10485760, +ratio_of_defaults_for_sparse_serialization = 1.0, +prefer_fetch_merged_part_size_threshold = 1, +vertical_merge_algorithm_min_rows_to_activate = 1, +vertical_merge_algorithm_min_columns_to_activate = 100, +allow_vertical_merges_from_compact_to_wide_parts = 0, +min_merge_bytes_to_use_direct_io = 114145183, +index_granularity_bytes = 2660363, +merge_max_block_size = 13460, +index_granularity = 51768, +marks_compress_block_size = 59418, +primary_key_compress_block_size = 88795, +replace_long_file_name_to_hash = 0, +max_file_name_length = 0, +min_bytes_for_full_part_storage = 536870912, +compact_parts_max_bytes_to_buffer = 378557913, +compact_parts_max_granules_to_buffer = 254, +compact_parts_merge_max_bytes_to_prefetch_part = 26969686, +cache_populated_by_fetch = 0, +concurrent_part_removal_threshold = 38, +old_parts_lifetime = 480; INSERT INTO users VALUES (1231, 'John', 33); @@ -63,7 +151,27 @@ CREATE TABLE users ( projection p1 (select count(), age group by age), projection p2 (select age, name group by age, name) ) ENGINE = MergeTree order by uid -SETTINGS min_bytes_for_wide_part = 0; +SETTINGS min_bytes_for_wide_part = 0, +ratio_of_defaults_for_sparse_serialization = 1.0, +prefer_fetch_merged_part_size_threshold = 1, +vertical_merge_algorithm_min_rows_to_activate = 1, +vertical_merge_algorithm_min_columns_to_activate = 100, +allow_vertical_merges_from_compact_to_wide_parts = 0, +min_merge_bytes_to_use_direct_io = 114145183, +index_granularity_bytes = 2660363, +merge_max_block_size = 13460, +index_granularity = 51768, +marks_compress_block_size = 59418, +primary_key_compress_block_size = 88795, +replace_long_file_name_to_hash = 0, +max_file_name_length = 0, +min_bytes_for_full_part_storage = 536870912, +compact_parts_max_bytes_to_buffer = 378557913, +compact_parts_max_granules_to_buffer = 254, +compact_parts_merge_max_bytes_to_prefetch_part = 26969686, +cache_populated_by_fetch = 0, +concurrent_part_removal_threshold = 38, +old_parts_lifetime = 480; INSERT INTO users VALUES (1231, 'John', 33); From 2504a6c36016b41e33ee5323fca79f5d511fb3ce Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Fri, 19 Jul 2024 14:59:38 +0000 Subject: [PATCH 136/265] make test output a bit clear --- ...61_lightweight_delete_projection.reference | 8 ++ .../03161_lightweight_delete_projection.sql | 131 ++---------------- 2 files changed, 21 insertions(+), 118 deletions(-) diff --git 
a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index 3401eaf6162..cb623ea2b50 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -1,6 +1,14 @@ +compact part +testing throw default mode +testing drop mode +testing rebuild mode 8888 Alice 50 p1 p2 +wide part +testing throw default mode +testing drop mode +testing rebuild mode 8888 Alice 50 p1 p2 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 9d577f8a701..f2d6dcb164f 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -1,77 +1,11 @@ SET lightweight_deletes_sync = 2, alter_sync = 2; -Set max_insert_threads = 2, -group_by_two_level_threshold = 704642, -group_by_two_level_threshold_bytes = 49659607, -distributed_aggregation_memory_efficient = 0, -fsync_metadata = 0, -output_format_parallel_formatting = 0, -input_format_parallel_parsing = 1, -min_chunk_bytes_for_parallel_parsing = 14437539, -max_read_buffer_size = 887507, -prefer_localhost_replica = 0, -max_block_size = 73908, -max_joined_block_size_rows = 21162, -max_threads = 2, -optimize_append_index = 0, -optimize_if_chain_to_multiif = 1, -optimize_if_transform_strings_to_enum = 0, -optimize_read_in_order = 0, -optimize_or_like_chain = 1, -optimize_substitute_columns = 1, -enable_multiple_prewhere_read_steps = 1, -read_in_order_two_level_merge_threshold = 13, -optimize_aggregation_in_order = 1, -aggregation_in_order_max_block_bytes = 37110261, -use_uncompressed_cache = 0, -min_bytes_to_use_direct_io = 10737418240, -min_bytes_to_use_mmap_io = 1, -local_filesystem_read_method ='pread', -remote_filesystem_read_method ='threadpool', -local_filesystem_read_prefetch = 0, -filesystem_cache_segments_batch_size = 3, -read_from_filesystem_cache_if_exists_otherwise_bypass_cache = 1, -throw_on_error_from_cache_on_write_operations = 0, -remote_filesystem_read_prefetch = 1, -allow_prefetched_read_pool_for_remote_filesystem = 0, -filesystem_prefetch_max_memory_usage = '32Mi', -filesystem_prefetches_limit = 0, -filesystem_prefetch_min_bytes_for_single_read_task ='16Mi', -filesystem_prefetch_step_marks = 50, -filesystem_prefetch_step_bytes = 0, -compile_aggregate_expressions = 0, -compile_sort_description = 1, -merge_tree_coarse_index_granularity = 16, -optimize_distinct_in_order = 0, -max_bytes_before_external_sort = 0, -max_bytes_before_external_group_by = 0, -max_bytes_before_remerge_sort = 820113150, -min_compress_block_size = 1262249, -max_compress_block_size = 1472188, -merge_tree_compact_parts_min_granules_to_multibuffer_read = 56, -optimize_sorting_by_input_stream_properties = 1, -http_response_buffer_size = 1883022, -http_wait_end_of_query = False, -enable_memory_bound_merging_of_aggregation_results = 1, -min_count_to_compile_expression = 0, -min_count_to_compile_aggregate_expression = 0, -min_count_to_compile_sort_description = 0, -session_timezone ='Africa/Khartoum', -prefer_warmed_unmerged_parts_seconds = 10, -use_page_cache_for_disks_without_file_cache = True, -page_cache_inject_eviction = False, -merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.02, -prefer_external_sort_block_bytes = 100000000, -cross_join_min_rows_to_compress = 
100000000, -cross_join_min_bytes_to_compress = 1, -min_external_table_block_size_bytes = 100000000, -max_parsing_threads = 0; - - DROP TABLE IF EXISTS users; --- compact part + +SELECT 'compact part'; + CREATE TABLE users ( uid Int16, name String, @@ -79,36 +13,17 @@ CREATE TABLE users ( projection p1 (select count(), age group by age), projection p2 (select age, name group by age, name) ) ENGINE = MergeTree order by uid -SETTINGS min_bytes_for_wide_part = 10485760, -ratio_of_defaults_for_sparse_serialization = 1.0, -prefer_fetch_merged_part_size_threshold = 1, -vertical_merge_algorithm_min_rows_to_activate = 1, -vertical_merge_algorithm_min_columns_to_activate = 100, -allow_vertical_merges_from_compact_to_wide_parts = 0, -min_merge_bytes_to_use_direct_io = 114145183, -index_granularity_bytes = 2660363, -merge_max_block_size = 13460, -index_granularity = 51768, -marks_compress_block_size = 59418, -primary_key_compress_block_size = 88795, -replace_long_file_name_to_hash = 0, -max_file_name_length = 0, -min_bytes_for_full_part_storage = 536870912, -compact_parts_max_bytes_to_buffer = 378557913, -compact_parts_max_granules_to_buffer = 254, -compact_parts_merge_max_bytes_to_prefetch_part = 26969686, -cache_populated_by_fetch = 0, -concurrent_part_removal_threshold = 38, -old_parts_lifetime = 480; +SETTINGS min_bytes_for_wide_part = 10485760; INSERT INTO users VALUES (1231, 'John', 33); --- testing throw default mode +SELECT 'testing throw default mode'; + ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; DELETE FROM users WHERE uid = 1231; -- { serverError NOT_IMPLEMENTED } --- testing drop mode +SELECT 'testing drop mode'; ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; DELETE FROM users WHERE uid = 1231; @@ -123,7 +38,7 @@ SELECT FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); --- testing rebuild mode +SELECT 'testing rebuild mode'; INSERT INTO users VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; @@ -143,7 +58,7 @@ WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); DROP TABLE users; --- wide part +SELECT 'wide part'; CREATE TABLE users ( uid Int16, name String, @@ -151,36 +66,16 @@ CREATE TABLE users ( projection p1 (select count(), age group by age), projection p2 (select age, name group by age, name) ) ENGINE = MergeTree order by uid -SETTINGS min_bytes_for_wide_part = 0, -ratio_of_defaults_for_sparse_serialization = 1.0, -prefer_fetch_merged_part_size_threshold = 1, -vertical_merge_algorithm_min_rows_to_activate = 1, -vertical_merge_algorithm_min_columns_to_activate = 100, -allow_vertical_merges_from_compact_to_wide_parts = 0, -min_merge_bytes_to_use_direct_io = 114145183, -index_granularity_bytes = 2660363, -merge_max_block_size = 13460, -index_granularity = 51768, -marks_compress_block_size = 59418, -primary_key_compress_block_size = 88795, -replace_long_file_name_to_hash = 0, -max_file_name_length = 0, -min_bytes_for_full_part_storage = 536870912, -compact_parts_max_bytes_to_buffer = 378557913, -compact_parts_max_granules_to_buffer = 254, -compact_parts_merge_max_bytes_to_prefetch_part = 26969686, -cache_populated_by_fetch = 0, -concurrent_part_removal_threshold = 38, -old_parts_lifetime = 480; +SETTINGS min_bytes_for_wide_part = 0; INSERT INTO users VALUES (1231, 'John', 33); --- testing throw default mode +SELECT 'testing throw default mode'; ALTER 
TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; DELETE FROM users WHERE uid = 1231; -- { serverError NOT_IMPLEMENTED } --- testing drop mode +SELECT 'testing drop mode'; ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; DELETE FROM users WHERE uid = 1231; @@ -195,7 +90,7 @@ SELECT FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); --- testing rebuild mode +SELECT 'testing rebuild mode'; INSERT INTO users VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; From 4ae0daf5d3149a2e9e4e8494e52164c91c27af0e Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Fri, 19 Jul 2024 18:46:37 +0000 Subject: [PATCH 137/265] output more info --- ...61_lightweight_delete_projection.reference | 14 +++++--- .../03161_lightweight_delete_projection.sql | 32 ++++++++++++++++--- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index cb623ea2b50..960fa1dcc33 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -1,14 +1,20 @@ compact part testing throw default mode testing drop mode +all_1_1_0_2 testing rebuild mode 8888 Alice 50 -p1 -p2 +all_1_1_0_4 +all_3_3_0_4 +p1 all_3_3_0_4 +p2 all_3_3_0_4 wide part testing throw default mode testing drop mode +all_1_1_0_2 testing rebuild mode 8888 Alice 50 -p1 -p2 +all_1_1_0_4 +all_3_3_0_4 +p1 all_3_3_0_4 +p2 all_3_3_0_4 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index f2d6dcb164f..f33653fc652 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -32,9 +32,15 @@ SELECT * FROM users ORDER BY uid; SYSTEM FLUSH LOGS; --- expecting no projection +-- all_1_1_0_2 SELECT name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); + +-- expecting no projection +SELECT + name, parent_name FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); @@ -49,9 +55,15 @@ SELECT * FROM users ORDER BY uid; SYSTEM FLUSH LOGS; --- expecting projection p1, p2 +-- all_1_1_0_4, all_3_3_0_4 SELECT name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); + +-- expecting projection p1, p2 +SELECT + name, parent_name FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); @@ -84,9 +96,15 @@ SELECT * FROM users ORDER BY uid; SYSTEM FLUSH LOGS; --- expecting no projection +-- all_1_1_0_2 SELECT name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); + +-- expecting no projection +SELECT + name, parent_name FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); @@ -101,9 +119,15 @@ SELECT * FROM users ORDER BY uid; SYSTEM FLUSH LOGS; --- expecting projection p1, p2 +-- all_1_1_0_4, all_3_3_0_4 SELECT name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); + +-- expecting projection p1, p2 +SELECT + name, parent_name 
FROM system.projection_parts WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); From d5065a43ae4ae5ba0f068e3fdf5952dd5319f561 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 29 Jul 2024 17:55:01 +0200 Subject: [PATCH 138/265] Update StorageObjectStorage.cpp --- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index ca0ced8dcd3..d9c82d68791 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -39,11 +39,16 @@ String StorageObjectStorage::getPathSample(StorageInMemoryMetadata metadata, Con auto query_settings = configuration->getQuerySettings(context); /// We don't want to throw an exception if there are no files with specified path. query_settings.throw_on_zero_files_match = false; + + bool local_distributed_processing = distributed_processing; + if (context->getSettingsRef().use_hive_partitioning) + local_distributed_processing = false; + auto file_iterator = StorageObjectStorageSource::createFileIterator( configuration, query_settings, object_storage, - distributed_processing, + local_distributed_processing, context, {}, // predicate metadata.getColumns().getAll(), // virtual_columns From f9a5210bacc418e354ddcf8893fa8c5a291b46d4 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 29 Jul 2024 19:36:31 +0200 Subject: [PATCH 139/265] solve Alexey's review --- src/Storages/VirtualColumnUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 87c1aecc3a7..257a77547c0 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -127,7 +127,7 @@ NameSet getVirtualNamesForFileLikeStorage() std::unordered_map parseHivePartitioningKeysAndValues(const String & path, const ColumnsDescription & storage_columns) { - std::string pattern = "([^/]+)=([^/]+)/"; + std::string pattern = "([^/])=([^/]+)/"; re2::StringPiece input_piece(path); std::unordered_map key_values; From b0e6b3e88930d3ca493dddb688235c64cec1d893 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 30 Jul 2024 06:30:12 +0000 Subject: [PATCH 140/265] Kick off CI build From 0c9fa155d4993220c00e4b41c0354b20d3312f33 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 30 Jul 2024 11:58:35 +0200 Subject: [PATCH 141/265] revert last commit --- src/Storages/VirtualColumnUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 4edab01925d..f16eff7edb6 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -131,7 +131,7 @@ NameSet getVirtualNamesForFileLikeStorage() std::unordered_map parseHivePartitioningKeysAndValues(const String & path, const ColumnsDescription & storage_columns) { - std::string pattern = "([^/])=([^/]+)/"; + std::string pattern = "([^/]+)=([^/]+)/"; re2::StringPiece input_piece(path); std::unordered_map key_values; From 9d0608ce001b35e17ff81c00d4965dbe4938b56b Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 30 Jul 2024 16:55:37 +0200 Subject: [PATCH 
142/265] Update Runner.cpp --- utils/keeper-bench/Runner.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index d99f2645a31..59761d827e1 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -545,8 +545,7 @@ struct ZooKeeperRequestFromLogReader file_read_buf = DB::wrapReadBufferWithCompressionMethod(std::move(file_read_buf), compression_method); DB::SingleReadBufferIterator read_buffer_iterator(std::move(file_read_buf)); - std::string sample_path; - auto [columns_description, format] = DB::detectFormatAndReadSchema(format_settings, read_buffer_iterator, sample_path, context); + auto [columns_description, format] = DB::detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); DB::ColumnsWithTypeAndName columns; columns.reserve(columns_description.size()); From 956f8762fef7473804f7d82d63f076e09736f42c Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 31 Jul 2024 05:11:34 +0000 Subject: [PATCH 143/265] fix after merge --- src/Client/ClientApplicationBase.cpp | 37 ++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/Client/ClientApplicationBase.cpp b/src/Client/ClientApplicationBase.cpp index 9f133616d2e..1b2ae16a479 100644 --- a/src/Client/ClientApplicationBase.cpp +++ b/src/Client/ClientApplicationBase.cpp @@ -158,6 +158,8 @@ void ClientApplicationBase::init(int argc, char ** argv) ("config-file,C", po::value(), "config-file path") + ("proto_caps", po::value(), "enable/disable chunked protocol: chunked_optional, notchunked, notchunked_optional, send_chunked, send_chunked_optional, send_notchunked, send_notchunked_optional, recv_chunked, recv_chunked_optional, recv_notchunked, recv_notchunked_optional") + ("query,q", po::value>()->multitoken(), R"(Query. Can be specified multiple times (--query "SELECT 1" --query "SELECT 2") or once with multiple comma-separated queries (--query "SELECT 1; SELECT 2;"). In the latter case, INSERT queries with non-VALUE format must be separated by empty lines.)") ("queries-file", po::value>()->multitoken(), "file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)") ("multiquery,n", "Obsolete, does nothing") @@ -339,6 +341,41 @@ void ClientApplicationBase::init(int argc, char ** argv) if (options.count("server_logs_file")) server_logs_file = options["server_logs_file"].as(); + if (options.count("proto_caps")) + { + std::string proto_caps_str = options["proto_caps"].as(); + + std::vector proto_caps; + splitInto<','>(proto_caps, proto_caps_str); + + for (auto cap_str : proto_caps) + { + std::string direction; + + if (cap_str.starts_with("send_")) + { + direction = "send"; + cap_str = cap_str.substr(std::string_view("send_").size()); + } + else if (cap_str.starts_with("recv_")) + { + direction = "recv"; + cap_str = cap_str.substr(std::string_view("recv_").size()); + } + + if (cap_str != "chunked" && cap_str != "notchunked" && cap_str != "chunked_optional" && cap_str != "notchunked_optional") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "proto_caps option is incorrect ({})", proto_caps_str); + + if (direction.empty()) + { + config().setString("proto_caps.send", std::string(cap_str)); + config().setString("proto_caps.recv", std::string(cap_str)); + } + else + config().setString("proto_caps." 
+ direction, std::string(cap_str)); } } query_processing_stage = QueryProcessingStage::fromString(options["stage"].as()); query_kind = parseQueryKind(options["query_kind"].as()); profile_events.print = options.count("print-profile-events"); From 9fb610ae10da22f521a1ab2e4442c78766d5be37 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 31 Jul 2024 11:52:36 +0200 Subject: [PATCH 144/265] fix tests --- .../queries/0_stateless/03203_hive_style_partitioning.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index 430a3582f65..a4a2e48e046 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -118,4 +118,3 @@ Eva Schmidt Elizabeth Schmidt Samuel Schmidt Elizabeth Schmidt Eva Schmidt Elizabeth Samuel Schmidt Elizabeth -Elizabeth Gordon Elizabeth Gordon From 0cd37533a1e9873632cff7dc6debbbf802a29742 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 31 Jul 2024 10:10:44 +0000 Subject: [PATCH 145/265] fix after merge --- src/Client/ClientBase.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index f8c2fb0d6bc..0c26b77bcec 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1039,10 +1039,10 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr parsed_query) connection_parameters.timeouts, query, query_parameters, - global_context->getCurrentQueryId(), + client_context->getCurrentQueryId(), query_processing_stage, - &global_context->getSettingsRef(), - &global_context->getClientInfo(), + &client_context->getSettingsRef(), + &client_context->getClientInfo(), true, [&](const Progress & progress) { onProgress(progress); }); From beb5d02cdc1f5fae58a8ee43fadb1c581868b894 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 31 Jul 2024 17:58:20 +0000 Subject: [PATCH 146/265] Move THROW back to InterpreterDelete. --- src/Interpreters/InterpreterDeleteQuery.cpp | 13 +++++++++++++ src/Interpreters/MutationsInterpreter.cpp | 10 ---------- src/Storages/MergeTree/MutateTask.cpp | 12 ++++++++---- .../03161_lightweight_delete_projection.sql | 4 ++-- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 0e988e7d031..3000292f047 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace DB @@ -85,6 +86,18 @@ BlockIO InterpreterDeleteQuery::execute() "Lightweight delete mutate is disabled. " "Set `enable_lightweight_delete` setting to enable it"); + if (metadata_snapshot->hasProjections()) + { + if (const auto * merge_tree_data = dynamic_cast(table.get())) + if (merge_tree_data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "DELETE query is not allowed for table {} because it has projections and setting " + "lightweight_mutation_projection_mode is set to THROW. " + "User should change lightweight_mutation_projection_mode OR " + "drop all the projections manually before running the query", + table_id.getFullTableName()); + } + /// Build "ALTER ...
UPDATE _row_exists = 0 WHERE predicate" query String alter_query = "ALTER TABLE " + table->getStorageID().getFullTableName() diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index db4ea9c0754..480c6736bc5 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -491,16 +491,6 @@ static void validateUpdateColumns( { if (!source.supportsLightweightDelete()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); - - if (const MergeTreeData * merge_tree_data = source.getMergeTreeData(); merge_tree_data != nullptr) - { - if (merge_tree_data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW - && merge_tree_data->hasProjection()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "DELETE query is not supported for table {} as it has projections. " - "User should drop all the projections manually before running the query", - source.getStorage()->getStorageID().getFullTableName()); - } } else if (virtual_columns.tryGet(column_name)) { diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 6245d80508b..8b5829eb058 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2320,10 +2320,14 @@ bool MutateTask::prepare() ctx->context, ctx->materialized_indices); - bool lightweight_delete_projection_drop = lightweight_delete_mode - && ctx->data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::DROP; + auto lightweight_mutation_projection_mode = ctx->data->getSettings()->lightweight_mutation_projection_mode; + bool lightweight_delete_drops_projections = + lightweight_mutation_projection_mode == LightweightMutationProjectionMode::DROP + || lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW; + + bool should_create_projections = !(lightweight_delete_mode && lightweight_delete_drops_projections); /// Under lightweight delete mode, if option is drop, projections_to_recalc should be empty. 
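/// THROW is grouped with DROP below: only REBUILD recalculates projections for a lightweight delete;
/// under THROW the query is normally rejected earlier, in InterpreterDeleteQuery, before reaching this point.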
- if (!lightweight_delete_projection_drop) + if (should_create_projections) { ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate( ctx->source_part, @@ -2342,7 +2346,7 @@ bool MutateTask::prepare() ctx->projections_to_recalc, ctx->stats_to_recalc, ctx->metadata_snapshot, - lightweight_delete_projection_drop); + !should_create_projections); ctx->files_to_rename = MutationHelpers::collectFilesForRenames( ctx->source_part, diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index f33653fc652..02b880d620a 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -21,7 +21,7 @@ SELECT 'testing throw default mode'; ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; -DELETE FROM users WHERE uid = 1231; -- { serverError NOT_IMPLEMENTED } +DELETE FROM users WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } SELECT 'testing drop mode'; ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; @@ -85,7 +85,7 @@ INSERT INTO users VALUES (1231, 'John', 33); SELECT 'testing throw default mode'; ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; -DELETE FROM users WHERE uid = 1231; -- { serverError NOT_IMPLEMENTED } +DELETE FROM users WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } SELECT 'testing drop mode'; ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; From 557f9dbe3fb02e3bce62adbeb1fd5056f2d36b6c Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Wed, 31 Jul 2024 18:51:27 +0000 Subject: [PATCH 147/265] fix test --- .../0_stateless/02319_lightweight_delete_on_merge_tree.sql | 2 +- tests/queries/0_stateless/02792_drop_projection_lwd.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql index f82f79dbe44..6491253cd5f 100644 --- a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql +++ b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql @@ -102,7 +102,7 @@ ALTER TABLE t_proj ADD PROJECTION p_1 (SELECT avg(a), avg(b), count()) SETTINGS INSERT INTO t_proj SELECT number + 1, number + 1 FROM numbers(1000); -DELETE FROM t_proj WHERE a < 100; -- { serverError NOT_IMPLEMENTED } +DELETE FROM t_proj WHERE a < 100; -- { serverError SUPPORT_IS_DISABLED } SELECT avg(a), avg(b), count() FROM t_proj; diff --git a/tests/queries/0_stateless/02792_drop_projection_lwd.sql b/tests/queries/0_stateless/02792_drop_projection_lwd.sql index dcde7dcc600..dad7f7cd028 100644 --- a/tests/queries/0_stateless/02792_drop_projection_lwd.sql +++ b/tests/queries/0_stateless/02792_drop_projection_lwd.sql @@ -7,7 +7,7 @@ CREATE TABLE t_projections_lwd (a UInt32, b UInt32, PROJECTION p (SELECT * ORDER INSERT INTO t_projections_lwd SELECT number, number FROM numbers(100); -- LWD does not work, as expected -DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError NOT_IMPLEMENTED } +DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError SUPPORT_IS_DISABLED } KILL MUTATION WHERE database = currentDatabase() AND table = 't_projections_lwd' SYNC FORMAT Null; -- drop projection From d2e0668d5129a4e60f462de5e5b683099f49bf4b Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi 
<114298166+yariks5s@users.noreply.github.com> Date: Thu, 1 Aug 2024 13:51:35 +0200 Subject: [PATCH 148/265] fix settingsChangesHistory after merge with master --- src/Core/SettingsChangesHistory.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index a01c5faaf10..28a732c6177 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -81,9 +81,6 @@ static std::initializer_list Date: Thu, 1 Aug 2024 20:16:10 +0200 Subject: [PATCH 149/265] fix merge with master --- docs/en/operations/settings/settings.md | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index b880c42a45b..c621f2db5ae 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -5615,18 +5615,6 @@ Disable all insert and mutations (alter table update / alter table delete / alte Default value: `false`. -## restore_replace_external_engines_to_null - -For testing purposes. Replaces all external engines to Null to not initiate external connections. - -Default value: `False` - -## restore_replace_external_table_functions_to_null - -For testing purposes. Replaces all external table functions to Null to not initiate external connections. - -Default value: `False` - ## use_hive_partitioning When enabled, ClickHouse will detect Hive-style partitioning in path (`/name=value/`) in file-like table engines [File](../../engines/table-engines/special/file.md#hive-style-partitioning)/[S3](../../engines/table-engines/integrations/s3.md#hive-style-partitioning)/[URL](../../engines/table-engines/special/url.md#hive-style-partitioning)/[HDFS](../../engines/table-engines/integrations/hdfs.md#hive-style-partitioning)/[AzureBlobStorage](../../engines/table-engines/integrations/azureBlobStorage.md#hive-style-partitioning) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`. From c50ef37a03438003c21076c5700d9c1f52c1c435 Mon Sep 17 00:00:00 2001 From: pufit Date: Fri, 2 Aug 2024 00:01:41 -0400 Subject: [PATCH 150/265] Fix inconsistent formatting for `GRANT CURRENT GRANTS` --- src/Parsers/Access/ASTGrantQuery.cpp | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/src/Parsers/Access/ASTGrantQuery.cpp b/src/Parsers/Access/ASTGrantQuery.cpp index f60fa7e4a23..eac88c75513 100644 --- a/src/Parsers/Access/ASTGrantQuery.cpp +++ b/src/Parsers/Access/ASTGrantQuery.cpp @@ -97,24 +97,9 @@ namespace void formatCurrentGrantsElements(const AccessRightsElements & elements, const IAST::FormatSettings & settings) { - for (size_t i = 0; i != elements.size(); ++i) - { - const auto & element = elements[i]; - - bool next_element_on_same_db_and_table = false; - if (i != elements.size() - 1) - { - const auto & next_element = elements[i + 1]; - if (element.sameDatabaseAndTableAndParameter(next_element)) - next_element_on_same_db_and_table = true; - } - - if (!next_element_on_same_db_and_table) - { - settings.ostr << " "; - formatONClause(element, settings); - } - } + settings.ostr << "("; + formatElementsWithoutOptions(elements, settings); + settings.ostr << ")"; } } From 77a2eb61ef965a6460bbdb74447aa3871cb1d0c7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 2 Aug 2024 17:43:33 +0000 Subject: [PATCH 151/265] Update test. 
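The test is split into separate users_compact and users_wide tables and wrapped in -- { echoOn } so the reference file records each query. A minimal sketch of the three lightweight_mutation_projection_mode behaviors the test exercises (table and projection names here are illustrative, not taken from the patch):

CREATE TABLE t (uid Int16, name String, PROJECTION p (SELECT name ORDER BY name))
ENGINE = MergeTree ORDER BY uid;
INSERT INTO t VALUES (1231, 'John');
ALTER TABLE t MODIFY SETTING lightweight_mutation_projection_mode = 'throw';
DELETE FROM t WHERE uid = 1231; -- rejected with SUPPORT_IS_DISABLED
ALTER TABLE t MODIFY SETTING lightweight_mutation_projection_mode = 'drop';
DELETE FROM t WHERE uid = 1231; -- succeeds; the rewritten part keeps no projection
INSERT INTO t VALUES (6666, 'Ksenia');
ALTER TABLE t MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild';
DELETE FROM t WHERE uid = 6666; -- succeeds; projections are rebuilt for the mutated parts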
--- ...61_lightweight_delete_projection.reference | 70 ++++++++++++++++++ .../03161_lightweight_delete_projection.sql | 74 ++++++++++--------- 2 files changed, 111 insertions(+), 33 deletions(-) diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index 960fa1dcc33..eef0c5a41b5 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -1,20 +1,90 @@ compact part testing throw default mode +-- { echoOn } + +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; +DELETE FROM users_compact WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } +SELECT 'testing drop mode'; testing drop mode +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; +DELETE FROM users_compact WHERE uid = 1231; +SELECT * FROM users_compact ORDER BY uid; +SYSTEM FLUSH LOGS; +-- all_1_1_0_2 +SELECT + name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); all_1_1_0_2 +-- expecting no projection +SELECT + name, parent_name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); +SELECT 'testing rebuild mode'; testing rebuild mode +INSERT INTO users_compact VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; +DELETE FROM users_compact WHERE uid = 6666; +SELECT * FROM users_compact ORDER BY uid; 8888 Alice 50 +SYSTEM FLUSH LOGS; +-- all_1_1_0_4, all_3_3_0_4 +SELECT + name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); all_1_1_0_4 all_3_3_0_4 +-- expecting projection p1, p2 +SELECT + name, parent_name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); p1 all_3_3_0_4 p2 all_3_3_0_4 wide part testing throw default mode +-- { echoOn } + +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; +DELETE FROM users_wide WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } +SELECT 'testing drop mode'; testing drop mode +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; +DELETE FROM users_wide WHERE uid = 1231; +SELECT * FROM users_wide ORDER BY uid; +SYSTEM FLUSH LOGS; +-- all_1_1_0_2 +SELECT + name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); all_1_1_0_2 +-- expecting no projection +SELECT + name, parent_name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); +SELECT 'testing rebuild mode'; testing rebuild mode +INSERT INTO users_wide VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; +DELETE FROM users_wide WHERE uid = 6666; +SELECT * FROM users_wide ORDER BY uid; 8888 Alice 50 +SYSTEM FLUSH LOGS; +-- all_1_1_0_4, all_3_3_0_4 +SELECT + name +FROM system.parts +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); all_1_1_0_4 all_3_3_0_4 +-- expecting projection p1, p2 +SELECT + name, parent_name +FROM system.projection_parts +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); p1 
all_3_3_0_4 p2 all_3_3_0_4 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 02b880d620a..28e5612a529 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -1,12 +1,12 @@ SET lightweight_deletes_sync = 2, alter_sync = 2; -DROP TABLE IF EXISTS users; +DROP TABLE IF EXISTS users_compact; SELECT 'compact part'; -CREATE TABLE users ( +CREATE TABLE users_compact ( uid Int16, name String, age Int16, @@ -15,20 +15,22 @@ CREATE TABLE users ( ) ENGINE = MergeTree order by uid SETTINGS min_bytes_for_wide_part = 10485760; -INSERT INTO users VALUES (1231, 'John', 33); +INSERT INTO users_compact VALUES (1231, 'John', 33); SELECT 'testing throw default mode'; -ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; +-- { echoOn } -DELETE FROM users WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; + +DELETE FROM users_compact WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } SELECT 'testing drop mode'; -ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; -DELETE FROM users WHERE uid = 1231; +DELETE FROM users_compact WHERE uid = 1231; -SELECT * FROM users ORDER BY uid; +SELECT * FROM users_compact ORDER BY uid; SYSTEM FLUSH LOGS; @@ -36,22 +38,22 @@ SYSTEM FLUSH LOGS; SELECT name FROM system.parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); -- expecting no projection SELECT name, parent_name FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); SELECT 'testing rebuild mode'; -INSERT INTO users VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); +INSERT INTO users_compact VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); -ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; +ALTER TABLE users_compact MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; -DELETE FROM users WHERE uid = 6666; +DELETE FROM users_compact WHERE uid = 6666; -SELECT * FROM users ORDER BY uid; +SELECT * FROM users_compact ORDER BY uid; SYSTEM FLUSH LOGS; @@ -59,19 +61,21 @@ SYSTEM FLUSH LOGS; SELECT name FROM system.parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); -- expecting projection p1, p2 SELECT name, parent_name FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); -DROP TABLE users; +-- { echoOff } + +DROP TABLE users_compact; SELECT 'wide part'; -CREATE TABLE users ( +CREATE TABLE users_wide ( uid Int16, name String, age Int16, @@ -80,19 +84,22 @@ CREATE TABLE users ( ) ENGINE = MergeTree order by uid SETTINGS min_bytes_for_wide_part = 0; -INSERT INTO users VALUES (1231, 'John', 33); +INSERT INTO users_wide VALUES (1231, 'John', 33); SELECT 'testing throw default mode'; -ALTER TABLE users MODIFY SETTING 
lightweight_mutation_projection_mode = 'throw'; -DELETE FROM users WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } +-- { echoOn } + +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'throw'; + +DELETE FROM users_wide WHERE uid = 1231; -- { serverError SUPPORT_IS_DISABLED } SELECT 'testing drop mode'; -ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'drop'; -DELETE FROM users WHERE uid = 1231; +DELETE FROM users_wide WHERE uid = 1231; -SELECT * FROM users ORDER BY uid; +SELECT * FROM users_wide ORDER BY uid; SYSTEM FLUSH LOGS; @@ -100,22 +107,22 @@ SYSTEM FLUSH LOGS; SELECT name FROM system.parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); -- expecting no projection SELECT name, parent_name FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); SELECT 'testing rebuild mode'; -INSERT INTO users VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); +INSERT INTO users_wide VALUES (6666, 'Ksenia', 48), (8888, 'Alice', 50); -ALTER TABLE users MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; +ALTER TABLE users_wide MODIFY SETTING lightweight_mutation_projection_mode = 'rebuild'; -DELETE FROM users WHERE uid = 6666; +DELETE FROM users_wide WHERE uid = 6666; -SELECT * FROM users ORDER BY uid; +SELECT * FROM users_wide ORDER BY uid; SYSTEM FLUSH LOGS; @@ -123,13 +130,14 @@ SYSTEM FLUSH LOGS; SELECT name FROM system.parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); -- expecting projection p1, p2 SELECT name, parent_name FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); +-- { echoOff } -DROP TABLE users; \ No newline at end of file +DROP TABLE users_wide; From 2605bb36b66ccfb4621244a28475a242778b6cc4 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Sat, 3 Aug 2024 01:42:11 +0000 Subject: [PATCH 152/265] fix conflict --- src/Core/SettingsChangesHistory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 8f73e10c44f..107a8e451c5 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -92,7 +92,7 @@ static std::initializer_list Date: Mon, 5 Aug 2024 13:43:06 +0200 Subject: [PATCH 153/265] fix flaky check for integration tests --- .../test_storage_azure_blob_storage/test.py | 82 +++++++++++++------ .../test_cluster.py | 18 ++-- tests/integration/test_storage_hdfs/test.py | 47 +++++++---- 3 files changed, 94 insertions(+), 53 deletions(-) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 6966abfee4f..15a1f6db2c1 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -135,6 +135,7 @@ def test_create_table_connection_string(cluster): Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 
'test_create_connection_string', 'CSV') """, ) + azure_query(node, "DROP TABLE IF EXISTS test_create_table_conn_string") def test_create_table_account_string(cluster): @@ -144,6 +145,7 @@ def test_create_table_account_string(cluster): f"CREATE TABLE test_create_table_account_url (key UInt64, data String) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," f"'cont', 'test_create_connection_string', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV')", ) + azure_query(node, "DROP TABLE IF EXISTS test_create_table_account_url") def test_simple_write_account_string(cluster): @@ -157,6 +159,7 @@ def test_simple_write_account_string(cluster): azure_query(node, "INSERT INTO test_simple_write VALUES (1, 'a')") print(get_azure_file_content("test_simple_write.csv", port)) assert get_azure_file_content("test_simple_write.csv", port) == '1,"a"\n' + azure_query(node, "DROP TABLE test_simple_write") def test_simple_write_connection_string(cluster): @@ -170,6 +173,7 @@ def test_simple_write_connection_string(cluster): azure_query(node, "INSERT INTO test_simple_write_connection_string VALUES (1, 'a')") print(get_azure_file_content("test_simple_write_c.csv", port)) assert get_azure_file_content("test_simple_write_c.csv", port) == '1,"a"\n' + azure_query(node, "DROP TABLE test_simple_write_connection_string") def test_simple_write_named_collection_1(cluster): @@ -185,7 +189,7 @@ def test_simple_write_named_collection_1(cluster): ) print(get_azure_file_content("test_simple_write_named.csv", port)) assert get_azure_file_content("test_simple_write_named.csv", port) == '1,"a"\n' - azure_query(node, "TRUNCATE TABLE test_simple_write_named_collection_1") + azure_query(node, "DROP TABLE test_simple_write_named_collection_1") def test_simple_write_named_collection_2(cluster): @@ -202,6 +206,7 @@ def test_simple_write_named_collection_2(cluster): ) print(get_azure_file_content("test_simple_write_named_2.csv", port)) assert get_azure_file_content("test_simple_write_named_2.csv", port) == '1,"a"\n' + azure_query(node, "DROP TABLE test_simple_write_named_collection_2") def test_partition_by(cluster): @@ -223,6 +228,7 @@ def test_partition_by(cluster): assert "1,2,3\n" == get_azure_file_content("test_3.csv", port) assert "3,2,1\n" == get_azure_file_content("test_1.csv", port) assert "78,43,45\n" == get_azure_file_content("test_45.csv", port) + azure_query(node, "DROP TABLE test_partitioned_write") def test_partition_by_string_column(cluster): @@ -243,6 +249,7 @@ def test_partition_by_string_column(cluster): assert '1,"foo/bar"\n' == get_azure_file_content("test_foo/bar.csv", port) assert '3,"йцук"\n' == get_azure_file_content("test_йцук.csv", port) assert '78,"你好"\n' == get_azure_file_content("test_你好.csv", port) + azure_query(node, "DROP TABLE test_partitioned_string_write") def test_partition_by_const_column(cluster): @@ -261,6 +268,7 @@ def test_partition_by_const_column(cluster): ) azure_query(node, f"INSERT INTO test_partitioned_const_write VALUES {values}") assert values_csv == get_azure_file_content("test_88.csv", port) + azure_query(node, "DROP TABLE test_partitioned_const_write") def test_truncate(cluster): @@ -276,6 +284,7 @@ def test_truncate(cluster): azure_query(node, "TRUNCATE TABLE test_truncate") with pytest.raises(Exception): print(get_azure_file_content("test_truncate.csv", port)) + azure_query(node, "DROP TABLE test_truncate") def test_simple_read_write(cluster): @@ -292,6 +301,7 @@ def 
test_simple_read_write(cluster): assert get_azure_file_content("test_simple_read_write.csv", port) == '1,"a"\n' print(azure_query(node, "SELECT * FROM test_simple_read_write")) assert azure_query(node, "SELECT * FROM test_simple_read_write") == "1\ta\n" + azure_query(node, "DROP TABLE test_simple_read_write") def test_create_new_files_on_insert(cluster): @@ -344,6 +354,7 @@ def test_overwrite(cluster): result = azure_query(node, f"select count() from test_overwrite") assert int(result) == 200 + azure_query(node, f"DROP TABLE test_overwrite") def test_insert_with_path_with_globs(cluster): @@ -356,6 +367,7 @@ def test_insert_with_path_with_globs(cluster): node.query_and_get_error( f"insert into table function test_insert_globs SELECT number, randomString(100) FROM numbers(500)" ) + azure_query(node, f"DROP TABLE test_insert_globs") def test_put_get_with_globs(cluster): @@ -364,6 +376,7 @@ def test_put_get_with_globs(cluster): node = cluster.instances["node"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" max_path = "" + used_names = [] for i in range(10): for j in range(10): path = "{}/{}_{}/{}.csv".format( @@ -372,6 +385,8 @@ def test_put_get_with_globs(cluster): max_path = max(path, max_path) values = f"({i},{j},{i + j})" + used_names.append(f"test_put_{i}_{j}") + azure_query( node, f"CREATE TABLE test_put_{i}_{j} ({table_format}) Engine = AzureBlobStorage(azure_conf2, " @@ -392,6 +407,9 @@ def test_put_get_with_globs(cluster): bucket="cont", max_path=max_path ) ] + azure_query(node, "DROP TABLE test_glob_select") + for name in used_names: + azure_query(node, f"DROP TABLE {name}") def test_azure_glob_scheherazade(cluster): @@ -400,12 +418,14 @@ def test_azure_glob_scheherazade(cluster): values = "(1, 1, 1)" nights_per_job = 1001 // 30 jobs = [] + used_names = [] for night in range(0, 1001, nights_per_job): def add_tales(start, end): for i in range(start, end): path = "night_{}/tale.csv".format(i) unique_num = random.randint(1, 10000) + used_names.append(f"test_scheherazade_{i}_{unique_num}") azure_query( node, f"CREATE TABLE test_scheherazade_{i}_{unique_num} ({table_format}) Engine = AzureBlobStorage(azure_conf2, " @@ -433,6 +453,9 @@ def test_azure_glob_scheherazade(cluster): ) query = "select count(), sum(column1), sum(column2), sum(column3) from test_glob_select_scheherazade" assert azure_query(node, query).splitlines() == ["1001\t1001\t1001\t1001"] + azure_query(node, "DROP TABLE test_glob_select_scheherazade") + for name in used_names: + azure_query(node, f"DROP TABLE {name}") @pytest.mark.parametrize( @@ -506,6 +529,8 @@ def test_schema_inference_no_globs(cluster): assert azure_query(node, query).splitlines() == [ "499500\t2890\t332833500\ttest_schema_inference_no_globs.csv\tcont/test_schema_inference_no_globs.csv" ] + azure_query(node, f"DROP TABLE test_schema_inference_src") + azure_query(node, f"DROP TABLE test_select_inference") def test_schema_inference_from_globs(cluster): @@ -514,6 +539,7 @@ def test_schema_inference_from_globs(cluster): node = cluster.instances["node"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" max_path = "" + used_names = [] for i in range(10): for j in range(10): path = "{}/{}_{}/{}.csv".format( @@ -521,6 +547,7 @@ def test_schema_inference_from_globs(cluster): ) max_path = max(path, max_path) values = f"({i},{j},{i + j})" + used_names.append(f"test_schema_{i}_{j}") azure_query( node, @@ -546,6 +573,9 @@ def test_schema_inference_from_globs(cluster): bucket="cont", 
max_path=max_path ) ] + azure_query(node, "DROP TABLE test_glob_select_inference") + for name in used_names: + azure_query(node, f"DROP TABLE {name}") def test_simple_write_account_string_table_function(cluster): @@ -595,7 +625,7 @@ def test_simple_write_named_collection_1_table_function(cluster): azure_query( node, - "TRUNCATE TABLE drop_table", + "DROP TABLE drop_table", ) @@ -605,7 +635,7 @@ def test_simple_write_named_collection_2_table_function(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='test_simple_write_named_2_tf.csv', format='CSV', structure='key UInt64, data String') VALUES (1, 'a')", + f" container='cont', blob_path='test_simple_write_named_2_tf.csv', format='CSV', structure='key UInt64, data String') VALUES (1, 'a') SETTINGS azure_truncate_on_insert=1", ) print(get_azure_file_content("test_simple_write_named_2_tf.csv", port)) assert get_azure_file_content("test_simple_write_named_2_tf.csv", port) == '1,"a"\n' @@ -628,7 +658,7 @@ def test_put_get_with_globs_tf(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}", + f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values} SETTINGS azure_truncate_on_insert=1", ) query = ( f"select sum(column1), sum(column2), sum(column3), min(_file), max(_path) from azureBlobStorage(azure_conf2, " @@ -649,7 +679,7 @@ def test_schema_inference_no_globs_tf(cluster): query = ( f"insert into table function azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', " f"container='cont', blob_path='test_schema_inference_no_globs_tf.csv', format='CSVWithNames', structure='{table_format}') " - f"SELECT number, toString(number), number * number FROM numbers(1000)" + f"SELECT number, toString(number), number * number FROM numbers(1000) SETTINGS azure_truncate_on_insert=1" ) azure_query(node, query) @@ -680,7 +710,7 @@ def test_schema_inference_from_globs_tf(cluster): query = ( f"insert into table function azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', " - f"container='cont', blob_path='{path}', format='CSVWithNames', structure='{table_format}') VALUES {values}" + f"container='cont', blob_path='{path}', format='CSVWithNames', structure='{table_format}') VALUES {values} SETTINGS azure_truncate_on_insert=1" ) azure_query(node, query) @@ -708,7 +738,7 @@ def test_partition_by_tf(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', " f"'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', " - f"'CSV', 'auto', '{table_format}') PARTITION BY {partition_by} VALUES {values}", + f"'CSV', 'auto', '{table_format}') PARTITION BY {partition_by} VALUES {values} SETTINGS azure_truncate_on_insert=1", ) assert "1,2,3\n" == get_azure_file_content("test_partition_tf_3.csv", port) @@ -727,7 +757,7 @@ def test_filter_using_file(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 'cont', '{filename}', " 
f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', " - f"'{table_format}') PARTITION BY {partition_by} VALUES {values}", + f"'{table_format}') PARTITION BY {partition_by} VALUES {values} SETTINGS azure_truncate_on_insert=1", ) query = ( @@ -745,7 +775,7 @@ def test_read_subcolumns(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumns.tsv', " f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto'," - f" 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)", + f" 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3) SETTINGS azure_truncate_on_insert=1", ) azure_query( @@ -795,7 +825,7 @@ def test_read_subcolumn_time(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumn_time.tsv', " f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto'," - f" 'a UInt32') select (42)", + f" 'a UInt32') select (42) SETTINGS azure_truncate_on_insert=1", ) res = node.query( @@ -825,7 +855,7 @@ def test_function_signatures(cluster): account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto', 'column1 UInt32') VALUES (1),(2),(3)", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto', 'column1 UInt32') VALUES (1),(2),(3) SETTINGS azure_truncate_on_insert=1", ) # " - connection_string, container_name, blobpath\n" @@ -939,12 +969,12 @@ def test_union_schema_inference_mode(cluster): account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference1.jsonl', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'a UInt32') VALUES (1)", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference1.jsonl', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'a UInt32') VALUES (1) SETTINGS azure_truncate_on_insert=1", ) azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference2.jsonl', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'b UInt32') VALUES (2)", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference2.jsonl', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'b UInt32') VALUES (2) SETTINGS azure_truncate_on_insert=1", ) node.query("system drop schema cache for azure") @@ -981,7 +1011,7 @@ def test_union_schema_inference_mode(cluster): assert result == "a\tNullable(Int64)\n" "b\tNullable(Int64)\n" azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference3.jsonl', '{account_name}', '{account_key}', 'CSV', 'auto', 's String') VALUES ('Error')", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference3.jsonl', '{account_name}', '{account_key}', 'CSV', 
'auto', 's String') VALUES ('Error') SETTINGS azure_truncate_on_insert=1", ) error = azure_query( @@ -1003,7 +1033,7 @@ def test_schema_inference_cache(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cache0.jsonl', '{account_name}', '{account_key}') " - f"select * from numbers(100)", + f"select * from numbers(100) SETTINGS azure_truncate_on_insert=1", ) time.sleep(1) @@ -1210,19 +1240,19 @@ def test_filtering_by_file_or_path(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}','cont', 'test_filter1.tsv', 'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 1", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 1 SETTINGS azure_truncate_on_insert=1", ) azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}','cont', 'test_filter2.tsv', 'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 2", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 2 SETTINGS azure_truncate_on_insert=1", ) azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_filter3.tsv', 'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 3", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 3 SETTINGS azure_truncate_on_insert=1", ) node.query( @@ -1246,19 +1276,19 @@ def test_size_virtual_column(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}','cont', 'test_size_virtual_column1.tsv', 'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 1", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 1 SETTINGS azure_truncate_on_insert=1", ) azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}','cont', 'test_size_virtual_column2.tsv', 'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 11", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 11 SETTINGS azure_truncate_on_insert=1", ) azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_size_virtual_column3.tsv', 'devstoreaccount1', " - f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 111", + f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto', 'x UInt64') select 111 SETTINGS azure_truncate_on_insert=1", ) result = azure_query( @@ -1281,7 +1311,7 @@ def test_format_detection(cluster): account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" azure_query( node, - f"INSERT INTO TABLE FUNCTION 
azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection0', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt64, y String') select number as x, 'str_' || toString(number) from numbers(0)", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection0', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt64, y String') select number as x, 'str_' || toString(number) from numbers(0) SETTINGS azure_truncate_on_insert=1", ) azure_query( @@ -1351,7 +1381,7 @@ def test_write_to_globbed_partitioned_path(cluster): account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" error = azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_data_*_{{_partition_id}}', '{account_name}', '{account_key}', 'CSV', 'auto', 'x UInt64') partition by 42 select 42", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_data_*_{{_partition_id}}', '{account_name}', '{account_key}', 'CSV', 'auto', 'x UInt64') partition by 42 select 42 SETTINGS azure_truncate_on_insert=1", expect_error="true", ) @@ -1475,7 +1505,7 @@ def test_hive_partitioning_with_one_parameter(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}", + f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values} SETTINGS azure_truncate_on_insert=1", ) query = ( @@ -1512,7 +1542,7 @@ def test_hive_partitioning_with_two_parameters(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2} SETTINGS azure_truncate_on_insert=1", ) query = ( @@ -1558,7 +1588,7 @@ def test_hive_partitioning_without_setting(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2} SETTINGS azure_truncate_on_insert=1", ) query = ( diff --git a/tests/integration/test_storage_azure_blob_storage/test_cluster.py b/tests/integration/test_storage_azure_blob_storage/test_cluster.py index 6c5e2d20ca5..4d63016cf9a 100644 --- a/tests/integration/test_storage_azure_blob_storage/test_cluster.py +++ b/tests/integration/test_storage_azure_blob_storage/test_cluster.py @@ -71,7 +71,7 @@ def test_select_all(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1'," f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', 'key UInt64, data String') " - f"VALUES (1, 'a'), (2, 'b')", + f"VALUES (1, 'a'), (2, 
'b') SETTINGS azure_truncate_on_insert=1", ) print(get_azure_file_content("test_cluster_select_all.csv", port)) @@ -100,7 +100,7 @@ def test_count(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cluster_count.csv', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', " - f"'auto', 'key UInt64') VALUES (1), (2)", + f"'auto', 'key UInt64') VALUES (1), (2) SETTINGS azure_truncate_on_insert=1", ) print(get_azure_file_content("test_cluster_count.csv", port)) @@ -128,7 +128,7 @@ def test_union_all(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_parquet_union_all', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', " - f"'auto', 'a Int32, b String') VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')", + f"'auto', 'a Int32, b String') VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd') SETTINGS azure_truncate_on_insert=1", ) pure_azure = azure_query( @@ -179,7 +179,7 @@ def test_skip_unavailable_shards(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " - f"'auto', 'a UInt64') VALUES (1), (2)", + f"'auto', 'a UInt64') VALUES (1), (2) SETTINGS azure_truncate_on_insert=1", ) result = azure_query( node, @@ -199,7 +199,7 @@ def test_unset_skip_unavailable_shards(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_unset_skip_unavailable.csv', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " - f"'auto', 'a UInt64') VALUES (1), (2)", + f"'auto', 'a UInt64') VALUES (1), (2) SETTINGS azure_truncate_on_insert=1", ) result = azure_query( node, @@ -217,7 +217,7 @@ def test_cluster_with_named_collection(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cluster_with_named_collection.csv', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " - f"'auto', 'a UInt64') VALUES (1), (2)", + f"'auto', 'a UInt64') VALUES (1), (2) SETTINGS azure_truncate_on_insert=1", ) pure_azure = azure_query( @@ -248,7 +248,7 @@ def test_partition_parallel_reading_with_cluster(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', '{filename}', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', '{table_format}') " - f"PARTITION BY {partition_by} VALUES {values}", + f"PARTITION BY {partition_by} VALUES {values} SETTINGS azure_truncate_on_insert=1", ) assert "1,2,3\n" == get_azure_file_content("test_tf_3.csv", port) @@ -272,12 +272,12 @@ def test_format_detection(cluster): azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection0', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt32, y String') select number as x, 'str_' || toString(number) from numbers(10)", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection0', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt32, y String') select number as x, 
'str_' || toString(number) from numbers(10) SETTINGS azure_truncate_on_insert=1", ) azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection1', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt32, y String') select number as x, 'str_' || toString(number) from numbers(10, 10)", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection1', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'x UInt32, y String') select number as x, 'str_' || toString(number) from numbers(10, 10) SETTINGS azure_truncate_on_insert=1", ) expected_desc_result = azure_query( diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 79914855782..ca072f59e4b 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -41,6 +41,7 @@ def test_read_write_storage(started_cluster): node1.query("insert into SimpleHDFSStorage values (1, 'Mark', 72.53)") assert hdfs_api.read_data("/simple_storage") == "1\tMark\t72.53\n" assert node1.query("select * from SimpleHDFSStorage") == "1\tMark\t72.53\n" + node1.query("drop table if exists SimpleHDFSStorage") def test_read_write_storage_with_globs(started_cluster): @@ -94,6 +95,11 @@ def test_read_write_storage_with_globs(started_cluster): print(ex) assert "in readonly mode" in str(ex) + node1.query("DROP TABLE HDFSStorageWithRange") + node1.query("DROP TABLE HDFSStorageWithEnum") + node1.query("DROP TABLE HDFSStorageWithQuestionMark") + node1.query("DROP TABLE HDFSStorageWithAsterisk") + def test_storage_with_multidirectory_glob(started_cluster): hdfs_api = started_cluster.hdfs_api @@ -335,6 +341,7 @@ def test_virtual_columns(started_cluster): ) == expected ) + node1.query("DROP TABLE virual_cols") def test_read_files_with_spaces(started_cluster): @@ -356,6 +363,7 @@ def test_read_files_with_spaces(started_cluster): ) assert node1.query("select * from test order by id") == "1\n2\n3\n" fs.delete(dir, recursive=True) + node1.query("DROP TABLE test") def test_truncate_table(started_cluster): @@ -427,7 +435,7 @@ def test_seekable_formats(started_cluster): f"hdfs('hdfs://hdfs1:9000/parquet', 'Parquet', 'a Int32, b String')" ) node1.query( - f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(5000000)" + f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(5000000) SETTINGS hdfs_truncate_on_insert=1" ) result = node1.query(f"SELECT count() FROM {table_function}") @@ -435,7 +443,7 @@ def test_seekable_formats(started_cluster): table_function = f"hdfs('hdfs://hdfs1:9000/orc', 'ORC', 'a Int32, b String')" node1.query( - f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(5000000)" + f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(5000000) SETTINGS hdfs_truncate_on_insert=1" ) result = node1.query(f"SELECT count() FROM {table_function}") assert int(result) == 5000000 @@ -459,7 +467,7 @@ def test_read_table_with_default(started_cluster): def test_schema_inference(started_cluster): node1.query( - f"insert into table function hdfs('hdfs://hdfs1:9000/native', 'Native', 'a Int32, b String') SELECT number, randomString(100) FROM numbers(5000000)" + f"insert into table function hdfs('hdfs://hdfs1:9000/native', 'Native', 'a Int32, b String') SELECT number, randomString(100) FROM numbers(5000000) SETTINGS 
hdfs_truncate_on_insert=1" ) result = node1.query(f"desc hdfs('hdfs://hdfs1:9000/native', 'Native')") @@ -512,6 +520,7 @@ def test_hdfs_directory_not_exist(started_cluster): assert "" == node1.query( "select * from HDFSStorageWithNotExistDir settings hdfs_ignore_file_doesnt_exist=1" ) + node1.query("DROP TABLE HDFSStorageWithNotExistDir") def test_overwrite(started_cluster): @@ -531,6 +540,7 @@ def test_overwrite(started_cluster): result = node1.query(f"select count() from test_overwrite") assert int(result) == 10 + node1.query(f"DROP TABLE test_overwrite") def test_multiple_inserts(started_cluster): @@ -567,6 +577,7 @@ def test_multiple_inserts(started_cluster): result = node1.query(f"select count() from test_multiple_inserts") assert int(result) == 60 + node1.query(f"DROP TABLE test_multiple_inserts") def test_format_detection(started_cluster): @@ -580,10 +591,10 @@ def test_format_detection(started_cluster): def test_schema_inference_with_globs(started_cluster): node1.query( - f"insert into table function hdfs('hdfs://hdfs1:9000/data1.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select NULL" + f"insert into table function hdfs('hdfs://hdfs1:9000/data1.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select NULL SETTINGS hdfs_truncate_on_insert=1" ) node1.query( - f"insert into table function hdfs('hdfs://hdfs1:9000/data2.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select 0" + f"insert into table function hdfs('hdfs://hdfs1:9000/data2.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select 0 SETTINGS hdfs_truncate_on_insert=1" ) result = node1.query( @@ -597,7 +608,7 @@ def test_schema_inference_with_globs(started_cluster): assert sorted(result.split()) == ["0", "\\N"] node1.query( - f"insert into table function hdfs('hdfs://hdfs1:9000/data3.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select NULL" + f"insert into table function hdfs('hdfs://hdfs1:9000/data3.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select NULL SETTINGS hdfs_truncate_on_insert=1" ) filename = "data{1,3}.jsoncompacteachrow" @@ -609,7 +620,7 @@ def test_schema_inference_with_globs(started_cluster): assert "All attempts to extract table structure from files failed" in result node1.query( - f"insert into table function hdfs('hdfs://hdfs1:9000/data0.jsoncompacteachrow', 'TSV', 'x String') select '[123;]'" + f"insert into table function hdfs('hdfs://hdfs1:9000/data0.jsoncompacteachrow', 'TSV', 'x String') select '[123;]' SETTINGS hdfs_truncate_on_insert=1" ) result = node1.query_and_get_error( @@ -621,7 +632,7 @@ def test_schema_inference_with_globs(started_cluster): def test_insert_select_schema_inference(started_cluster): node1.query( - f"insert into table function hdfs('hdfs://hdfs1:9000/test.native.zst') select toUInt64(1) as x" + f"insert into table function hdfs('hdfs://hdfs1:9000/test.native.zst') select toUInt64(1) as x SETTINGS hdfs_truncate_on_insert=1" ) result = node1.query(f"desc hdfs('hdfs://hdfs1:9000/test.native.zst')") @@ -664,7 +675,7 @@ def test_virtual_columns_2(started_cluster): table_function = ( f"hdfs('hdfs://hdfs1:9000/parquet_2', 'Parquet', 'a Int32, b String')" ) - node1.query(f"insert into table function {table_function} SELECT 1, 'kek'") + node1.query(f"insert into table function {table_function} SELECT 1, 'kek' SETTINGS hdfs_truncate_on_insert=1") result = node1.query(f"SELECT _path FROM {table_function}") assert result.strip() == "parquet_2" @@ -672,7 +683,7 @@ def 
test_virtual_columns_2(started_cluster): table_function = ( f"hdfs('hdfs://hdfs1:9000/parquet_3', 'Parquet', 'a Int32, _path String')" ) - node1.query(f"insert into table function {table_function} SELECT 1, 'kek'") + node1.query(f"insert into table function {table_function} SELECT 1, 'kek' SETTINGS hdfs_truncate_on_insert=1") result = node1.query(f"SELECT _path FROM {table_function}") assert result.strip() == "kek" @@ -969,11 +980,11 @@ def test_read_subcolumns(started_cluster): node = started_cluster.instances["node1"] node.query( - f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3) SETTINGS hdfs_truncate_on_insert=1" ) node.query( - f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" + f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3) SETTINGS hdfs_truncate_on_insert=1" ) res = node.query( @@ -1019,11 +1030,11 @@ def test_union_schema_inference_mode(started_cluster): node = started_cluster.instances["node1"] node.query( - "insert into function hdfs('hdfs://hdfs1:9000/test_union_schema_inference1.jsonl') select 1 as a" + "insert into function hdfs('hdfs://hdfs1:9000/test_union_schema_inference1.jsonl') select 1 as a SETTINGS hdfs_truncate_on_insert=1" ) node.query( - "insert into function hdfs('hdfs://hdfs1:9000/test_union_schema_inference2.jsonl') select 2 as b" + "insert into function hdfs('hdfs://hdfs1:9000/test_union_schema_inference2.jsonl') select 2 as b SETTINGS hdfs_truncate_on_insert=1" ) node.query("system drop schema cache for hdfs") @@ -1055,7 +1066,7 @@ def test_union_schema_inference_mode(started_cluster): ) assert result == "a\tNullable(Int64)\n" "b\tNullable(Int64)\n" node.query( - f"insert into function hdfs('hdfs://hdfs1:9000/test_union_schema_inference3.jsonl', TSV) select 'Error'" + f"insert into function hdfs('hdfs://hdfs1:9000/test_union_schema_inference3.jsonl', TSV) select 'Error' SETTINGS hdfs_truncate_on_insert=1" ) error = node.query_and_get_error( @@ -1068,11 +1079,11 @@ def test_format_detection(started_cluster): node = started_cluster.instances["node1"] node.query( - "insert into function hdfs('hdfs://hdfs1:9000/test_format_detection0', JSONEachRow) select number as x, 'str_' || toString(number) as y from numbers(0)" + "insert into function hdfs('hdfs://hdfs1:9000/test_format_detection0', JSONEachRow) select number as x, 'str_' || toString(number) as y from numbers(0) SETTINGS hdfs_truncate_on_insert=1" ) node.query( - "insert into function hdfs('hdfs://hdfs1:9000/test_format_detection1', JSONEachRow) select number as x, 'str_' || toString(number) as y from numbers(10)" + "insert into function hdfs('hdfs://hdfs1:9000/test_format_detection1', JSONEachRow) select number as x, 'str_' || toString(number) as y from numbers(10) SETTINGS hdfs_truncate_on_insert=1" ) expected_desc_result = node.query( @@ -1136,7 +1147,7 @@ def test_write_to_globbed_partitioned_path(started_cluster): node = started_cluster.instances["node1"] error = node.query_and_get_error( - "insert into function hdfs('hdfs://hdfs1:9000/test_data_*_{_partition_id}.csv') partition by 42 select 42" + "insert into function hdfs('hdfs://hdfs1:9000/test_data_*_{_partition_id}.csv') 
partition by 42 select 42 SETTINGS hdfs_truncate_on_insert=1" ) assert "DATABASE_ACCESS_DENIED" in error From d080f863ea41420ecbb1c5d65769d74e21a46aba Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 5 Aug 2024 17:07:17 +0200 Subject: [PATCH 154/265] fix black --- tests/integration/test_storage_hdfs/test.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 9ee8ac4cdfd..856715f28c8 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -675,7 +675,9 @@ def test_virtual_columns_2(started_cluster): table_function = ( f"hdfs('hdfs://hdfs1:9000/parquet_2', 'Parquet', 'a Int32, b String')" ) - node1.query(f"insert into table function {table_function} SELECT 1, 'kek' SETTINGS hdfs_truncate_on_insert=1") + node1.query( + f"insert into table function {table_function} SELECT 1, 'kek' SETTINGS hdfs_truncate_on_insert=1" + ) result = node1.query(f"SELECT _path FROM {table_function}") assert result.strip() == "parquet_2" @@ -683,7 +685,9 @@ def test_virtual_columns_2(started_cluster): table_function = ( f"hdfs('hdfs://hdfs1:9000/parquet_3', 'Parquet', 'a Int32, _path String')" ) - node1.query(f"insert into table function {table_function} SELECT 1, 'kek' SETTINGS hdfs_truncate_on_insert=1") + node1.query( + f"insert into table function {table_function} SELECT 1, 'kek' SETTINGS hdfs_truncate_on_insert=1" + ) result = node1.query(f"SELECT _path FROM {table_function}") assert result.strip() == "kek" From d8aa219783b6715f5424772bf25c092a26be5e2d Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 5 Aug 2024 17:43:29 +0200 Subject: [PATCH 155/265] fix build --- src/Core/SettingsChangesHistory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 03e0f4f0dc8..2080e8fbf0d 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -77,7 +77,7 @@ static std::initializer_list Date: Mon, 5 Aug 2024 18:31:16 +0000 Subject: [PATCH 156/265] set max_threads --- .../queries/0_stateless/03161_lightweight_delete_projection.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 28e5612a529..618f3ac0cb8 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -1,5 +1,5 @@ -SET lightweight_deletes_sync = 2, alter_sync = 2; +SET max_threads = 1, lightweight_deletes_sync = 2, alter_sync = 2; DROP TABLE IF EXISTS users_compact; From e6f566e49d78080a954ca992d8d5e0f5fb1bb1e2 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 6 Aug 2024 13:23:12 +0800 Subject: [PATCH 157/265] Small refactors in ORC output format --- .../Formats/Impl/ORCBlockOutputFormat.cpp | 99 +++++++------------ 1 file changed, 33 insertions(+), 66 deletions(-) diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 6f543a05fba..bd89ae0fa86 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -12,6 +12,7 @@ #include #include #include 
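/// A minimal sketch of the null-handling pattern this refactor converges on, assuming
/// ClickHouse's null bytemap convention (1 = NULL) and ORC's notNull mask (1 = value
/// present); the helper name is illustrative, not part of the patch:
///
///     void fillNotNull(orc::ColumnVectorBatch & batch, const PaddedPODArray<UInt8> & null_bytemap)
///     {
///         /// Fast path: a column with no NULLs needs no per-row mask at all.
///         batch.hasNulls = !memoryIsZero(null_bytemap.data(), 0, null_bytemap.size());
///         if (batch.hasNulls)
///         {
///             batch.notNull.resize(null_bytemap.size());
///             for (size_t i = 0; i < null_bytemap.size(); ++i)
///                 batch.notNull[i] = !null_bytemap[i]; /// invert: the bytemap marks NULLs, notNull marks values
///         }
///     }
///
/// Once this is done once per column in writeColumn(), the typed writers below only
/// have to fill the data arrays, which is what the removed notNull branches reflect.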
+#include
 #include
 #include
@@ -203,25 +204,15 @@ template <typename NumberType, typename NumberVectorBatch, typename ConvertFunc>
 void ORCBlockOutputFormat::writeNumbers(
     orc::ColumnVectorBatch & orc_column,
     const IColumn & column,
-    const PaddedPODArray<UInt8> * null_bytemap,
+    const PaddedPODArray<UInt8> * /*null_bytemap*/,
     ConvertFunc convert)
 {
     NumberVectorBatch & number_orc_column = dynamic_cast<NumberVectorBatch &>(orc_column);
     const auto & number_column = assert_cast<const ColumnVector<NumberType> &>(column);
-    number_orc_column.resize(number_column.size());
+    number_orc_column.data.resize(number_column.size());

     for (size_t i = 0; i != number_column.size(); ++i)
-    {
-        if (null_bytemap && (*null_bytemap)[i])
-        {
-            number_orc_column.notNull[i] = 0;
-            continue;
-        }
-
-        number_orc_column.notNull[i] = 1;
         number_orc_column.data[i] = convert(number_column.getElement(i));
-    }
-    number_orc_column.numElements = number_column.size();
 }

 template <typename Decimal, typename DecimalVectorBatch, typename ConvertFunc>
@@ -229,7 +220,7 @@ void ORCBlockOutputFormat::writeDecimals(
     orc::ColumnVectorBatch & orc_column,
     const IColumn & column,
     DataTypePtr & type,
-    const PaddedPODArray<UInt8> * null_bytemap,
+    const PaddedPODArray<UInt8> * /*null_bytemap*/,
     ConvertFunc convert)
 {
     DecimalVectorBatch & decimal_orc_column = dynamic_cast<DecimalVectorBatch &>(orc_column);
@@ -238,71 +229,49 @@
     decimal_orc_column.precision = decimal_type->getPrecision();
     decimal_orc_column.scale = decimal_type->getScale();
     decimal_orc_column.resize(decimal_column.size());
-    for (size_t i = 0; i != decimal_column.size(); ++i)
-    {
-        if (null_bytemap && (*null_bytemap)[i])
-        {
-            decimal_orc_column.notNull[i] = 0;
-            continue;
-        }
-        decimal_orc_column.notNull[i] = 1;
+    decimal_orc_column.values.resize(decimal_column.size());
+    for (size_t i = 0; i != decimal_column.size(); ++i)
         decimal_orc_column.values[i] = convert(decimal_column.getElement(i).value);
-    }
-    decimal_orc_column.numElements = decimal_column.size();
 }

 template <typename ColumnType>
 void ORCBlockOutputFormat::writeStrings(
     orc::ColumnVectorBatch & orc_column,
     const IColumn & column,
-    const PaddedPODArray<UInt8> * null_bytemap)
+    const PaddedPODArray<UInt8> * /*null_bytemap*/)
 {
     orc::StringVectorBatch & string_orc_column = dynamic_cast<orc::StringVectorBatch &>(orc_column);
     const auto & string_column = assert_cast<const ColumnType &>(column);
-    string_orc_column.resize(string_column.size());
+    string_orc_column.data.resize(string_column.size());
+    string_orc_column.length.resize(string_column.size());

     for (size_t i = 0; i != string_column.size(); ++i)
     {
-        if (null_bytemap && (*null_bytemap)[i])
-        {
-            string_orc_column.notNull[i] = 0;
-            continue;
-        }
-
-        string_orc_column.notNull[i] = 1;
         const std::string_view & string = string_column.getDataAt(i).toView();
         string_orc_column.data[i] = const_cast<char *>(string.data());
         string_orc_column.length[i] = string.size();
     }
-    string_orc_column.numElements = string_column.size();
 }

 template <typename ColumnType, typename GetSecondsFunc, typename GetNanosecondsFunc>
 void ORCBlockOutputFormat::writeDateTimes(
     orc::ColumnVectorBatch & orc_column,
     const IColumn & column,
-    const PaddedPODArray<UInt8> * null_bytemap,
+    const PaddedPODArray<UInt8> * /*null_bytemap*/,
     GetSecondsFunc get_seconds,
     GetNanosecondsFunc get_nanoseconds)
 {
     orc::TimestampVectorBatch & timestamp_orc_column = dynamic_cast<orc::TimestampVectorBatch &>(orc_column);
     const auto & timestamp_column = assert_cast<const ColumnType &>(column);
-    timestamp_orc_column.resize(timestamp_column.size());
+    timestamp_orc_column.data.resize(timestamp_column.size());
+    timestamp_orc_column.nanoseconds.resize(timestamp_column.size());

     for (size_t i = 0; i != timestamp_column.size(); ++i)
     {
-        if (null_bytemap && (*null_bytemap)[i])
-        {
-            timestamp_orc_column.notNull[i] = 0;
-            continue;
-        }
-
-        timestamp_orc_column.notNull[i] = 1;
         timestamp_orc_column.data[i] = static_cast<int64_t>(get_seconds(timestamp_column.getElement(i)));
         timestamp_orc_column.nanoseconds[i] = static_cast<int64_t>(get_nanoseconds(timestamp_column.getElement(i)));
     }
-    timestamp_orc_column.numElements = timestamp_column.size();
 }

 void ORCBlockOutputFormat::writeColumn(
@@ -311,9 +280,19 @@ void ORCBlockOutputFormat::writeColumn(
     DataTypePtr & type,
     const PaddedPODArray<UInt8> * null_bytemap)
 {
-    orc_column.notNull.resize(column.size());
+    orc_column.numElements = column.size();
     if (null_bytemap)
-        orc_column.hasNulls = true;
+    {
+        orc_column.hasNulls = !memoryIsZero(null_bytemap->data(), 0, null_bytemap->size());
+        if (orc_column.hasNulls)
+        {
+            orc_column.notNull.resize(null_bytemap->size());
+            for (size_t i = 0; i < null_bytemap->size(); ++i)
+                orc_column.notNull[i] = !(*null_bytemap)[i];
+        }
+    }
+    else
+        orc_column.hasNulls = false;

     /// ORC doesn't have unsigned types, so cast everything to signed and sign-extend to Int64 to
     /// make the ORC library calculate min and max correctly.
@@ -471,6 +450,7 @@ void ORCBlockOutputFormat::writeColumn(
         }
         case TypeIndex::Nullable:
         {
+            chassert(!null_bytemap);
             const auto & nullable_column = assert_cast<const ColumnNullable &>(column);
             const PaddedPODArray<UInt8> & new_null_bytemap = assert_cast<const ColumnVector<UInt8> &>(*nullable_column.getNullMapColumnPtr()).getData();
             auto nested_type = removeNullable(type);
@@ -485,19 +465,15 @@ void ORCBlockOutputFormat::writeColumn(
             const ColumnArray::Offsets & offsets = list_column.getOffsets();
             size_t column_size = list_column.size();

-            list_orc_column.resize(column_size);
+            list_orc_column.offsets.resize(column_size + 1);

             /// The length of list i in ListVectorBatch is offsets[i+1] - offsets[i].
             list_orc_column.offsets[0] = 0;
             for (size_t i = 0; i != column_size; ++i)
-            {
                 list_orc_column.offsets[i + 1] = offsets[i];
-                list_orc_column.notNull[i] = 1;
-            }

             orc::ColumnVectorBatch & nested_orc_column = *list_orc_column.elements;
-            writeColumn(nested_orc_column, list_column.getData(), nested_type, null_bytemap);
-            list_orc_column.numElements = column_size;
+            writeColumn(nested_orc_column, list_column.getData(), nested_type, nullptr);
             break;
         }
         case TypeIndex::Tuple:
         {
             orc::StructVectorBatch & struct_orc_column = dynamic_cast<orc::StructVectorBatch &>(orc_column);
             const auto & tuple_column = assert_cast<const ColumnTuple &>(column);
             auto nested_types = assert_cast<const DataTypeTuple *>(type.get())->getElements();
-            for (size_t i = 0; i != tuple_column.size(); ++i)
-                struct_orc_column.notNull[i] = 1;
             for (size_t i = 0; i != tuple_column.tupleSize(); ++i)
-                writeColumn(*struct_orc_column.fields[i], tuple_column.getColumn(i), nested_types[i], null_bytemap);
+                writeColumn(*struct_orc_column.fields[i], tuple_column.getColumn(i), nested_types[i], nullptr);
             break;
         }
         case TypeIndex::Map:
         {
             orc::MapVectorBatch & map_orc_column = dynamic_cast<orc::MapVectorBatch &>(orc_column);
             const auto & list_column = assert_cast<const ColumnMap &>(column).getNestedColumn();
             const auto & map_type = assert_cast<const DataTypeMap &>(*type);
             const ColumnArray::Offsets & offsets = list_column.getOffsets();

             size_t column_size = list_column.size();

-            map_orc_column.resize(list_column.size());
+            map_orc_column.offsets.resize(column_size + 1);
             /// The length of list i in ListVectorBatch is offsets[i+1] - offsets[i].
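/// A worked example of this offsets encoding (values are illustrative, not from the
/// patch): for three lists [10, 20], [30] and [] the batch stores column_size + 1
/// offsets {0, 2, 3, 3}, so list i spans elements [offsets[i], offsets[i+1]) of the
/// child batch. ClickHouse's ColumnArray offsets are already cumulative end positions,
/// which is why the loop below can copy offsets[i] straight into offsets[i + 1].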
             map_orc_column.offsets[0] = 0;
             for (size_t i = 0; i != column_size; ++i)
-            {
                 map_orc_column.offsets[i + 1] = offsets[i];
-                map_orc_column.notNull[i] = 1;
-            }
+
             const auto nested_columns = assert_cast<const ColumnTuple *>(list_column.getDataPtr().get())->getColumns();

             orc::ColumnVectorBatch & keys_orc_column = *map_orc_column.keys;
             auto key_type = map_type.getKeyType();
-            writeColumn(keys_orc_column, *nested_columns[0], key_type, null_bytemap);
+            writeColumn(keys_orc_column, *nested_columns[0], key_type, nullptr);

             orc::ColumnVectorBatch & values_orc_column = *map_orc_column.elements;
             auto value_type = map_type.getValueType();
-            writeColumn(values_orc_column, *nested_columns[1], value_type, null_bytemap);
-
-            map_orc_column.numElements = column_size;
+            writeColumn(values_orc_column, *nested_columns[1], value_type, nullptr);
             break;
         }
         default:
@@ -575,10 +545,7 @@ void ORCBlockOutputFormat::consume(Chunk chunk)
     size_t columns_num = chunk.getNumColumns();
     size_t rows_num = chunk.getNumRows();

-    /// getMaxColumnSize is needed to write arrays.
-    /// The size of the batch must be no less than total amount of array elements
-    /// and no less than the number of rows (ORC writes a null bit for every row).
-    std::unique_ptr<orc::ColumnVectorBatch> batch = writer->createRowBatch(getMaxColumnSize(chunk));
+    std::unique_ptr<orc::ColumnVectorBatch> batch = writer->createRowBatch(chunk.getNumRows());
     orc::StructVectorBatch & root = dynamic_cast<orc::StructVectorBatch &>(*batch);
     auto columns = chunk.detachColumns();

From aa3d8086c32ce2b5a90fbe4788579cae970ec32f Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Tue, 6 Aug 2024 12:30:39 +0200
Subject: [PATCH 158/265] fix integration tests

---
 .../test_storage_azure_blob_storage/test.py | 37 ++++++++++++-------
 .../test_cluster.py                         | 21 +++++++----
 tests/integration/test_storage_hdfs/test.py | 10 ++---
 3 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py
index 15a1f6db2c1..092c124855c 100644
--- a/tests/integration/test_storage_azure_blob_storage/test.py
+++ b/tests/integration/test_storage_azure_blob_storage/test.py
@@ -635,7 +635,8 @@ def test_simple_write_named_collection_2_table_function(cluster):
     azure_query(
max(_path) from azureBlobStorage(azure_conf2, " @@ -710,9 +712,9 @@ def test_schema_inference_from_globs_tf(cluster): query = ( f"insert into table function azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', " - f"container='cont', blob_path='{path}', format='CSVWithNames', structure='{table_format}') VALUES {values} SETTINGS azure_truncate_on_insert=1" + f"container='cont', blob_path='{path}', format='CSVWithNames', structure='{table_format}') VALUES {values}" ) - azure_query(node, query) + azure_query(node, query, settings={"azure_truncate_on_insert": 1}) query = ( f"select sum(column1), sum(column2), sum(column3), min(_file), max(_path) from azureBlobStorage(azure_conf2, " @@ -738,7 +740,8 @@ def test_partition_by_tf(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', " f"'cont', '{filename}', 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', " - f"'CSV', 'auto', '{table_format}') PARTITION BY {partition_by} VALUES {values} SETTINGS azure_truncate_on_insert=1", + f"'CSV', 'auto', '{table_format}') PARTITION BY {partition_by} VALUES {values}", + settings={"azure_truncate_on_insert": 1}, ) assert "1,2,3\n" == get_azure_file_content("test_partition_tf_3.csv", port) @@ -757,7 +760,8 @@ def test_filter_using_file(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 'cont', '{filename}', " f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', " - f"'{table_format}') PARTITION BY {partition_by} VALUES {values} SETTINGS azure_truncate_on_insert=1", + f"'{table_format}') PARTITION BY {partition_by} VALUES {values}", + settings={"azure_truncate_on_insert": 1}, ) query = ( @@ -855,7 +859,8 @@ def test_function_signatures(cluster): account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto', 'column1 UInt32') VALUES (1),(2),(3) SETTINGS azure_truncate_on_insert=1", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_signature.csv', '{account_name}', '{account_key}', 'CSV', 'auto', 'column1 UInt32') VALUES (1),(2),(3)", + settings={"azure_truncate_on_insert": 1}, ) # " - connection_string, container_name, blobpath\n" @@ -969,12 +974,14 @@ def test_union_schema_inference_mode(cluster): account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference1.jsonl', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'a UInt32') VALUES (1) SETTINGS azure_truncate_on_insert=1", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference1.jsonl', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'a UInt32') VALUES (1)", + settings={"azure_truncate_on_insert": 1}, ) azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference2.jsonl', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'b UInt32') VALUES (2) SETTINGS 
azure_truncate_on_insert=1", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference2.jsonl', '{account_name}', '{account_key}', 'JSONEachRow', 'auto', 'b UInt32') VALUES (2)", + settings={"azure_truncate_on_insert": 1}, ) node.query("system drop schema cache for azure") @@ -1011,7 +1018,8 @@ def test_union_schema_inference_mode(cluster): assert result == "a\tNullable(Int64)\n" "b\tNullable(Int64)\n" azure_query( node, - f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference3.jsonl', '{account_name}', '{account_key}', 'CSV', 'auto', 's String') VALUES ('Error') SETTINGS azure_truncate_on_insert=1", + f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference3.jsonl', '{account_name}', '{account_key}', 'CSV', 'auto', 's String') VALUES ('Error')", + settings={"azure_truncate_on_insert": 1}, ) error = azure_query( @@ -1505,7 +1513,8 @@ def test_hive_partitioning_with_one_parameter(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values} SETTINGS azure_truncate_on_insert=1", + f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}", + settings={"azure_truncate_on_insert": 1}, ) query = ( @@ -1542,7 +1551,8 @@ def test_hive_partitioning_with_two_parameters(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2} SETTINGS azure_truncate_on_insert=1", + f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + settings={"azure_truncate_on_insert": 1}, ) query = ( @@ -1588,7 +1598,8 @@ def test_hive_partitioning_without_setting(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2} SETTINGS azure_truncate_on_insert=1", + f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + settings={"azure_truncate_on_insert": 1}, ) query = ( diff --git a/tests/integration/test_storage_azure_blob_storage/test_cluster.py b/tests/integration/test_storage_azure_blob_storage/test_cluster.py index 4d63016cf9a..04baf007c69 100644 --- a/tests/integration/test_storage_azure_blob_storage/test_cluster.py +++ b/tests/integration/test_storage_azure_blob_storage/test_cluster.py @@ -71,7 +71,8 @@ def test_select_all(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cluster_select_all.csv', 'devstoreaccount1'," f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', 'key UInt64, data String') " - f"VALUES (1, 'a'), (2, 'b') SETTINGS azure_truncate_on_insert=1", + f"VALUES (1, 'a'), (2, 'b')", + settings={"azure_truncate_on_insert": 1}, 
) print(get_azure_file_content("test_cluster_select_all.csv", port)) @@ -100,7 +101,8 @@ def test_count(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cluster_count.csv', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', " - f"'auto', 'key UInt64') VALUES (1), (2) SETTINGS azure_truncate_on_insert=1", + f"'auto', 'key UInt64') VALUES (1), (2)", + settings={"azure_truncate_on_insert": 1}, ) print(get_azure_file_content("test_cluster_count.csv", port)) @@ -128,7 +130,8 @@ def test_union_all(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_parquet_union_all', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'Parquet', " - f"'auto', 'a Int32, b String') VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd') SETTINGS azure_truncate_on_insert=1", + f"'auto', 'a Int32, b String') VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')", + settings={"azure_truncate_on_insert": 1}, ) pure_azure = azure_query( @@ -179,7 +182,8 @@ def test_skip_unavailable_shards(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_skip_unavailable.csv', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " - f"'auto', 'a UInt64') VALUES (1), (2) SETTINGS azure_truncate_on_insert=1", + f"'auto', 'a UInt64') VALUES (1), (2)", + settings={"azure_truncate_on_insert": 1}, ) result = azure_query( node, @@ -199,7 +203,8 @@ def test_unset_skip_unavailable_shards(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_unset_skip_unavailable.csv', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " - f"'auto', 'a UInt64') VALUES (1), (2) SETTINGS azure_truncate_on_insert=1", + f"'auto', 'a UInt64') VALUES (1), (2)", + settings={"azure_truncate_on_insert": 1}, ) result = azure_query( node, @@ -217,7 +222,8 @@ def test_cluster_with_named_collection(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', 'test_cluster_with_named_collection.csv', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', " - f"'auto', 'a UInt64') VALUES (1), (2) SETTINGS azure_truncate_on_insert=1", + f"'auto', 'a UInt64') VALUES (1), (2)", + settings={"azure_truncate_on_insert": 1}, ) pure_azure = azure_query( @@ -248,7 +254,8 @@ def test_partition_parallel_reading_with_cluster(cluster): node, f"INSERT INTO TABLE FUNCTION azureBlobStorage('{storage_account_url}', 'cont', '{filename}', 'devstoreaccount1', " f"'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto', '{table_format}') " - f"PARTITION BY {partition_by} VALUES {values} SETTINGS azure_truncate_on_insert=1", + f"PARTITION BY {partition_by} VALUES {values}", + settings={"azure_truncate_on_insert": 1}, ) assert "1,2,3\n" == get_azure_file_content("test_tf_3.csv", port) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 856715f28c8..3fef6bc46cf 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -341,7 +341,7 @@ def test_virtual_columns(started_cluster): ) == 
expected ) - node1.query("DROP TABLE virual_cols") + node1.query("DROP TABLE virtual_cols") def test_read_files_with_spaces(started_cluster): @@ -675,9 +675,7 @@ def test_virtual_columns_2(started_cluster): table_function = ( f"hdfs('hdfs://hdfs1:9000/parquet_2', 'Parquet', 'a Int32, b String')" ) - node1.query( - f"insert into table function {table_function} SELECT 1, 'kek' SETTINGS hdfs_truncate_on_insert=1" - ) + node1.query(f"insert into table function {table_function} SELECT 1, 'kek' SETTINGS hdfs_truncate_on_insert=1") result = node1.query(f"SELECT _path FROM {table_function}") assert result.strip() == "parquet_2" @@ -685,9 +683,7 @@ def test_virtual_columns_2(started_cluster): table_function = ( f"hdfs('hdfs://hdfs1:9000/parquet_3', 'Parquet', 'a Int32, _path String')" ) - node1.query( - f"insert into table function {table_function} SELECT 1, 'kek' SETTINGS hdfs_truncate_on_insert=1" - ) + node1.query(f"insert into table function {table_function} SELECT 1, 'kek' SETTINGS hdfs_truncate_on_insert=1") result = node1.query(f"SELECT _path FROM {table_function}") assert result.strip() == "kek" From 27cdbb54d73f8f4b82d63850c1e6f6fd5669646e Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 6 Aug 2024 13:10:03 +0200 Subject: [PATCH 159/265] fix black --- tests/integration/test_storage_hdfs/test.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 3fef6bc46cf..77921b885b0 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -675,7 +675,9 @@ def test_virtual_columns_2(started_cluster): table_function = ( f"hdfs('hdfs://hdfs1:9000/parquet_2', 'Parquet', 'a Int32, b String')" ) - node1.query(f"insert into table function {table_function} SELECT 1, 'kek' SETTINGS hdfs_truncate_on_insert=1") + node1.query( + f"insert into table function {table_function} SELECT 1, 'kek' SETTINGS hdfs_truncate_on_insert=1" + ) result = node1.query(f"SELECT _path FROM {table_function}") assert result.strip() == "parquet_2" @@ -683,7 +685,9 @@ def test_virtual_columns_2(started_cluster): table_function = ( f"hdfs('hdfs://hdfs1:9000/parquet_3', 'Parquet', 'a Int32, _path String')" ) - node1.query(f"insert into table function {table_function} SELECT 1, 'kek' SETTINGS hdfs_truncate_on_insert=1") + node1.query( + f"insert into table function {table_function} SELECT 1, 'kek' SETTINGS hdfs_truncate_on_insert=1" + ) result = node1.query(f"SELECT _path FROM {table_function}") assert result.strip() == "kek" From 72ead6e8432daa1e643a5b0cc8559a4ff4d9efd0 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 6 Aug 2024 14:56:42 +0000 Subject: [PATCH 160/265] Cleanup. --- src/Storages/IStorage.h | 6 ++-- src/Storages/MergeTree/MutateTask.cpp | 34 +++++++++---------- ...61_lightweight_delete_projection.reference | 4 +-- .../03161_lightweight_delete_projection.sql | 4 +-- 4 files changed, 23 insertions(+), 25 deletions(-) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index d2cdc5af34f..0477a08b0d2 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -259,12 +259,12 @@ public: /// Return true if there is at least one part containing lightweight deleted mask. virtual bool hasLightweightDeletedMask() const { return false; } - /// Return true if storage has any projection. 
-    virtual bool hasProjection() const { return false; }
-
     /// Return true if storage can execute lightweight delete mutations.
     virtual bool supportsLightweightDelete() const { return false; }

+    /// Return true if storage has any projection.
+    virtual bool hasProjection() const { return false; }
+
     /// Return true if storage can execute 'DELETE FROM' mutations. This is different from lightweight delete
     /// because those are internally translated into 'ALTER UPDATE' mutations.
     virtual bool supportsDelete() const { return false; }
diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp
index 8b5829eb058..3d9f49c9a7a 100644
--- a/src/Storages/MergeTree/MutateTask.cpp
+++ b/src/Storages/MergeTree/MutateTask.cpp
@@ -659,10 +659,8 @@ static NameSet collectFilesToSkip(
     const Block & updated_header,
     const std::set<MergeTreeIndexPtr> & indices_to_recalc,
     const String & mrk_extension,
-    const std::set<ProjectionDescriptionRawPtr> & projections_to_recalc,
-    const std::set<ColumnStatisticsPtr> & stats_to_recalc,
-    const StorageMetadataPtr & metadata_snapshot,
-    bool skip_all_projections)
+    const std::set<ProjectionDescriptionRawPtr> & projections_to_skip,
+    const std::set<ColumnStatisticsPtr> & stats_to_recalc)
 {
     NameSet files_to_skip = source_part->getFileNamesWithoutChecksums();
@@ -686,16 +684,8 @@
         }
     }

-    if (skip_all_projections)
-    {
-        for (const auto & projection : metadata_snapshot->getProjections())
-            files_to_skip.insert(projection.getDirectoryName());
-    }
-    else
-    {
-        for (const auto & projection : projections_to_recalc)
-            files_to_skip.insert(projection->getDirectoryName());
-    }
+    for (const auto & projection : projections_to_skip)
+        files_to_skip.insert(projection->getDirectoryName());

     for (const auto & stat : stats_to_recalc)
         files_to_skip.insert(stat->getFileName() + STATS_FILE_SUFFIX);
@@ -2325,6 +2315,9 @@ bool MutateTask::prepare()
         lightweight_mutation_projection_mode == LightweightMutationProjectionMode::DROP ||
         lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW;

+    std::set<ProjectionDescriptionRawPtr> projections_to_skip_container;
+    auto * projections_to_skip = &projections_to_skip_container;
+
     bool should_create_projections = !(lightweight_delete_mode && lightweight_delete_drops_projections);
     /// Under lightweight delete mode, if option is drop, projections_to_recalc should be empty.
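/// A rough sketch of the dispatch set up here (control flow paraphrased from the
/// surrounding hunks, not new behaviour): under lightweight delete with the DROP or
/// THROW projection mode, projections_to_skip is filled with every projection from the
/// metadata snapshot so that none of them is rebuilt; otherwise it aliases
/// projections_to_recalc so only stale projections are rebuilt. Either way, the same
/// set is what collectFilesToSkip() above uses to exclude projection directories from
/// the new part.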
if (should_create_projections) @@ -2333,6 +2326,13 @@ bool MutateTask::prepare() ctx->source_part, ctx->metadata_snapshot, ctx->materialized_projections); + + projections_to_skip = &ctx->projections_to_recalc; + } + else + { + for (const auto & projection : ctx->metadata_snapshot->getProjections()) + projections_to_skip->insert(&projection); } ctx->stats_to_recalc = MutationHelpers::getStatisticsToRecalculate(ctx->metadata_snapshot, ctx->materialized_statistics); @@ -2343,10 +2343,8 @@ bool MutateTask::prepare() ctx->updated_header, ctx->indices_to_recalc, ctx->mrk_extension, - ctx->projections_to_recalc, - ctx->stats_to_recalc, - ctx->metadata_snapshot, - !should_create_projections); + *projections_to_skip, + ctx->stats_to_recalc); ctx->files_to_rename = MutationHelpers::collectFilesForRenames( ctx->source_part, diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference index eef0c5a41b5..8edf541c2a0 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference @@ -40,7 +40,7 @@ all_3_3_0_4 SELECT name, parent_name FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1) AND parent_name like 'all_3_3%'; p1 all_3_3_0_4 p2 all_3_3_0_4 wide part @@ -85,6 +85,6 @@ all_3_3_0_4 SELECT name, parent_name FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1) AND parent_name like 'all_3_3%'; p1 all_3_3_0_4 p2 all_3_3_0_4 diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql index 618f3ac0cb8..0b05326e2c1 100644 --- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -67,7 +67,7 @@ WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active SELECT name, parent_name FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_compact') AND (active = 1) AND parent_name like 'all_3_3%'; -- { echoOff } @@ -136,7 +136,7 @@ WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = SELECT name, parent_name FROM system.projection_parts -WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1); +WHERE (database = currentDatabase()) AND (`table` = 'users_wide') AND (active = 1) AND parent_name like 'all_3_3%'; -- { echoOff } From 04438784e2178820537c65b66b5a8341f3d63b8d Mon Sep 17 00:00:00 2001 From: pufit Date: Tue, 6 Aug 2024 16:45:46 -0400 Subject: [PATCH 161/265] add a stateless test for `grant current grants` --- .../03215_grant_current_grants.reference | 2 ++ .../0_stateless/03215_grant_current_grants.sh | 26 +++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 tests/queries/0_stateless/03215_grant_current_grants.reference create mode 100755 tests/queries/0_stateless/03215_grant_current_grants.sh diff --git a/tests/queries/0_stateless/03215_grant_current_grants.reference b/tests/queries/0_stateless/03215_grant_current_grants.reference new 
file mode 100644 index 00000000000..e4f6850b806 --- /dev/null +++ b/tests/queries/0_stateless/03215_grant_current_grants.reference @@ -0,0 +1,2 @@ +GRANT SELECT, CREATE TABLE, CREATE VIEW ON default.* +GRANT SELECT ON default.* diff --git a/tests/queries/0_stateless/03215_grant_current_grants.sh b/tests/queries/0_stateless/03215_grant_current_grants.sh new file mode 100755 index 00000000000..68af4a62bba --- /dev/null +++ b/tests/queries/0_stateless/03215_grant_current_grants.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +user1="user03215_1_${CLICKHOUSE_DATABASE}_$RANDOM" +user2="user03215_2_${CLICKHOUSE_DATABASE}_$RANDOM" +user3="user03215_3_${CLICKHOUSE_DATABASE}_$RANDOM" +db=${CLICKHOUSE_DATABASE} + + +${CLICKHOUSE_CLIENT} --query "CREATE USER $user1, $user2, $user3;"; +${CLICKHOUSE_CLIENT} --query "GRANT SELECT, CREATE TABLE, CREATE VIEW ON $db.* TO $user1 WITH GRANT OPTION;"; + +${CLICKHOUSE_CLIENT} --query "GRANT CURRENT GRANTS ON $db.* TO $user2" --user $user1; +${CLICKHOUSE_CLIENT} --query "GRANT CURRENT GRANTS ON $db.* TO $user3" --user $user2; + +${CLICKHOUSE_CLIENT} --query "SHOW GRANTS FOR $user2" | sed 's/ TO.*//'; +${CLICKHOUSE_CLIENT} --query "SHOW GRANTS FOR $user3" | sed 's/ TO.*//'; + +${CLICKHOUSE_CLIENT} --query "GRANT CURRENT GRANTS(SELECT ON $db.*) TO $user3" --user $user1; +${CLICKHOUSE_CLIENT} --query "SHOW GRANTS FOR $user3" | sed 's/ TO.*//'; + +${CLICKHOUSE_CLIENT} --query "DROP USER IF EXISTS $user1, $user2, $user3"; From 5ae5cd35b5b263d14bdd62aa5cbaa1e22219208a Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 6 Aug 2024 21:50:31 +0100 Subject: [PATCH 162/265] update --- base/poco/Net/include/Poco/Net/HTTPServerSession.h | 4 ++-- src/Server/HTTP/sendExceptionToHTTPClient.cpp | 2 +- .../0_stateless/00408_http_keep_alive.reference | 6 +++--- tests/queries/0_stateless/00408_http_keep_alive.sh | 7 ++++--- tests/queries/0_stateless/00501_http_head.re | 12 ++++++++++++ tests/queries/0_stateless/00501_http_head.reference | 4 ++-- tests/queries/0_stateless/00501_http_head.sh | 5 +++-- 7 files changed, 27 insertions(+), 13 deletions(-) create mode 100644 tests/queries/0_stateless/00501_http_head.re diff --git a/base/poco/Net/include/Poco/Net/HTTPServerSession.h b/base/poco/Net/include/Poco/Net/HTTPServerSession.h index 93f31012336..54e7f2c8c50 100644 --- a/base/poco/Net/include/Poco/Net/HTTPServerSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPServerSession.h @@ -57,10 +57,10 @@ namespace Net /// Returns the server's address. void setKeepAliveTimeout(Poco::Timespan keepAliveTimeout); - + size_t getKeepAliveTimeout() const { return _keepAliveTimeout.totalSeconds(); } - size_t getMaxKeepAliveRequests() const { return _maxKeepAliveRequests; } + size_t getMaxKeepAliveRequests() const { return _maxKeepAliveRequests; } private: bool _firstRequest; diff --git a/src/Server/HTTP/sendExceptionToHTTPClient.cpp b/src/Server/HTTP/sendExceptionToHTTPClient.cpp index 022a763a9a2..658b7a4707a 100644 --- a/src/Server/HTTP/sendExceptionToHTTPClient.cpp +++ b/src/Server/HTTP/sendExceptionToHTTPClient.cpp @@ -29,7 +29,7 @@ void sendExceptionToHTTPClient( if (!out) { /// If nothing was sent yet. 
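/// Context for the change below, as far as this patch series shows it: the fallback
/// buffer no longer takes an explicit keep-alive timeout, since the timeout now lives
/// with the server session (HTTPServerSession exposes getKeepAliveTimeout(), as the
/// adjacent Poco hunks show). A hypothetical minimal call, for illustration only:
///
///     WriteBufferFromHTTPServerResponse buf{response, /* HTTP HEAD request */ false};
///     buf.writeln("exception text");
///     buf.finalize();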
- WriteBufferFromHTTPServerResponse out_for_message{response, request.getMethod() == HTTPRequest::HTTP_HEAD, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT}; + WriteBufferFromHTTPServerResponse out_for_message{response, request.getMethod() == HTTPRequest::HTTP_HEAD}; out_for_message.writeln(exception_message); out_for_message.finalize(); diff --git a/tests/queries/0_stateless/00408_http_keep_alive.reference b/tests/queries/0_stateless/00408_http_keep_alive.reference index d5d7dacce9e..5402036bfd7 100644 --- a/tests/queries/0_stateless/00408_http_keep_alive.reference +++ b/tests/queries/0_stateless/00408_http_keep_alive.reference @@ -1,6 +1,6 @@ < Connection: Keep-Alive -< Keep-Alive: timeout=10, max=10000 +< Keep-Alive: timeout=10, max=? < Connection: Keep-Alive -< Keep-Alive: timeout=10, max=10000 +< Keep-Alive: timeout=10, max=? < Connection: Keep-Alive -< Keep-Alive: timeout=10, max=10000 +< Keep-Alive: timeout=10, max=? diff --git a/tests/queries/0_stateless/00408_http_keep_alive.sh b/tests/queries/0_stateless/00408_http_keep_alive.sh index 4bd0e494eb8..4a1cb4ed712 100755 --- a/tests/queries/0_stateless/00408_http_keep_alive.sh +++ b/tests/queries/0_stateless/00408_http_keep_alive.sh @@ -6,9 +6,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) URL="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/" -${CLICKHOUSE_CURL} -vsS "${URL}" --data-binary @- <<< "SELECT 1" 2>&1 | perl -lnE 'print if /Keep-Alive/'; -${CLICKHOUSE_CURL} -vsS "${URL}" --data-binary @- <<< " error here " 2>&1 | perl -lnE 'print if /Keep-Alive/'; -${CLICKHOUSE_CURL} -vsS "${URL}"ping 2>&1 | perl -lnE 'print if /Keep-Alive/'; +# the sed command here replaces the real number of left requests with a question mark, because it can vary and we don't really have control over it +${CLICKHOUSE_CURL} -vsS "${URL}" --data-binary @- <<< "SELECT 1" 2>&1 | sed -r 's/(keep-alive: timeout=10, max=)[0-9]+/\1?/I' | grep -i 'keep-alive'; +${CLICKHOUSE_CURL} -vsS "${URL}" --data-binary @- <<< " error here " 2>&1 | sed -r 's/(keep-alive: timeout=10, max=)[0-9]+/\1?/I' | grep -i 'keep-alive'; +${CLICKHOUSE_CURL} -vsS "${URL}"ping 2>&1 | perl -lnE 'print if /Keep-Alive/' | sed -r 's/(keep-alive: timeout=10, max=)[0-9]+/\1?/I' | grep -i 'keep-alive'; # no keep-alive: ${CLICKHOUSE_CURL} -vsS "${URL}"404/not/found/ 2>&1 | perl -lnE 'print if /Keep-Alive/'; diff --git a/tests/queries/0_stateless/00501_http_head.re b/tests/queries/0_stateless/00501_http_head.re new file mode 100644 index 00000000000..807bcd4922e --- /dev/null +++ b/tests/queries/0_stateless/00501_http_head.re @@ -0,0 +1,12 @@ +HTTP/1.1 200 OK +Connection: Keep-Alive +Content-Type: text/tab-separated-values; charset=UTF-8 +Transfer-Encoding: chunked +Keep-Alive: timeout=10, max=? + +HTTP/1.1 200 OK +Connection: Keep-Alive +Content-Type: text/tab-separated-values; charset=UTF-8 +Transfer-Encoding: chunked +Keep-Alive: timeout=10, max=? + diff --git a/tests/queries/0_stateless/00501_http_head.reference b/tests/queries/0_stateless/00501_http_head.reference index db82132b145..807bcd4922e 100644 --- a/tests/queries/0_stateless/00501_http_head.reference +++ b/tests/queries/0_stateless/00501_http_head.reference @@ -2,11 +2,11 @@ HTTP/1.1 200 OK Connection: Keep-Alive Content-Type: text/tab-separated-values; charset=UTF-8 Transfer-Encoding: chunked -Keep-Alive: timeout=10, max=10000 +Keep-Alive: timeout=10, max=? 
HTTP/1.1 200 OK Connection: Keep-Alive Content-Type: text/tab-separated-values; charset=UTF-8 Transfer-Encoding: chunked -Keep-Alive: timeout=10, max=10000 +Keep-Alive: timeout=10, max=? diff --git a/tests/queries/0_stateless/00501_http_head.sh b/tests/queries/0_stateless/00501_http_head.sh index 60283f26833..30da64c31f0 100755 --- a/tests/queries/0_stateless/00501_http_head.sh +++ b/tests/queries/0_stateless/00501_http_head.sh @@ -4,8 +4,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -( ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}&query=SELECT%201"; - ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}&query=select+*+from+system.numbers+limit+1000000" ) | grep -v "Date:" | grep -v "X-ClickHouse-Server-Display-Name:" | grep -v "X-ClickHouse-Query-Id:" | grep -v "X-ClickHouse-Format:" | grep -v "X-ClickHouse-Timezone:" +# the sed command here replaces the real number of left requests with a question mark, because it can vary and we don't really have control over it +( ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}&query=SELECT%201" | sed -r 's/(keep-alive: timeout=10, max=)[0-9]+/\1?/I'; + ${CLICKHOUSE_CURL} -s --head "${CLICKHOUSE_URL}&query=select+*+from+system.numbers+limit+1000000" ) | sed -r 's/(keep-alive: timeout=10, max=)[0-9]+/\1?/I' | grep -v "Date:" | grep -v "X-ClickHouse-Server-Display-Name:" | grep -v "X-ClickHouse-Query-Id:" | grep -v "X-ClickHouse-Format:" | grep -v "X-ClickHouse-Timezone:" if [[ $(${CLICKHOUSE_CURL} -sS -X POST -I "${CLICKHOUSE_URL}&query=SELECT+1" | grep -c '411 Length Required') -ne 1 ]]; then echo FAIL From 1f5c4101b2d74d7ccf798621083fb536bf35de18 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 6 Aug 2024 21:54:15 +0100 Subject: [PATCH 163/265] rm redundant file --- tests/queries/0_stateless/00501_http_head.re | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 tests/queries/0_stateless/00501_http_head.re diff --git a/tests/queries/0_stateless/00501_http_head.re b/tests/queries/0_stateless/00501_http_head.re deleted file mode 100644 index 807bcd4922e..00000000000 --- a/tests/queries/0_stateless/00501_http_head.re +++ /dev/null @@ -1,12 +0,0 @@ -HTTP/1.1 200 OK -Connection: Keep-Alive -Content-Type: text/tab-separated-values; charset=UTF-8 -Transfer-Encoding: chunked -Keep-Alive: timeout=10, max=? - -HTTP/1.1 200 OK -Connection: Keep-Alive -Content-Type: text/tab-separated-values; charset=UTF-8 -Transfer-Encoding: chunked -Keep-Alive: timeout=10, max=? 
- From e4134f5a51a1ad6d46c60337b9a3b5f8695d8020 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Wed, 7 Aug 2024 09:16:19 +0000 Subject: [PATCH 164/265] catch exception in destructor of `LocalFileHolder` Signed-off-by: Duc Canh Le --- src/Storages/Cache/ExternalDataSourceCache.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Storages/Cache/ExternalDataSourceCache.cpp b/src/Storages/Cache/ExternalDataSourceCache.cpp index cffb1dc9ca3..8c778fd511a 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.cpp +++ b/src/Storages/Cache/ExternalDataSourceCache.cpp @@ -57,8 +57,15 @@ LocalFileHolder::~LocalFileHolder() { if (original_readbuffer) { - assert_cast(original_readbuffer.get())->seek(0, SEEK_SET); - file_cache_controller->value().startBackgroundDownload(std::move(original_readbuffer), *thread_pool); + try + { + assert_cast(original_readbuffer.get())->seek(0, SEEK_SET); + file_cache_controller->value().startBackgroundDownload(std::move(original_readbuffer), *thread_pool); + } + catch (...) + { + tryLogCurrentException(getLogger("LocalFileHolder"), "Exception during destructor of LocalFileHolder."); + } } } From 49871bacc1d56fb82b78c70dbfc92d52003e2e99 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 7 Aug 2024 12:37:39 +0100 Subject: [PATCH 165/265] fix test --- .../poco/Net/include/Poco/Net/HTTPServerSession.h | 1 - base/poco/Net/src/HTTPServerSession.cpp | 1 - tests/integration/test_server_keep_alive/test.py | 15 ++++++++++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/base/poco/Net/include/Poco/Net/HTTPServerSession.h b/base/poco/Net/include/Poco/Net/HTTPServerSession.h index 54e7f2c8c50..b0659ca405c 100644 --- a/base/poco/Net/include/Poco/Net/HTTPServerSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPServerSession.h @@ -66,7 +66,6 @@ namespace Net bool _firstRequest; Poco::Timespan _keepAliveTimeout; int _maxKeepAliveRequests; - HTTPServerParams::Ptr _params; }; diff --git a/base/poco/Net/src/HTTPServerSession.cpp b/base/poco/Net/src/HTTPServerSession.cpp index 3093f215952..8eec3e14872 100644 --- a/base/poco/Net/src/HTTPServerSession.cpp +++ b/base/poco/Net/src/HTTPServerSession.cpp @@ -24,7 +24,6 @@ HTTPServerSession::HTTPServerSession(const StreamSocket & socket, HTTPServerPara , _firstRequest(true) , _keepAliveTimeout(pParams->getKeepAliveTimeout()) , _maxKeepAliveRequests(pParams->getMaxKeepAliveRequests()) - , _params(pParams) { setTimeout(pParams->getTimeout()); } diff --git a/tests/integration/test_server_keep_alive/test.py b/tests/integration/test_server_keep_alive/test.py index 96f08a37adb..e550319b6df 100644 --- a/tests/integration/test_server_keep_alive/test.py +++ b/tests/integration/test_server_keep_alive/test.py @@ -1,5 +1,6 @@ import logging import pytest +import random import requests from helpers.cluster import ClickHouseCluster @@ -24,19 +25,27 @@ def test_max_keep_alive_requests_on_user_side(start_cluster): # In this test we have `keep_alive_timeout` set to one hour to never trigger connection reset by timeout, `max_keep_alive_requests` is set to 5. # We expect server to close connection after each 5 requests. We detect connection reset by change in src port. # So the first 5 requests should come from the same port, the following 5 requests should come from another port. 
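    # A compact sketch of the detection idea described above (illustrative values
    # only, not produced by the test): the source port recorded in system.query_log
    # stays constant while a keep-alive connection lives, and a server-side close
    # forces requests.Session to reconnect from a new ephemeral port, e.g.
    #
    #   ports = ["54001"] * 5 + ["54002"] * 5  # hypothetical query_log output
    #   assert len(set(ports[:5])) == 1 and len(set(ports[5:])) == 1
    #   assert ports[0] != ports[5]
    #
    # which is the shape the port list collected below should have.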
+ + log_comments = [] + for _ in range(10): + rand_id = random.randint(0, 1000000) + log_comment = f"test_requests_with_keep_alive_{rand_id}" + log_comments.append(log_comment) + log_comments = sorted(log_comments) + session = requests.Session() for i in range(10): session.get( - f"http://{node.ip_address}:8123/?query=select%201&log_comment=test_requests_with_keep_alive_{i}" + f"http://{node.ip_address}:8123/?query=select%201&log_comment={log_comments[i]}" ) ports = node.query( - """ + f""" SYSTEM FLUSH LOGS; SELECT port FROM system.query_log - WHERE log_comment like 'test_requests_with_keep_alive_%' AND type = 'QueryFinish' + WHERE log_comment IN ({", ".join(f"'{comment}'" for comment in log_comments)}) AND type = 'QueryFinish' ORDER BY log_comment """ ).split("\n")[:-1] From 1082792950ca7b962c1288ab49bb8ff3ca855bbe Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 7 Aug 2024 20:21:50 +0100 Subject: [PATCH 166/265] fix test --- .../test_async_metrics_in_cgroup/test.py | 98 +++++++++---------- 1 file changed, 45 insertions(+), 53 deletions(-) diff --git a/tests/integration/test_async_metrics_in_cgroup/test.py b/tests/integration/test_async_metrics_in_cgroup/test.py index 00951c95a0e..d9f2e3aaaed 100644 --- a/tests/integration/test_async_metrics_in_cgroup/test.py +++ b/tests/integration/test_async_metrics_in_cgroup/test.py @@ -1,11 +1,10 @@ import pytest -import subprocess -import time from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node") +node1 = cluster.add_instance("node1", stay_alive=True) +node2 = cluster.add_instance("node2", stay_alive=True) @pytest.fixture(scope="module") @@ -17,61 +16,54 @@ def start_cluster(): cluster.shutdown() -def test_user_cpu_accounting(start_cluster): - if node.is_built_with_sanitizer(): - pytest.skip("Disabled for sanitizers") - - # check that our metrics sources actually exist - assert ( - subprocess.Popen("test -f /sys/fs/cgroup/cpu.stat".split(" ")).wait() == 0 - or subprocess.Popen( - "test -f /sys/fs/cgroup/cpuacct/cpuacct.stat".split(" ") - ).wait() - == 0 - ) - - # first let's spawn some cpu-intensive process outside of the container and check that it doesn't accounted by ClickHouse server - proc = subprocess.Popen( - "openssl speed -multi 8".split(" "), - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - - time.sleep(5) - - metric = node.query( - """ - SELECT max(value) - FROM ( - SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value - FROM system.asynchronous_metric_log - WHERE event_time >= now() - 60 AND metric = 'OSUserTime' - GROUP BY t - ) - """ - ).strip("\n") - - assert float(metric) < 2 - - proc.kill() - - # then let's test that we will account cpu time spent by the server itself +def run_cpu_intensive_task(node): node.query( - "SELECT cityHash64(*) FROM system.numbers_mt FORMAT Null SETTINGS max_execution_time=10", + "SELECT sum(*) FROM system.numbers_mt FORMAT Null SETTINGS max_execution_time=10", ignore_error=True, ) - metric = node.query( + +def get_async_metric(node, metric): + node.query("SYSTEM FLUSH LOGS") + return node.query( + f""" + SELECT max(value) + FROM ( + SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value + FROM system.asynchronous_metric_log + WHERE event_time >= now() - 60 AND metric = '{metric}' + GROUP BY t + ) + SETTINGS max_threads = 1 """ - SELECT max(value) - FROM ( - SELECT toStartOfInterval(event_time, toIntervalSecond(1)) AS t, avg(value) AS value - FROM 
system.asynchronous_metric_log - WHERE event_time >= now() - 60 AND metric = 'OSUserTime' - GROUP BY t - ) - """ ).strip("\n") + +def test_user_cpu_accounting(start_cluster): + if node1.is_built_with_sanitizer(): + pytest.skip("Disabled for sanitizers") + + # run query on the other node, its usage shouldn't be accounted by node1 + run_cpu_intensive_task(node2) + + node1_cpu_time = get_async_metric(node1, "OSUserTime") + assert float(node1_cpu_time) < 2 + + # then let's test that we will account cpu time spent by the server itself + node2_cpu_time = get_async_metric(node2, "OSUserTime") # this check is really weak, but CI is tough place and we cannot guarantee that test process will get many cpu time - assert float(metric) > 1 + assert float(node2_cpu_time) > 2 + + +def test_normalized_user_cpu(start_cluster): + if node1.is_built_with_sanitizer(): + pytest.skip("Disabled for sanitizers") + + # run query on the other node, its usage shouldn't be accounted by node1 + run_cpu_intensive_task(node2) + + node1_cpu_time = get_async_metric(node1, "OSUserTimeNormalized") + assert float(node1_cpu_time) < 1.01 + + node2_cpu_time = get_async_metric(node2, "OSUserTimeNormalized") + assert float(node2_cpu_time) < 1.01 From c6c0a44b93c382b384eb3ef83cf9da5102629de8 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 7 Aug 2024 23:57:19 +0200 Subject: [PATCH 167/265] fix flaky tests --- .../test_storage_azure_blob_storage/test.py | 2 +- tests/integration/test_storage_hdfs/test.py | 51 +++++++------------ 2 files changed, 19 insertions(+), 34 deletions(-) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 092c124855c..fbdc7f29f98 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -1272,7 +1272,7 @@ def test_filtering_by_file_or_path(cluster): node.query("SYSTEM FLUSH LOGS") result = node.query( - f"SELECT ProfileEvents['EngineFileLikeReadFiles'] FROM system.query_log WHERE query ilike '%select%azure%test_filter%' AND type='QueryFinish'" + f"SELECT ProfileEvents['EngineFileLikeReadFiles'] FROM system.query_log WHERE query ilike '%select%azure%test_filter%' AND type='QueryFinish' ORDER BY event_time_microseconds DESC LIMIT 1" ) assert int(result) == 1 diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 77921b885b0..c52e99b800e 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -41,7 +41,6 @@ def test_read_write_storage(started_cluster): node1.query("insert into SimpleHDFSStorage values (1, 'Mark', 72.53)") assert hdfs_api.read_data("/simple_storage") == "1\tMark\t72.53\n" assert node1.query("select * from SimpleHDFSStorage") == "1\tMark\t72.53\n" - node1.query("drop table if exists SimpleHDFSStorage") def test_read_write_storage_with_globs(started_cluster): @@ -95,11 +94,6 @@ def test_read_write_storage_with_globs(started_cluster): print(ex) assert "in readonly mode" in str(ex) - node1.query("DROP TABLE HDFSStorageWithRange") - node1.query("DROP TABLE HDFSStorageWithEnum") - node1.query("DROP TABLE HDFSStorageWithQuestionMark") - node1.query("DROP TABLE HDFSStorageWithAsterisk") - def test_storage_with_multidirectory_glob(started_cluster): hdfs_api = started_cluster.hdfs_api @@ -341,7 +335,6 @@ def test_virtual_columns(started_cluster): ) == expected ) - 
node1.query("DROP TABLE virtual_cols") def test_read_files_with_spaces(started_cluster): @@ -363,7 +356,6 @@ def test_read_files_with_spaces(started_cluster): ) assert node1.query("select * from test order by id") == "1\n2\n3\n" fs.delete(dir, recursive=True) - node1.query("DROP TABLE test") def test_truncate_table(started_cluster): @@ -435,7 +427,7 @@ def test_seekable_formats(started_cluster): f"hdfs('hdfs://hdfs1:9000/parquet', 'Parquet', 'a Int32, b String')" ) node1.query( - f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(5000000) SETTINGS hdfs_truncate_on_insert=1" + f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(5000000)" ) result = node1.query(f"SELECT count() FROM {table_function}") @@ -443,7 +435,7 @@ def test_seekable_formats(started_cluster): table_function = f"hdfs('hdfs://hdfs1:9000/orc', 'ORC', 'a Int32, b String')" node1.query( - f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(5000000) SETTINGS hdfs_truncate_on_insert=1" + f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(5000000)" ) result = node1.query(f"SELECT count() FROM {table_function}") assert int(result) == 5000000 @@ -467,7 +459,7 @@ def test_read_table_with_default(started_cluster): def test_schema_inference(started_cluster): node1.query( - f"insert into table function hdfs('hdfs://hdfs1:9000/native', 'Native', 'a Int32, b String') SELECT number, randomString(100) FROM numbers(5000000) SETTINGS hdfs_truncate_on_insert=1" + f"insert into table function hdfs('hdfs://hdfs1:9000/native', 'Native', 'a Int32, b String') SELECT number, randomString(100) FROM numbers(5000000)" ) result = node1.query(f"desc hdfs('hdfs://hdfs1:9000/native', 'Native')") @@ -520,7 +512,6 @@ def test_hdfs_directory_not_exist(started_cluster): assert "" == node1.query( "select * from HDFSStorageWithNotExistDir settings hdfs_ignore_file_doesnt_exist=1" ) - node1.query("DROP TABLE HDFSStorageWithNotExistDir") def test_overwrite(started_cluster): @@ -540,7 +531,6 @@ def test_overwrite(started_cluster): result = node1.query(f"select count() from test_overwrite") assert int(result) == 10 - node1.query(f"DROP TABLE test_overwrite") def test_multiple_inserts(started_cluster): @@ -577,7 +567,6 @@ def test_multiple_inserts(started_cluster): result = node1.query(f"select count() from test_multiple_inserts") assert int(result) == 60 - node1.query(f"DROP TABLE test_multiple_inserts") def test_format_detection(started_cluster): @@ -591,10 +580,10 @@ def test_format_detection(started_cluster): def test_schema_inference_with_globs(started_cluster): node1.query( - f"insert into table function hdfs('hdfs://hdfs1:9000/data1.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select NULL SETTINGS hdfs_truncate_on_insert=1" + f"insert into table function hdfs('hdfs://hdfs1:9000/data1.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select NULL" ) node1.query( - f"insert into table function hdfs('hdfs://hdfs1:9000/data2.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select 0 SETTINGS hdfs_truncate_on_insert=1" + f"insert into table function hdfs('hdfs://hdfs1:9000/data2.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select 0" ) result = node1.query( @@ -608,7 +597,7 @@ def test_schema_inference_with_globs(started_cluster): assert sorted(result.split()) == ["0", "\\N"] node1.query( - f"insert into table function 
hdfs('hdfs://hdfs1:9000/data3.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select NULL SETTINGS hdfs_truncate_on_insert=1" + f"insert into table function hdfs('hdfs://hdfs1:9000/data3.jsoncompacteachrow', 'JSONCompactEachRow', 'x Nullable(UInt32)') select NULL" ) filename = "data{1,3}.jsoncompacteachrow" @@ -620,7 +609,7 @@ def test_schema_inference_with_globs(started_cluster): assert "All attempts to extract table structure from files failed" in result node1.query( - f"insert into table function hdfs('hdfs://hdfs1:9000/data0.jsoncompacteachrow', 'TSV', 'x String') select '[123;]' SETTINGS hdfs_truncate_on_insert=1" + f"insert into table function hdfs('hdfs://hdfs1:9000/data0.jsoncompacteachrow', 'TSV', 'x String') select '[123;]'" ) result = node1.query_and_get_error( @@ -632,7 +621,7 @@ def test_schema_inference_with_globs(started_cluster): def test_insert_select_schema_inference(started_cluster): node1.query( - f"insert into table function hdfs('hdfs://hdfs1:9000/test.native.zst') select toUInt64(1) as x SETTINGS hdfs_truncate_on_insert=1" + f"insert into table function hdfs('hdfs://hdfs1:9000/test.native.zst') select toUInt64(1) as x" ) result = node1.query(f"desc hdfs('hdfs://hdfs1:9000/test.native.zst')") @@ -675,9 +664,7 @@ def test_virtual_columns_2(started_cluster): table_function = ( f"hdfs('hdfs://hdfs1:9000/parquet_2', 'Parquet', 'a Int32, b String')" ) - node1.query( - f"insert into table function {table_function} SELECT 1, 'kek' SETTINGS hdfs_truncate_on_insert=1" - ) + node1.query(f"insert into table function {table_function} SELECT 1, 'kek'") result = node1.query(f"SELECT _path FROM {table_function}") assert result.strip() == "parquet_2" @@ -685,9 +672,7 @@ def test_virtual_columns_2(started_cluster): table_function = ( f"hdfs('hdfs://hdfs1:9000/parquet_3', 'Parquet', 'a Int32, _path String')" ) - node1.query( - f"insert into table function {table_function} SELECT 1, 'kek' SETTINGS hdfs_truncate_on_insert=1" - ) + node1.query(f"insert into table function {table_function} SELECT 1, 'kek'") result = node1.query(f"SELECT _path FROM {table_function}") assert result.strip() == "kek" @@ -984,11 +969,11 @@ def test_read_subcolumns(started_cluster): node = started_cluster.instances["node1"] node.query( - f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3) SETTINGS hdfs_truncate_on_insert=1" + f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" ) node.query( - f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3) SETTINGS hdfs_truncate_on_insert=1" + f"insert into function hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)') select ((1, 2), 3)" ) res = node.query( @@ -1034,11 +1019,11 @@ def test_union_schema_inference_mode(started_cluster): node = started_cluster.instances["node1"] node.query( - "insert into function hdfs('hdfs://hdfs1:9000/test_union_schema_inference1.jsonl') select 1 as a SETTINGS hdfs_truncate_on_insert=1" + "insert into function hdfs('hdfs://hdfs1:9000/test_union_schema_inference1.jsonl') select 1 as a" ) node.query( - "insert into function hdfs('hdfs://hdfs1:9000/test_union_schema_inference2.jsonl') select 2 as b SETTINGS hdfs_truncate_on_insert=1" + "insert into function 
hdfs('hdfs://hdfs1:9000/test_union_schema_inference2.jsonl') select 2 as b" ) node.query("system drop schema cache for hdfs") @@ -1070,7 +1055,7 @@ def test_union_schema_inference_mode(started_cluster): ) assert result == "a\tNullable(Int64)\n" "b\tNullable(Int64)\n" node.query( - f"insert into function hdfs('hdfs://hdfs1:9000/test_union_schema_inference3.jsonl', TSV) select 'Error' SETTINGS hdfs_truncate_on_insert=1" + f"insert into function hdfs('hdfs://hdfs1:9000/test_union_schema_inference3.jsonl', TSV) select 'Error'" ) error = node.query_and_get_error( @@ -1083,11 +1068,11 @@ def test_format_detection(started_cluster): node = started_cluster.instances["node1"] node.query( - "insert into function hdfs('hdfs://hdfs1:9000/test_format_detection0', JSONEachRow) select number as x, 'str_' || toString(number) as y from numbers(0) SETTINGS hdfs_truncate_on_insert=1" + "insert into function hdfs('hdfs://hdfs1:9000/test_format_detection0', JSONEachRow) select number as x, 'str_' || toString(number) as y from numbers(0)" ) node.query( - "insert into function hdfs('hdfs://hdfs1:9000/test_format_detection1', JSONEachRow) select number as x, 'str_' || toString(number) as y from numbers(10) SETTINGS hdfs_truncate_on_insert=1" + "insert into function hdfs('hdfs://hdfs1:9000/test_format_detection1', JSONEachRow) select number as x, 'str_' || toString(number) as y from numbers(10)" ) expected_desc_result = node.query( @@ -1151,7 +1136,7 @@ def test_write_to_globbed_partitioned_path(started_cluster): node = started_cluster.instances["node1"] error = node.query_and_get_error( - "insert into function hdfs('hdfs://hdfs1:9000/test_data_*_{_partition_id}.csv') partition by 42 select 42 SETTINGS hdfs_truncate_on_insert=1" + "insert into function hdfs('hdfs://hdfs1:9000/test_data_*_{_partition_id}.csv') partition by 42 select 42" ) assert "DATABASE_ACCESS_DENIED" in error From c19ee360d1a4cf0bc7607923505ba1e2a3848132 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 8 Aug 2024 00:44:42 +0200 Subject: [PATCH 168/265] Update StorageObjectStorageSource.cpp --- src/Storages/ObjectStorage/StorageObjectStorageSource.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 810bad4788b..d8e26977e75 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -208,7 +208,6 @@ Chunk StorageObjectStorageSource::generate() .filename = &filename, .last_modified = object_info->metadata->last_modified, .etag = &(object_info->metadata->etag) - .last_modified = object_info->metadata->last_modified, }, getContext(), read_from_format_info.columns_description); const auto & partition_columns = configuration->getPartitionColumns(); From 42aa967311a55d3da0e1230595b0e0ca9928e777 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 8 Aug 2024 00:38:05 +0000 Subject: [PATCH 169/265] add profile events for merges --- src/Common/ProfileEvents.cpp | 25 +++++- .../Merges/AggregatingSortedTransform.h | 10 +++ .../Algorithms/AggregatingSortedAlgorithm.h | 2 + .../FinishAggregatingInOrderAlgorithm.cpp | 3 + .../FinishAggregatingInOrderAlgorithm.h | 5 ++ .../GraphiteRollupSortedAlgorithm.h | 2 + .../Merges/Algorithms/IMergingAlgorithm.h | 11 ++- .../IMergingAlgorithmWithSharedChunks.h | 2 + src/Processors/Merges/Algorithms/MergedData.h | 2 + .../Algorithms/MergingSortedAlgorithm.h | 2 +- 
.../Algorithms/SummingSortedAlgorithm.h | 2 + .../Merges/CollapsingSortedTransform.h | 10 +++ src/Processors/Merges/IMergingTransform.h | 35 +++++++- .../Merges/MergingSortedTransform.cpp | 26 ++---- .../Merges/MergingSortedTransform.h | 4 - .../Merges/ReplacingSortedTransform.h | 9 ++ .../Merges/SummingSortedTransform.h | 10 +++ .../Merges/VersionedCollapsingTransform.h | 9 ++ .../Transforms/ColumnGathererTransform.cpp | 57 ++++++------- .../Transforms/ColumnGathererTransform.h | 11 ++- .../Transforms/MergeJoinTransform.cpp | 12 ++- .../Transforms/MergeJoinTransform.h | 2 + .../Transforms/MergeSortingTransform.cpp | 2 - .../Transforms/PasteJoinTransform.cpp | 10 +++ .../Transforms/PasteJoinTransform.h | 3 +- .../gtest_blocks_size_merging_streams.cpp | 4 +- src/Storages/MergeTree/MergeList.h | 1 + src/Storages/MergeTree/MergeProgress.h | 27 +++--- src/Storages/MergeTree/MergeTask.cpp | 84 +++++++++++++++---- src/Storages/MergeTree/MergeTask.h | 20 ++++- src/Storages/MergeTree/MutateTask.cpp | 10 ++- 31 files changed, 308 insertions(+), 104 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index ccdce7ff584..857a08d8a5d 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -210,7 +210,29 @@ M(Merge, "Number of launched background merges.") \ M(MergedRows, "Rows read for background merges. This is the number of rows before merge.") \ M(MergedUncompressedBytes, "Uncompressed bytes (for columns as they stored in memory) that was read for background merges. This is the number before merge.") \ - M(MergesTimeMilliseconds, "Total time spent for background merges.")\ + M(MergeTotalMilliseconds, "Total time spent for background merges") \ + M(MergeExecuteMilliseconds, "Total busy time spent for execution of background merges") \ + M(MergeHorizontalStageTotalMilliseconds, "Total time spent for horizontal stage of background merges") \ + M(MergeHorizontalStageExecuteMilliseconds, "Total busy time spent for execution of horizontal stage of background merges") \ + M(MergeVerticalStageTotalMilliseconds, "Total time spent for vertical stage of background merges") \ + M(MergeVerticalStageExecuteMilliseconds, "Total busy time spent for execution of vertical stage of background merges") \ + M(MergeProjectionStageTotalMilliseconds, "Total time spent for projection stage of background merges") \ + M(MergeProjectionStageExecuteMilliseconds, "Total busy time spent for execution of projection stage of background merges") \ + \ + M(MergingSortedMilliseconds, "Total time spent while merging sorted columns") \ + M(AggregatingSortedMilliseconds, "Total time spent while aggregating sorted columns") \ + M(CollapsingSortedMilliseconds, "Total time spent while collapsing sorted columns") \ + M(ReplacingSortedMilliseconds, "Total time spent while replacing sorted columns") \ + M(SummingSortedMilliseconds, "Total time spent while summing sorted columns") \ + M(VersionedCollapsingSortedMilliseconds, "Total time spent while version collapsing sorted columns") \ + M(GatheringColumnMilliseconds, "Total time spent while gathering columns for vertical merge") \ + \ + M(MutationTotalParts, "Number of total parts for which mutations tried to be applied") \ + M(MutationUntouchedParts, "Number of total parts for which mutations tried to be applied but which was completely skipped according to predicate") \ + M(MutatedRows, "Rows read for mutations. 
This is the number of rows before mutation") \ + M(MutatedUncompressedBytes, "Uncompressed bytes (for columns as they stored in memory) that was read for mutations. This is the number before mutation.") \ + M(MutationTimeMilliseconds, "Total time spent for mutations.") \ + M(MutateTaskProjectionsCalculationMicroseconds, "Time spent calculating projections") \ \ M(MergeTreeDataWriterRows, "Number of rows INSERTed to MergeTree tables.") \ M(MergeTreeDataWriterUncompressedBytes, "Uncompressed bytes (for columns as they stored in memory) INSERTed to MergeTree tables.") \ @@ -225,7 +247,6 @@ M(MergeTreeDataWriterProjectionsCalculationMicroseconds, "Time spent calculating projections") \ M(MergeTreeDataProjectionWriterSortingBlocksMicroseconds, "Time spent sorting blocks (for projection it might be a key different from table's sorting key)") \ M(MergeTreeDataProjectionWriterMergingBlocksMicroseconds, "Time spent merging blocks") \ - M(MutateTaskProjectionsCalculationMicroseconds, "Time spent calculating projections") \ \ M(InsertedWideParts, "Number of parts inserted in Wide format.") \ M(InsertedCompactParts, "Number of parts inserted in Compact format.") \ diff --git a/src/Processors/Merges/AggregatingSortedTransform.h b/src/Processors/Merges/AggregatingSortedTransform.h index c6d7e844c65..c96ad3db525 100644 --- a/src/Processors/Merges/AggregatingSortedTransform.h +++ b/src/Processors/Merges/AggregatingSortedTransform.h @@ -3,6 +3,11 @@ #include #include +namespace ProfileEvents +{ + extern const Event AggregatingSortedMilliseconds; +} + namespace DB { @@ -29,6 +34,11 @@ public: } String getName() const override { return "AggregatingSortedTransform"; } + + void onFinish() override + { + logMergedStats(ProfileEvents::AggregatingSortedMilliseconds, "Aggregated sorted", getLogger("AggregatingSortedTransform")); + } }; } diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h index 53c103e7038..908994e1851 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h @@ -30,6 +30,8 @@ public: void consume(Input & input, size_t source_num) override; Status merge() override; + MergedStats getMergedStats() const override { return merged_data.getMergedStats(); } + /// Stores information for aggregation of SimpleAggregateFunction columns struct SimpleAggregateDescription { diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index 86675bcb237..477566d8a94 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -126,6 +126,9 @@ IMergingAlgorithm::Status FinishAggregatingInOrderAlgorithm::merge() Chunk FinishAggregatingInOrderAlgorithm::prepareToMerge() { + total_merged_rows += accumulated_rows; + total_merged_bytes += accumulated_bytes; + accumulated_rows = 0; accumulated_bytes = 0; diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h index cc6578e79be..39171c5a978 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h @@ -50,6 +50,8 @@ public: void consume(Input & input, size_t source_num) override; Status merge() override; + 
MergedStats getMergedStats() const override { return {.bytes = accumulated_bytes, .rows = accumulated_rows, .blocks = chunk_num}; } + private: Chunk prepareToMerge(); void addToAggregation(); @@ -92,6 +94,9 @@ private: UInt64 chunk_num = 0; size_t accumulated_rows = 0; size_t accumulated_bytes = 0; + + size_t total_merged_rows = 0; + size_t total_merged_bytes = 0; }; } diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h index aaa3859efb6..cb2775c968d 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h @@ -33,6 +33,8 @@ public: const char * getName() const override { return "GraphiteRollupSortedAlgorithm"; } Status merge() override; + MergedStats getMergedStats() const override { return merged_data->getMergedStats(); } + struct ColumnsDefinition { size_t path_column_num; diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithm.h b/src/Processors/Merges/Algorithms/IMergingAlgorithm.h index 9a1c7c24270..83f11232b71 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithm.h +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithm.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB { @@ -65,6 +65,15 @@ public: IMergingAlgorithm() = default; virtual ~IMergingAlgorithm() = default; + + struct MergedStats + { + UInt64 bytes = 0; + UInt64 rows = 0; + UInt64 blocks = 0; + }; + + virtual MergedStats getMergedStats() const = 0; }; // TODO: use when compile with clang which could support it diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h index bc1aafe93f7..1725108ac5d 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h @@ -16,6 +16,8 @@ public: void initialize(Inputs inputs) override; void consume(Input & input, size_t source_num) override; + MergedStats getMergedStats() const override { return merged_data->getMergedStats(); } + private: Block header; SortDescription description; diff --git a/src/Processors/Merges/Algorithms/MergedData.h b/src/Processors/Merges/Algorithms/MergedData.h index c5bb074bb0c..8f47f89d8ee 100644 --- a/src/Processors/Merges/Algorithms/MergedData.h +++ b/src/Processors/Merges/Algorithms/MergedData.h @@ -183,6 +183,8 @@ public: UInt64 totalAllocatedBytes() const { return total_allocated_bytes; } UInt64 maxBlockSize() const { return max_block_size; } + IMergingAlgorithm::MergedStats getMergedStats() const { return {.bytes = total_allocated_bytes, .rows = total_merged_rows, .blocks = total_chunks}; } + virtual ~MergedData() = default; protected: diff --git a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h index bcb111baadf..c889668a38e 100644 --- a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h @@ -31,7 +31,7 @@ public: void consume(Input & input, size_t source_num) override; Status merge() override; - const MergedData & getMergedData() const { return merged_data; } + MergedStats getMergedStats() const override { return merged_data.getMergedStats(); } private: Block header; diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h index 
664b171c4b9..74b4e397831 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h @@ -30,6 +30,8 @@ public: void consume(Input & input, size_t source_num) override; Status merge() override; + MergedStats getMergedStats() const override { return merged_data.getMergedStats(); } + struct AggregateDescription; struct MapDescription; diff --git a/src/Processors/Merges/CollapsingSortedTransform.h b/src/Processors/Merges/CollapsingSortedTransform.h index 4479ac82f66..99fb700abf1 100644 --- a/src/Processors/Merges/CollapsingSortedTransform.h +++ b/src/Processors/Merges/CollapsingSortedTransform.h @@ -3,6 +3,11 @@ #include #include +namespace ProfileEvents +{ + extern const Event CollapsingSortedMilliseconds; +} + namespace DB { @@ -36,6 +41,11 @@ public: } String getName() const override { return "CollapsingSortedTransform"; } + + void onFinish() override + { + logMergedStats(ProfileEvents::CollapsingSortedMilliseconds, "Collapsed sorted", getLogger("CollapsingSortedTransform")); + } }; } diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index be629271736..fba5b038618 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -2,7 +2,10 @@ #include #include +#include #include +#include +#include namespace DB { @@ -110,6 +113,8 @@ public: void work() override { + Stopwatch watch; + if (!state.init_chunks.empty()) algorithm.initialize(std::move(state.init_chunks)); @@ -147,6 +152,8 @@ public: // std::cerr << "Finished" << std::endl; state.is_finished = true; } + + merging_elapsed_ns += watch.elapsedNanoseconds(); } protected: @@ -156,7 +163,33 @@ protected: Algorithm algorithm; /// Profile info. 
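[Editor's note: the work() override above now brackets each pass of the algorithm with a Stopwatch and adds the result to merging_elapsed_ns; the hunk just below drops the old transform-lifetime stopwatch that this counter replaces. The difference matters: a lifetime clock keeps ticking while the transform sits idle waiting for input, whereas the accumulated counter grows only during execution. A standalone illustration of the two readings, using plain std::chrono rather than ClickHouse's Stopwatch; the sleeps stand in for work and for pipeline waits.]

#include <chrono>
#include <cstdint>
#include <iostream>
#include <thread>

int main()
{
    using Clock = std::chrono::steady_clock;
    std::uint64_t busy_ns = 0;        // analogue of merging_elapsed_ns
    const auto start = Clock::now();  // analogue of the old lifetime stopwatch

    for (int i = 0; i < 3; ++i)
    {
        const auto t0 = Clock::now();
        std::this_thread::sleep_for(std::chrono::milliseconds(10)); // "work"
        busy_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(Clock::now() - t0).count();

        std::this_thread::sleep_for(std::chrono::milliseconds(20)); // "waiting for input"
    }

    const auto wall_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(Clock::now() - start).count();
    std::cout << "busy: " << busy_ns / 1000000 << " ms, wall: " << wall_ns / 1000000 << " ms\n";
}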
- Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; + UInt64 merging_elapsed_ns = 0; + + void logMergedStats(ProfileEvents::Event elapsed_ms_event, std::string_view transform_message, LoggerPtr log) const + { + auto stats = algorithm.getMergedStats(); + + UInt64 elapsed_ms = merging_elapsed_ns / 1000000LL; + ProfileEvents::increment(elapsed_ms_event, elapsed_ms); + + /// Don't print info for small parts (< 1M rows) + if (stats.rows < 1000000) + return; + + double seconds = static_cast(merging_elapsed_ns) / 1000000000ULL; + + if (seconds == 0.0) + { + LOG_DEBUG(log, "{}: {} blocks, {} rows, {} bytes in 0 sec.", + transform_message, stats.blocks, stats.rows, stats.bytes); + } + else + { + LOG_DEBUG(log, "{}: {} blocks, {} rows, {} bytes in {} sec., {} rows/sec., {}/sec.", + transform_message, stats.blocks, stats.rows, stats.bytes, + seconds, stats.rows / seconds, ReadableSize(stats.bytes / seconds)); + } + } private: using IMergingTransformBase::state; diff --git a/src/Processors/Merges/MergingSortedTransform.cpp b/src/Processors/Merges/MergingSortedTransform.cpp index 338b1ff7935..d2895a2a2e9 100644 --- a/src/Processors/Merges/MergingSortedTransform.cpp +++ b/src/Processors/Merges/MergingSortedTransform.cpp @@ -1,9 +1,12 @@ #include #include #include - #include -#include + +namespace ProfileEvents +{ + extern const Event MergingSortedMilliseconds; +} namespace DB { @@ -18,7 +21,6 @@ MergingSortedTransform::MergingSortedTransform( UInt64 limit_, bool always_read_till_end_, WriteBuffer * out_row_sources_buf_, - bool quiet_, bool use_average_block_sizes, bool have_all_inputs_) : IMergingTransform( @@ -37,7 +39,6 @@ MergingSortedTransform::MergingSortedTransform( limit_, out_row_sources_buf_, use_average_block_sizes) - , quiet(quiet_) { } @@ -48,22 +49,7 @@ void MergingSortedTransform::onNewInput() void MergingSortedTransform::onFinish() { - if (quiet) - return; - - const auto & merged_data = algorithm.getMergedData(); - - auto log = getLogger("MergingSortedTransform"); - - double seconds = total_stopwatch.elapsedSeconds(); - - if (seconds == 0.0) - LOG_DEBUG(log, "Merge sorted {} blocks, {} rows in 0 sec.", merged_data.totalChunks(), merged_data.totalMergedRows()); - else - LOG_DEBUG(log, "Merge sorted {} blocks, {} rows in {} sec., {} rows/sec., {}/sec", - merged_data.totalChunks(), merged_data.totalMergedRows(), seconds, - merged_data.totalMergedRows() / seconds, - ReadableSize(merged_data.totalAllocatedBytes() / seconds)); + logMergedStats(ProfileEvents::MergingSortedMilliseconds, "Merged sorted", getLogger("MergingSortedTransform")); } } diff --git a/src/Processors/Merges/MergingSortedTransform.h b/src/Processors/Merges/MergingSortedTransform.h index 2b53939f309..6e52450efa7 100644 --- a/src/Processors/Merges/MergingSortedTransform.h +++ b/src/Processors/Merges/MergingSortedTransform.h @@ -21,7 +21,6 @@ public: UInt64 limit_ = 0, bool always_read_till_end_ = false, WriteBuffer * out_row_sources_buf_ = nullptr, - bool quiet_ = false, bool use_average_block_sizes = false, bool have_all_inputs_ = true); @@ -30,9 +29,6 @@ public: protected: void onNewInput() override; void onFinish() override; - -private: - bool quiet = false; }; } diff --git a/src/Processors/Merges/ReplacingSortedTransform.h b/src/Processors/Merges/ReplacingSortedTransform.h index 2657987f161..dc262aab9ee 100644 --- a/src/Processors/Merges/ReplacingSortedTransform.h +++ b/src/Processors/Merges/ReplacingSortedTransform.h @@ -3,6 +3,10 @@ #include #include +namespace ProfileEvents +{ + extern const Event 
ReplacingSortedMilliseconds; +} namespace DB { @@ -38,6 +42,11 @@ public: } String getName() const override { return "ReplacingSorted"; } + + void onFinish() override + { + logMergedStats(ProfileEvents::ReplacingSortedMilliseconds, "Replaced sorted", getLogger("ReplacingSortedTransform")); + } }; } diff --git a/src/Processors/Merges/SummingSortedTransform.h b/src/Processors/Merges/SummingSortedTransform.h index 70ddebfea95..d7c20223d7e 100644 --- a/src/Processors/Merges/SummingSortedTransform.h +++ b/src/Processors/Merges/SummingSortedTransform.h @@ -3,6 +3,11 @@ #include #include +namespace ProfileEvents +{ + extern const Event SummingSortedMilliseconds; +} + namespace DB { @@ -33,6 +38,11 @@ public: } String getName() const override { return "SummingSortedTransform"; } + + void onFinish() override + { + logMergedStats(ProfileEvents::SummingSortedMilliseconds, "Summed sorted", getLogger("SummingSortedTransform")); + } }; } diff --git a/src/Processors/Merges/VersionedCollapsingTransform.h b/src/Processors/Merges/VersionedCollapsingTransform.h index 18244469bd7..32b5d7bf343 100644 --- a/src/Processors/Merges/VersionedCollapsingTransform.h +++ b/src/Processors/Merges/VersionedCollapsingTransform.h @@ -3,6 +3,10 @@ #include #include +namespace ProfileEvents +{ + extern const Event VersionedCollapsingSortedMilliseconds; +} namespace DB { @@ -33,6 +37,11 @@ public: } String getName() const override { return "VersionedCollapsingTransform"; } + + void onFinish() override + { + logMergedStats(ProfileEvents::VersionedCollapsingSortedMilliseconds, "Versioned collapsed sorted", getLogger("VersionedCollapsingTransform")); + } }; } diff --git a/src/Processors/Transforms/ColumnGathererTransform.cpp b/src/Processors/Transforms/ColumnGathererTransform.cpp index 15f8355bdc7..52fa42fdb51 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.cpp +++ b/src/Processors/Transforms/ColumnGathererTransform.cpp @@ -1,11 +1,15 @@ #include +#include #include #include #include #include #include -#include +namespace ProfileEvents +{ + extern const Event GatheringColumnMilliseconds; +} namespace DB { @@ -33,6 +37,13 @@ ColumnGathererStream::ColumnGathererStream( throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "There are no streams to gather"); } +void ColumnGathererStream::updateStats(const IColumn & column) +{ + merged_rows += column.size(); + merged_bytes += column.allocatedBytes(); + ++merged_blocks; +} + void ColumnGathererStream::initialize(Inputs inputs) { Columns source_columns; @@ -82,7 +93,9 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() { res.addColumn(source_to_fully_copy->column); } - merged_rows += source_to_fully_copy->size; + + updateStats(*source_to_fully_copy->column); + source_to_fully_copy->pos = source_to_fully_copy->size; source_to_fully_copy = nullptr; return Status(std::move(res)); @@ -96,8 +109,7 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() { next_required_source = 0; Chunk res; - merged_rows += sources.front().column->size(); - merged_bytes += sources.front().column->allocatedBytes(); + updateStats(*sources.front().column); res.addColumn(std::move(sources.front().column)); sources.front().pos = sources.front().size = 0; return Status(std::move(res)); @@ -123,8 +135,8 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() if (source_to_fully_copy && result_column->empty()) { Chunk res; - merged_rows += source_to_fully_copy->column->size(); - merged_bytes += source_to_fully_copy->column->allocatedBytes(); + updateStats(*source_to_fully_copy->column); + if 
(result_column->hasDynamicStructure()) { auto col = result_column->cloneEmpty(); @@ -140,13 +152,13 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() return Status(std::move(res)); } - auto col = result_column->cloneEmpty(); - result_column.swap(col); + auto return_column = result_column->cloneEmpty(); + result_column.swap(return_column); Chunk res; - merged_rows += col->size(); - merged_bytes += col->allocatedBytes(); - res.addColumn(std::move(col)); + updateStats(*return_column); + + res.addColumn(std::move(return_column)); return Status(std::move(res), row_sources_buf.eof() && !source_to_fully_copy); } @@ -185,31 +197,10 @@ ColumnGathererTransform::ColumnGathererTransform( toString(header.columns())); } -void ColumnGathererTransform::work() -{ - Stopwatch stopwatch; - IMergingTransform::work(); - elapsed_ns += stopwatch.elapsedNanoseconds(); -} - void ColumnGathererTransform::onFinish() { - auto merged_rows = algorithm.getMergedRows(); - auto merged_bytes = algorithm.getMergedRows(); - /// Don't print info for small parts (< 10M rows) - if (merged_rows < 10000000) - return; - - double seconds = static_cast(elapsed_ns) / 1000000000ULL; const auto & column_name = getOutputPort().getHeader().getByPosition(0).name; - - if (seconds == 0.0) - LOG_DEBUG(log, "Gathered column {} ({} bytes/elem.) in 0 sec.", - column_name, static_cast(merged_bytes) / merged_rows); - else - LOG_DEBUG(log, "Gathered column {} ({} bytes/elem.) in {} sec., {} rows/sec., {}/sec.", - column_name, static_cast(merged_bytes) / merged_rows, seconds, - merged_rows / seconds, ReadableSize(merged_bytes / seconds)); + logMergedStats(ProfileEvents::GatheringColumnMilliseconds, fmt::format("Gathered column {}", column_name), log); } } diff --git a/src/Processors/Transforms/ColumnGathererTransform.h b/src/Processors/Transforms/ColumnGathererTransform.h index ec5691316ce..a535b2669d0 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.h +++ b/src/Processors/Transforms/ColumnGathererTransform.h @@ -2,6 +2,7 @@ #include #include +#include "base/types.h" #include #include @@ -72,10 +73,11 @@ public: template void gather(Column & column_res); - UInt64 getMergedRows() const { return merged_rows; } - UInt64 getMergedBytes() const { return merged_bytes; } + MergedStats getMergedStats() const override { return {.bytes = merged_bytes, .rows = merged_rows, .blocks = merged_blocks}; } private: + void updateStats(const IColumn & column); + /// Cache required fields struct Source { @@ -105,6 +107,7 @@ private: ssize_t next_required_source = -1; UInt64 merged_rows = 0; UInt64 merged_bytes = 0; + UInt64 merged_blocks = 0; }; class ColumnGathererTransform final : public IMergingTransform @@ -120,12 +123,8 @@ public: String getName() const override { return "ColumnGathererTransform"; } - void work() override; - protected: void onFinish() override; - UInt64 elapsed_ns = 0; - LoggerPtr log; }; diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index e96a75d277b..26601207da8 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -511,6 +511,16 @@ void MergeJoinAlgorithm::logElapsed(double seconds) stat.max_blocks_loaded); } +IMergingAlgorithm::MergedStats MergeJoinAlgorithm::getMergedStats() const +{ + return + { + .bytes = 0, + .rows = stat.num_rows[0] + stat.num_rows[1], + .blocks = stat.num_blocks[0] + stat.num_blocks[1], + }; +} + static void prepareChunk(Chunk & chunk) { if (!chunk) @@ -1271,7 
+1281,7 @@ MergeJoinTransform::MergeJoinTransform( void MergeJoinTransform::onFinish() { - algorithm.logElapsed(total_stopwatch.elapsedSeconds()); + algorithm.logElapsed(merging_elapsed_ns / 1000000000ULL); } } diff --git a/src/Processors/Transforms/MergeJoinTransform.h b/src/Processors/Transforms/MergeJoinTransform.h index d37a0b9f3ae..841a3f15a92 100644 --- a/src/Processors/Transforms/MergeJoinTransform.h +++ b/src/Processors/Transforms/MergeJoinTransform.h @@ -245,6 +245,8 @@ public: void setAsofInequality(ASOFJoinInequality asof_inequality_); void logElapsed(double seconds); + MergedStats getMergedStats() const override; + private: std::optional handleAnyJoinState(); Status anyJoin(); diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index ede13b29219..c45192e7118 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -185,7 +185,6 @@ void MergeSortingTransform::consume(Chunk chunk) if (!external_merging_sorted) { - bool quiet = false; bool have_all_inputs = false; bool use_average_block_sizes = false; @@ -199,7 +198,6 @@ void MergeSortingTransform::consume(Chunk chunk) limit, /*always_read_till_end_=*/ false, nullptr, - quiet, use_average_block_sizes, have_all_inputs); diff --git a/src/Processors/Transforms/PasteJoinTransform.cpp b/src/Processors/Transforms/PasteJoinTransform.cpp index d2fa7eed256..ad01b721726 100644 --- a/src/Processors/Transforms/PasteJoinTransform.cpp +++ b/src/Processors/Transforms/PasteJoinTransform.cpp @@ -58,6 +58,16 @@ static void prepareChunk(Chunk & chunk) chunk.setColumns(std::move(columns), num_rows); } +IMergingAlgorithm::MergedStats PasteJoinAlgorithm::getMergedStats() const +{ + return + { + .bytes = 0, + .rows = stat.num_rows[0] + stat.num_rows[1], + .blocks = stat.num_blocks[0] + stat.num_blocks[1], + }; +} + void PasteJoinAlgorithm::initialize(Inputs inputs) { if (inputs.size() != 2) diff --git a/src/Processors/Transforms/PasteJoinTransform.h b/src/Processors/Transforms/PasteJoinTransform.h index 6a7e65ee27c..fbe85f6993b 100644 --- a/src/Processors/Transforms/PasteJoinTransform.h +++ b/src/Processors/Transforms/PasteJoinTransform.h @@ -35,8 +35,7 @@ public: void initialize(Inputs inputs) override; void consume(Input & input, size_t source_num) override; Status merge() override; - - void logElapsed(double seconds); + MergedStats getMergedStats() const override; private: Chunk createBlockWithDefaults(size_t source_num); diff --git a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp index bc22f249f97..f41a447049c 100644 --- a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp +++ b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp @@ -83,7 +83,7 @@ TEST(MergingSortedTest, SimpleBlockSizeTest) EXPECT_EQ(pipe.numOutputPorts(), 3); auto transform = std::make_shared(pipe.getHeader(), pipe.numOutputPorts(), sort_description, - 8192, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch, 0, false, nullptr, false, true); + 8192, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch, 0, false, nullptr, true); pipe.addTransform(std::move(transform)); @@ -125,7 +125,7 @@ TEST(MergingSortedTest, MoreInterestingBlockSizes) EXPECT_EQ(pipe.numOutputPorts(), 3); auto transform = std::make_shared(pipe.getHeader(), pipe.numOutputPorts(), sort_description, - 8192, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch, 0, false, 
nullptr, false, true); + 8192, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch, 0, false, nullptr, true); pipe.addTransform(std::move(transform)); diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h index d40af6abf43..3a96ba0abae 100644 --- a/src/Storages/MergeTree/MergeList.h +++ b/src/Storages/MergeTree/MergeList.h @@ -6,6 +6,7 @@ #include #include #include +#include "base/types.h" #include #include #include diff --git a/src/Storages/MergeTree/MergeProgress.h b/src/Storages/MergeTree/MergeProgress.h index dd4922051b5..8562e81e761 100644 --- a/src/Storages/MergeTree/MergeProgress.h +++ b/src/Storages/MergeTree/MergeProgress.h @@ -8,10 +8,10 @@ namespace ProfileEvents { - extern const Event MergesTimeMilliseconds; extern const Event MergedUncompressedBytes; extern const Event MergedRows; - extern const Event Merge; + extern const Event MutatedRows; + extern const Event MutatedUncompressedBytes; } namespace DB @@ -63,18 +63,17 @@ public: void updateWatch() { UInt64 watch_curr_elapsed = merge_list_element_ptr->watch.elapsed(); - ProfileEvents::increment(ProfileEvents::MergesTimeMilliseconds, (watch_curr_elapsed - watch_prev_elapsed) / 1000000); watch_prev_elapsed = watch_curr_elapsed; } - void operator() (const Progress & value) + void operator()(const Progress & value) { - ProfileEvents::increment(ProfileEvents::MergedUncompressedBytes, value.read_bytes); - if (stage.is_first) - { - ProfileEvents::increment(ProfileEvents::MergedRows, value.read_rows); - ProfileEvents::increment(ProfileEvents::Merge); - } + if (merge_list_element_ptr->is_mutation) + updateProfileEvents(value, ProfileEvents::MutatedRows, ProfileEvents::MutatedUncompressedBytes); + else + updateProfileEvents(value, ProfileEvents::MergedRows, ProfileEvents::MergedUncompressedBytes); + + updateWatch(); merge_list_element_ptr->bytes_read_uncompressed += value.read_bytes; @@ -90,6 +89,14 @@ public: std::memory_order_relaxed); } } + +private: + void updateProfileEvents(const Progress & value, ProfileEvents::Event rows_event, ProfileEvents::Event bytes_event) const + { + ProfileEvents::increment(bytes_event, value.read_bytes); + if (stage.is_first) + ProfileEvents::increment(rows_event, value.read_rows); + } }; } diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index ce06adf110c..5f178f08ec3 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -5,9 +5,13 @@ #include #include +#include "Common/ElapsedTimeProfileEventIncrement.h" +#include "Common/Logger.h" +#include "Common/Stopwatch.h" #include #include #include +#include #include #include #include @@ -39,6 +43,16 @@ #include #include +namespace ProfileEvents +{ + extern const Event Merge; + extern const Event MergeTotalMilliseconds; + extern const Event MergeExecuteMilliseconds; + extern const Event MergeHorizontalStageExecuteMilliseconds; + extern const Event MergeVerticalStageExecuteMilliseconds; + extern const Event MergeProjectionStageExecuteMilliseconds; +} + namespace DB { @@ -186,6 +200,8 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() if (isTTLMergeType(global_ctx->future_part->merge_type) && global_ctx->ttl_merges_blocker->isCancelled()) throw Exception(ErrorCodes::ABORTED, "Cancelled merging parts with TTL"); + ProfileEvents::increment(ProfileEvents::Merge); + LOG_DEBUG(ctx->log, "Merging {} parts: from {} to {} into {} with storage {}", global_ctx->future_part->parts.size(), global_ctx->future_part->parts.front()->name, @@ -446,6 
+462,9 @@ void MergeTask::addGatheringColumn(GlobalRuntimeContextPtr global_ctx, const Str MergeTask::StageRuntimeContextPtr MergeTask::ExecuteAndFinalizeHorizontalPart::getContextForNextStage() { + ProfileEvents::increment(ProfileEvents::MergeExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + ProfileEvents::increment(ProfileEvents::MergeHorizontalStageExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + auto new_ctx = std::make_shared(); new_ctx->rows_sources_write_buf = std::move(ctx->rows_sources_write_buf); @@ -463,8 +482,10 @@ MergeTask::StageRuntimeContextPtr MergeTask::ExecuteAndFinalizeHorizontalPart::g MergeTask::StageRuntimeContextPtr MergeTask::VerticalMergeStage::getContextForNextStage() { - auto new_ctx = std::make_shared(); + ProfileEvents::increment(ProfileEvents::MergeExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + ProfileEvents::increment(ProfileEvents::MergeVerticalStageExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + auto new_ctx = std::make_shared(); new_ctx->need_sync = std::move(ctx->need_sync); ctx.reset(); @@ -474,9 +495,14 @@ MergeTask::StageRuntimeContextPtr MergeTask::VerticalMergeStage::getContextForNe bool MergeTask::ExecuteAndFinalizeHorizontalPart::execute() { - assert(subtasks_iterator != subtasks.end()); - if ((this->**subtasks_iterator)()) - return true; + chassert(subtasks_iterator != subtasks.end()); + + Stopwatch watch; + bool res = (this->**subtasks_iterator)(); + ctx->elapsed_execute_ns += watch.elapsedNanoseconds(); + + if (res) + return res; /// Move to the next subtask in an array of subtasks ++subtasks_iterator; @@ -534,7 +560,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeImpl() bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const { - /// No need to execute this part if it is horizontal merge. + /// No need to execute this part if it is horizontal merge. 
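[Editor's note: the execute() override above follows a shape shared by all MergeTask stages: a stage owns an array of member-function-pointer subtasks, each call runs the current subtask under a Stopwatch, and the elapsed nanoseconds are folded into the stage context so getContextForNextStage() can convert them into the MergeExecuteMilliseconds counters. A standalone sketch of that dispatch; stepA/stepB are placeholder names, not actual MergeTask subtasks.]

#include <array>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <iostream>

struct Stage
{
    using Subtask = bool (Stage::*)();

    bool stepA() { return false; }        // completes on the first call
    bool stepB() { return ++calls < 3; }  // needs three calls to finish

    std::array<Subtask, 2> subtasks{&Stage::stepA, &Stage::stepB};
    std::size_t current = 0;
    int calls = 0;
    std::uint64_t elapsed_execute_ns = 0; // analogue of ctx->elapsed_execute_ns

    bool execute() // returns true while the stage still has work
    {
        const auto t0 = std::chrono::steady_clock::now();
        const bool again = (this->*subtasks[current])();
        elapsed_execute_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(
            std::chrono::steady_clock::now() - t0).count();

        if (again)
            return true;
        return ++current < subtasks.size(); // advance to the next subtask
    }
};

int main()
{
    Stage stage;
    while (stage.execute()) {}
    std::cout << "stage busy ns: " << stage.elapsed_execute_ns << '\n';
}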
if (global_ctx->chosen_merge_algorithm != MergeAlgorithm::Vertical) return false; @@ -906,12 +932,24 @@ bool MergeTask::MergeProjectionsStage::finalizeProjectionsAndWholeMerge() const return false; } +MergeTask::StageRuntimeContextPtr MergeTask::MergeProjectionsStage::getContextForNextStage() +{ + ProfileEvents::increment(ProfileEvents::MergeExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + ProfileEvents::increment(ProfileEvents::MergeProjectionStageExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + + return nullptr; +} bool MergeTask::VerticalMergeStage::execute() { - assert(subtasks_iterator != subtasks.end()); - if ((this->**subtasks_iterator)()) - return true; + chassert(subtasks_iterator != subtasks.end()); + + Stopwatch watch; + bool res = (this->**subtasks_iterator)(); + ctx->elapsed_execute_ns += watch.elapsedNanoseconds(); + + if (res) + return res; /// Move to the next subtask in an array of subtasks ++subtasks_iterator; @@ -920,9 +958,14 @@ bool MergeTask::VerticalMergeStage::execute() bool MergeTask::MergeProjectionsStage::execute() { - assert(subtasks_iterator != subtasks.end()); - if ((this->**subtasks_iterator)()) - return true; + chassert(subtasks_iterator != subtasks.end()); + + Stopwatch watch; + bool res = (this->**subtasks_iterator)(); + ctx->elapsed_execute_ns += watch.elapsedNanoseconds(); + + if (res) + return res; /// Move to the next subtask in an array of subtasks ++subtasks_iterator; @@ -969,12 +1012,22 @@ bool MergeTask::VerticalMergeStage::executeVerticalMergeForAllColumns() const bool MergeTask::execute() { - assert(stages_iterator != stages.end()); - if ((*stages_iterator)->execute()) + chassert(stages_iterator != stages.end()); + const auto & current_stage = *stages_iterator; + + if (current_stage->execute()) return true; - /// Stage is finished, need initialize context for the next stage - auto next_stage_context = (*stages_iterator)->getContextForNextStage(); + /// Stage is finished, need to initialize context for the next stage and update profile events. 
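[Editor's note: the lines that follow implement the wall-clock half of the accounting. The merge keeps a single, monotonically growing watch in its merge-list entry, and each finished stage is attributed the delta since the previous checkpoint, so the per-stage totals (MergeHorizontalStageTotalMilliseconds and friends) always sum to MergeTotalMilliseconds, while the busy-time counters above can only be smaller. A standalone sketch of the delta bookkeeping, with made-up watch readings.]

#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    std::uint64_t prev_elapsed_ms = 0;        // checkpoint of the shared watch
    std::vector<std::uint64_t> stage_slices;

    // Hypothetical readings of the ever-growing merge watch at each stage end.
    for (std::uint64_t total_elapsed_ms : {120ULL, 450ULL, 500ULL})
    {
        stage_slices.push_back(total_elapsed_ms - prev_elapsed_ms);
        prev_elapsed_ms = total_elapsed_ms;
    }

    std::uint64_t sum = 0;
    for (auto ms : stage_slices)
        sum += ms;

    // 120 + 330 + 50 == 500: the slices reconstruct the final watch reading.
    std::cout << "sum of stage slices: " << sum << " ms\n";
}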
+ + UInt64 current_elapsed_ms = global_ctx->merge_list_element_ptr->watch.elapsedMilliseconds(); + UInt64 stage_elapsed_ms = current_elapsed_ms - global_ctx->prev_elapesed_ms; + global_ctx->prev_elapesed_ms = current_elapsed_ms; + + ProfileEvents::increment(current_stage->getTotalTimeProfileEvent(), stage_elapsed_ms); + ProfileEvents::increment(ProfileEvents::MergeTotalMilliseconds, stage_elapsed_ms); + + auto next_stage_context = current_stage->getContextForNextStage(); /// Move to the next stage in an array of stages ++stages_iterator; @@ -1099,7 +1152,6 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() /* limit_= */0, /* always_read_till_end_= */false, ctx->rows_sources_write_buf.get(), - true, ctx->blocks_are_granules_size); break; diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 8b0f2130e8e..979c85482e5 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -26,6 +27,12 @@ #include #include +namespace ProfileEvents +{ + extern const Event MergeHorizontalStageTotalMilliseconds; + extern const Event MergeVerticalStageTotalMilliseconds; + extern const Event MergeProjectionStageTotalMilliseconds; +} namespace DB { @@ -134,6 +141,7 @@ private: { virtual void setRuntimeContext(StageRuntimeContextPtr local, StageRuntimeContextPtr global) = 0; virtual StageRuntimeContextPtr getContextForNextStage() = 0; + virtual ProfileEvents::Event getTotalTimeProfileEvent() const = 0; virtual bool execute() = 0; virtual ~IStage() = default; }; @@ -195,6 +203,7 @@ private: bool need_prefix; scope_guard temporary_directory_lock; + UInt64 prev_elapesed_ms{0}; }; using GlobalRuntimeContextPtr = std::shared_ptr; @@ -233,6 +242,7 @@ private: /// Dependencies for next stages std::list::const_iterator it_name_and_type; bool need_sync{false}; + UInt64 elapsed_execute_ns{0}; }; using ExecuteAndFinalizeHorizontalPartRuntimeContextPtr = std::shared_ptr; @@ -256,7 +266,6 @@ private: ExecuteAndFinalizeHorizontalPartSubtasks::const_iterator subtasks_iterator = subtasks.begin(); - MergeAlgorithm chooseMergeAlgorithm() const; void createMergedStream(); void extractMergingAndGatheringColumns() const; @@ -268,6 +277,7 @@ private: } StageRuntimeContextPtr getContextForNextStage() override; + ProfileEvents::Event getTotalTimeProfileEvent() const override { return ProfileEvents::MergeHorizontalStageTotalMilliseconds; } ExecuteAndFinalizeHorizontalPartRuntimeContextPtr ctx; GlobalRuntimeContextPtr global_ctx; @@ -307,6 +317,7 @@ private: QueryPipeline column_parts_pipeline; std::unique_ptr executor; std::unique_ptr rows_sources_read_buf{nullptr}; + UInt64 elapsed_execute_ns{0}; }; using VerticalMergeRuntimeContextPtr = std::shared_ptr; @@ -321,6 +332,7 @@ private: global_ctx = static_pointer_cast(global); } StageRuntimeContextPtr getContextForNextStage() override; + ProfileEvents::Event getTotalTimeProfileEvent() const override { return ProfileEvents::MergeVerticalStageTotalMilliseconds; } bool prepareVerticalMergeForAllColumns() const; bool executeVerticalMergeForAllColumns() const; @@ -361,6 +373,7 @@ private: MergeTasks::iterator projections_iterator; LoggerPtr log{getLogger("MergeTask::MergeProjectionsStage")}; + UInt64 elapsed_execute_ns{0}; }; using MergeProjectionsRuntimeContextPtr = std::shared_ptr; @@ -368,12 +381,15 @@ private: struct MergeProjectionsStage : public IStage { bool execute() override; + void setRuntimeContext(StageRuntimeContextPtr local, 
StageRuntimeContextPtr global) override { ctx = static_pointer_cast(local); global_ctx = static_pointer_cast(global); } - StageRuntimeContextPtr getContextForNextStage() override { return nullptr; } + + StageRuntimeContextPtr getContextForNextStage() override; + ProfileEvents::Event getTotalTimeProfileEvent() const override { return ProfileEvents::MergeProjectionStageTotalMilliseconds; } bool mergeMinMaxIndexAndPrepareProjections() const; bool executeProjections() const; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 9a775db73e2..fe78964a241 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -38,7 +38,10 @@ namespace ProfileEvents { -extern const Event MutateTaskProjectionsCalculationMicroseconds; + extern const Event MutationTotalParts; + extern const Event MutationUntouchedParts; + extern const Event MutationTimeMilliseconds; + extern const Event MutateTaskProjectionsCalculationMicroseconds; } namespace CurrentMetrics @@ -2034,6 +2037,9 @@ bool MutateTask::execute() if (task->executeStep()) return true; + auto total_elapsed_ms = (*ctx->mutate_entry)->watch.elapsedMilliseconds(); + ProfileEvents::increment(ProfileEvents::MutationTimeMilliseconds, total_elapsed_ms); + // The `new_data_part` is a shared pointer and must be moved to allow // part deletion in case it is needed in `MutateFromLogEntryTask::finalize`. // @@ -2118,6 +2124,7 @@ bool MutateTask::prepare() throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to mutate {} parts, not one. " "This is a bug.", ctx->future_part->parts.size()); + ProfileEvents::increment(ProfileEvents::MutationTotalParts); ctx->num_mutations = std::make_unique(CurrentMetrics::PartMutation); auto context_for_reading = Context::createCopy(ctx->context); @@ -2174,6 +2181,7 @@ bool MutateTask::prepare() ctx->temporary_directory_lock = std::move(lock); } + ProfileEvents::increment(ProfileEvents::MutationUntouchedParts); promise.set_value(std::move(part)); return false; } From cab274e1b696e8e355066cce3b05d4337c486157 Mon Sep 17 00:00:00 2001 From: kruglov Date: Fri, 2 Aug 2024 10:46:56 +0300 Subject: [PATCH 170/265] Fixed error on generated columns in MaterializedPostgreSQL --- .../fetchPostgreSQLTableStructure.cpp | 34 +++++++++----- .../fetchPostgreSQLTableStructure.h | 1 + .../test.py | 44 ++++++++++++++++++- 3 files changed, 67 insertions(+), 12 deletions(-) diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index 943f3ae502e..e2f2358c892 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -196,7 +196,7 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList( } else { - std::tuple row; + std::tuple row; while (stream >> row) { const auto column_name = std::get<0>(row); @@ -206,13 +206,14 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList( std::get<3>(row)); columns.push_back(NameAndTypePair(column_name, data_type)); - auto attgenerated = std::get<6>(row); + auto attgenerated = std::get<7>(row); attributes.emplace( column_name, PostgreSQLTableStructure::PGAttribute{ .atttypid = parse(std::get<4>(row)), .atttypmod = parse(std::get<5>(row)), + .attnum = parse(std::get<6>(row)), .atthasdef = false, .attgenerated = attgenerated.empty() ? 
char{} : char(attgenerated[0]), .attr_def = {} @@ -308,6 +309,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( "attndims AS dims, " /// array dimensions "atttypid as type_id, " "atttypmod as type_modifier, " + "attnum as att_num, " "attgenerated as generated " /// if column has GENERATED "FROM pg_attribute " "WHERE attrelid = (SELECT oid FROM pg_class WHERE {}) " @@ -338,17 +340,29 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( "WHERE adrelid = (SELECT oid FROM pg_class WHERE {});", where); pqxx::result result{tx.exec(attrdef_query)}; - for (const auto row : result) + if (static_cast(result.size()) > table.physical_columns->names.size()) { - size_t adnum = row[0].as(); - if (!adnum || adnum > table.physical_columns->names.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Received {} attrdef, but currently fetched columns list has {} columns", + result.size(), table.physical_columns->attributes.size()); + } + + for (const auto & column_attrs : table.physical_columns->attributes) + { + if (column_attrs.second.attgenerated != 's') { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Received adnum {}, but currently fetched columns list has {} columns", - adnum, table.physical_columns->attributes.size()); + continue; + } + + for (const auto row : result) + { + int adnum = row[0].as(); + if (column_attrs.second.attnum == adnum) + { + table.physical_columns->attributes.at(column_attrs.first).attr_def = row[1].as(); + break; + } } - const auto column_name = table.physical_columns->names[adnum - 1]; - table.physical_columns->attributes.at(column_name).attr_def = row[1].as(); } } diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h index 81bf7b278fc..25ece6909fd 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h @@ -16,6 +16,7 @@ struct PostgreSQLTableStructure { Int32 atttypid; Int32 atttypmod; + Int32 attnum; bool atthasdef; char attgenerated; std::string attr_def; diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 406b50bc486..75edb22aab1 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -953,12 +953,14 @@ def test_generated_columns(started_cluster): "", f"""CREATE TABLE {table} ( key integer PRIMARY KEY, - x integer, + x integer DEFAULT 0, + temp integer DEFAULT 0, y integer GENERATED ALWAYS AS (x*2) STORED, - z text); + z text DEFAULT 'z'); """, ) + pg_manager.execute(f"alter table {table} drop column temp;") pg_manager.execute(f"insert into {table} (key, x, z) values (1,1,'1');") pg_manager.execute(f"insert into {table} (key, x, z) values (2,2,'2');") @@ -991,6 +993,44 @@ def test_generated_columns(started_cluster): ) +def test_generated_columns_with_sequence(started_cluster): + table = "test_generated_columns_with_sequence" + + pg_manager.create_postgres_table( + table, + "", + f"""CREATE TABLE {table} ( + key integer PRIMARY KEY, + x integer, + y integer GENERATED ALWAYS AS (x*2) STORED, + z text); + """, + ) + + pg_manager.execute( + f"create sequence {table}_id_seq increment by 1 minvalue 1 start 1;" + ) + pg_manager.execute( + f"alter table {table} alter key set default nextval('{table}_id_seq');" + ) + pg_manager.execute(f"insert into {table} (key, x, z) values (1,1,'1');") + 
pg_manager.execute(f"insert into {table} (key, x, z) values (2,2,'2');") + + pg_manager.create_materialized_db( + ip=started_cluster.postgres_ip, + port=started_cluster.postgres_port, + settings=[ + f"materialized_postgresql_tables_list = '{table}'", + "materialized_postgresql_backoff_min_ms = 100", + "materialized_postgresql_backoff_max_ms = 100", + ], + ) + + check_tables_are_synchronized( + instance, table, postgres_database=pg_manager.get_default_database() + ) + + def test_default_columns(started_cluster): table = "test_default_columns" From 72bc5cd2e99cee09d0e003fb75192c0bb3114bad Mon Sep 17 00:00:00 2001 From: Kruglov Kirill Date: Mon, 5 Aug 2024 16:10:27 +0300 Subject: [PATCH 171/265] Update src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index e2f2358c892..b9fd9c325f8 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -349,7 +349,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure( for (const auto & column_attrs : table.physical_columns->attributes) { - if (column_attrs.second.attgenerated != 's') + if (column_attrs.second.attgenerated != 's') /// e.g. not a generated column { continue; } From 05febdfb2bdfa78f2d017758ce2261fb554e9546 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 8 Aug 2024 13:47:44 +0000 Subject: [PATCH 172/265] add more events and add tests --- src/Common/ProfileEvents.cpp | 9 +- .../Transforms/ColumnGathererTransform.h | 1 - .../Transforms/MergeJoinTransform.cpp | 3 +- .../Transforms/MergeJoinTransform.h | 1 + .../Transforms/PasteJoinTransform.cpp | 2 +- .../Transforms/PasteJoinTransform.h | 1 + src/Storages/MergeTree/MergeList.h | 1 - src/Storages/MergeTree/MergeTask.cpp | 16 ++-- src/Storages/MergeTree/MergeTask.h | 2 +- .../MergeTree/MutateFromLogEntryTask.cpp | 2 + .../MergeTree/MutatePlainMergeTreeTask.cpp | 2 + src/Storages/MergeTree/MutateTask.cpp | 25 ++++-- src/Storages/MergeTree/MutateTask.h | 1 + .../02378_part_log_profile_events.sql | 2 +- .../03221_merge_profile_events.reference | 3 + .../03221_merge_profile_events.sql | 88 +++++++++++++++++++ .../03221_mutate_profile_events.reference | 2 + .../03221_mutate_profile_events.sql | 33 +++++++ 18 files changed, 174 insertions(+), 20 deletions(-) create mode 100644 tests/queries/0_stateless/03221_merge_profile_events.reference create mode 100644 tests/queries/0_stateless/03221_merge_profile_events.sql create mode 100644 tests/queries/0_stateless/03221_mutate_profile_events.reference create mode 100644 tests/queries/0_stateless/03221_mutate_profile_events.sql diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 857a08d8a5d..d43d9fdcea8 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -209,6 +209,8 @@ \ M(Merge, "Number of launched background merges.") \ M(MergedRows, "Rows read for background merges. 
This is the number of rows before merge.") \ + M(MergedColumns, "Number of columns merged during the horizontal stage of merges.") \ + M(GatheredColumns, "Number of columns gathered during the vertical stage of merges.") \ M(MergedUncompressedBytes, "Uncompressed bytes (for columns as they stored in memory) that was read for background merges. This is the number before merge.") \ M(MergeTotalMilliseconds, "Total time spent for background merges") \ M(MergeExecuteMilliseconds, "Total busy time spent for execution of background merges") \ @@ -231,8 +233,11 @@ M(MutationUntouchedParts, "Number of total parts for which mutations tried to be applied but which was completely skipped according to predicate") \ M(MutatedRows, "Rows read for mutations. This is the number of rows before mutation") \ M(MutatedUncompressedBytes, "Uncompressed bytes (for columns as they stored in memory) that was read for mutations. This is the number before mutation.") \ - M(MutationTimeMilliseconds, "Total time spent for mutations.") \ - M(MutateTaskProjectionsCalculationMicroseconds, "Time spent calculating projections") \ + M(MutationTotalMilliseconds, "Total time spent for mutations.") \ + M(MutationExecuteMilliseconds, "Total busy time spent for execution of mutations.") \ + M(MutationAllPartColumns, "Number of times when task to mutate all columns in part was created") \ + M(MutationSomePartColumns, "Number of times when task to mutate some columns in part was created") \ + M(MutateTaskProjectionsCalculationMicroseconds, "Time spent calculating projections in mutations.") \ \ M(MergeTreeDataWriterRows, "Number of rows INSERTed to MergeTree tables.") \ M(MergeTreeDataWriterUncompressedBytes, "Uncompressed bytes (for columns as they stored in memory) INSERTed to MergeTree tables.") \ diff --git a/src/Processors/Transforms/ColumnGathererTransform.h b/src/Processors/Transforms/ColumnGathererTransform.h index a535b2669d0..fbc9a6bfcc6 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.h +++ b/src/Processors/Transforms/ColumnGathererTransform.h @@ -2,7 +2,6 @@ #include #include -#include "base/types.h" #include #include diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 26601207da8..ec7f567ea57 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -515,7 +515,7 @@ IMergingAlgorithm::MergedStats MergeJoinAlgorithm::getMergedStats() const { return { - .bytes = 0, + .bytes = stat.num_bytes[0] + stat.num_bytes[1], .rows = stat.num_rows[0] + stat.num_rows[1], .blocks = stat.num_blocks[0] + stat.num_blocks[1], }; @@ -557,6 +557,7 @@ void MergeJoinAlgorithm::consume(Input & input, size_t source_num) { stat.num_blocks[source_num] += 1; stat.num_rows[source_num] += input.chunk.getNumRows(); + stat.num_bytes[source_num] += input.chunk.allocatedBytes(); } prepareChunk(input.chunk); diff --git a/src/Processors/Transforms/MergeJoinTransform.h b/src/Processors/Transforms/MergeJoinTransform.h index 841a3f15a92..8f74974af0f 100644 --- a/src/Processors/Transforms/MergeJoinTransform.h +++ b/src/Processors/Transforms/MergeJoinTransform.h @@ -282,6 +282,7 @@ private: { size_t num_blocks[2] = {0, 0}; size_t num_rows[2] = {0, 0}; + size_t num_bytes[2] = {0, 0}; size_t max_blocks_loaded = 0; }; diff --git a/src/Processors/Transforms/PasteJoinTransform.cpp b/src/Processors/Transforms/PasteJoinTransform.cpp index ad01b721726..982a347a70f 100644 --- a/src/Processors/Transforms/PasteJoinTransform.cpp 
+++ b/src/Processors/Transforms/PasteJoinTransform.cpp @@ -62,7 +62,7 @@ IMergingAlgorithm::MergedStats PasteJoinAlgorithm::getMergedStats() const { return { - .bytes = 0, + .bytes = stat.num_bytes[0] + stat.num_bytes[1], .rows = stat.num_rows[0] + stat.num_rows[1], .blocks = stat.num_blocks[0] + stat.num_blocks[1], }; diff --git a/src/Processors/Transforms/PasteJoinTransform.h b/src/Processors/Transforms/PasteJoinTransform.h index fbe85f6993b..c184f20362d 100644 --- a/src/Processors/Transforms/PasteJoinTransform.h +++ b/src/Processors/Transforms/PasteJoinTransform.h @@ -54,6 +54,7 @@ private: { size_t num_blocks[2] = {0, 0}; size_t num_rows[2] = {0, 0}; + size_t num_bytes[2] = {0, 0}; size_t max_blocks_loaded = 0; }; diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h index 3a96ba0abae..d40af6abf43 100644 --- a/src/Storages/MergeTree/MergeList.h +++ b/src/Storages/MergeTree/MergeList.h @@ -6,7 +6,6 @@ #include #include #include -#include "base/types.h" #include #include #include diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 5f178f08ec3..39bac8f7c24 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -5,9 +5,6 @@ #include #include -#include "Common/ElapsedTimeProfileEventIncrement.h" -#include "Common/Logger.h" -#include "Common/Stopwatch.h" #include #include #include @@ -46,6 +43,8 @@ namespace ProfileEvents { extern const Event Merge; + extern const Event MergedColumns; + extern const Event GatheredColumns; extern const Event MergeTotalMilliseconds; extern const Event MergeExecuteMilliseconds; extern const Event MergeHorizontalStageExecuteMilliseconds; @@ -183,6 +182,8 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColu bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() { + ProfileEvents::increment(ProfileEvents::Merge); + String local_tmp_prefix; if (global_ctx->need_prefix) { @@ -200,8 +201,6 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() if (isTTLMergeType(global_ctx->future_part->merge_type) && global_ctx->ttl_merges_blocker->isCancelled()) throw Exception(ErrorCodes::ABORTED, "Cancelled merging parts with TTL"); - ProfileEvents::increment(ProfileEvents::Merge); - LOG_DEBUG(ctx->log, "Merging {} parts: from {} to {} into {} with storage {}", global_ctx->future_part->parts.size(), global_ctx->future_part->parts.front()->name, @@ -810,6 +809,9 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c /// Print overall profiling info. NOTE: it may duplicates previous messages { + ProfileEvents::increment(ProfileEvents::MergedColumns, global_ctx->merging_columns.size()); + ProfileEvents::increment(ProfileEvents::GatheredColumns, global_ctx->gathering_columns.size()); + double elapsed_seconds = global_ctx->merge_list_element_ptr->watch.elapsedSeconds(); LOG_DEBUG(ctx->log, "Merge sorted {} rows, containing {} columns ({} merged, {} gathered) in {} sec., {} rows/sec., {}/sec.", @@ -1021,8 +1023,8 @@ bool MergeTask::execute() /// Stage is finished, need to initialize context for the next stage and update profile events. 
UInt64 current_elapsed_ms = global_ctx->merge_list_element_ptr->watch.elapsedMilliseconds(); - UInt64 stage_elapsed_ms = current_elapsed_ms - global_ctx->prev_elapesed_ms; - global_ctx->prev_elapesed_ms = current_elapsed_ms; + UInt64 stage_elapsed_ms = current_elapsed_ms - global_ctx->prev_elapsed_ms; + global_ctx->prev_elapsed_ms = current_elapsed_ms; ProfileEvents::increment(current_stage->getTotalTimeProfileEvent(), stage_elapsed_ms); ProfileEvents::increment(ProfileEvents::MergeTotalMilliseconds, stage_elapsed_ms); diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 979c85482e5..38ccc287187 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -203,7 +203,7 @@ private: bool need_prefix; scope_guard temporary_directory_lock; - UInt64 prev_elapesed_ms{0}; + UInt64 prev_elapsed_ms{0}; }; using GlobalRuntimeContextPtr = std::shared_ptr; diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 73084f487b9..56f68fd265a 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -254,6 +254,7 @@ bool MutateFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrit LOG_ERROR(log, "{}. Data after mutation is not byte-identical to data on another replicas. " "We will download merged part from replica to force byte-identical result.", getCurrentExceptionMessage(false)); + mutate_task->updateProfileEvents(); write_part_log(ExecutionStatus::fromCurrentException("", true)); if (storage.getSettings()->detach_not_byte_identical_parts) @@ -281,6 +282,7 @@ bool MutateFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrit */ finish_callback = [storage_ptr = &storage]() { storage_ptr->merge_selecting_task->schedule(); }; ProfileEvents::increment(ProfileEvents::ReplicatedPartMutations); + mutate_task->updateProfileEvents(); write_part_log({}); return true; diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 9aec074deae..10461eb5942 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -102,6 +102,7 @@ bool MutatePlainMergeTreeTask::executeStep() transaction.commit(); storage.updateMutationEntriesErrors(future_part, true, ""); + mutate_task->updateProfileEvents(); write_part_log({}); state = State::NEED_FINISH; @@ -114,6 +115,7 @@ bool MutatePlainMergeTreeTask::executeStep() PreformattedMessage exception_message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false); LOG_ERROR(getLogger("MutatePlainMergeTreeTask"), exception_message); storage.updateMutationEntriesErrors(future_part, false, exception_message.text); + mutate_task->updateProfileEvents(); write_part_log(ExecutionStatus::fromCurrentException("", true)); tryLogCurrentException(__PRETTY_FUNCTION__); return false; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index fe78964a241..f4af38d3b67 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -40,7 +40,10 @@ namespace ProfileEvents { extern const Event MutationTotalParts; extern const Event MutationUntouchedParts; - extern const Event MutationTimeMilliseconds; + extern const Event MutationTotalMilliseconds; + extern const Event MutationExecuteMilliseconds; + extern const Event MutationAllPartColumns; + extern const Event 
MutationSomePartColumns; extern const Event MutateTaskProjectionsCalculationMicroseconds; } @@ -1049,6 +1052,7 @@ struct MutationContext /// Whether we need to count lightweight delete rows in this mutation bool count_lightweight_deleted_rows; + UInt64 execute_elapsed_ns = 0; }; using MutationContextPtr = std::shared_ptr; @@ -2020,6 +2024,9 @@ MutateTask::MutateTask( bool MutateTask::execute() { + Stopwatch watch; + SCOPE_EXIT({ ctx->execute_elapsed_ns += watch.elapsedNanoseconds(); }); + switch (state) { case State::NEED_PREPARE: @@ -2037,9 +2044,6 @@ bool MutateTask::execute() if (task->executeStep()) return true; - auto total_elapsed_ms = (*ctx->mutate_entry)->watch.elapsedMilliseconds(); - ProfileEvents::increment(ProfileEvents::MutationTimeMilliseconds, total_elapsed_ms); - // The `new_data_part` is a shared pointer and must be moved to allow // part deletion in case it is needed in `MutateFromLogEntryTask::finalize`. // @@ -2056,6 +2060,15 @@ bool MutateTask::execute() return false; } +void MutateTask::updateProfileEvents() const +{ + UInt64 total_elapsed_ms = (*ctx->mutate_entry)->watch.elapsedMilliseconds(); + UInt64 execute_elapsed_ms = ctx->execute_elapsed_ns / 1000000UL; + + ProfileEvents::increment(ProfileEvents::MutationTotalMilliseconds, total_elapsed_ms); + ProfileEvents::increment(ProfileEvents::MutationExecuteMilliseconds, execute_elapsed_ms); +} + static bool canSkipConversionToNullable(const MergeTreeDataPartPtr & part, const MutationCommand & command) { if (command.type != MutationCommand::READ_COLUMN) @@ -2118,13 +2131,13 @@ static bool canSkipMutationCommandForPart(const MergeTreeDataPartPtr & part, con bool MutateTask::prepare() { + ProfileEvents::increment(ProfileEvents::MutationTotalParts); MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry); if (ctx->future_part->parts.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to mutate {} parts, not one. " "This is a bug.", ctx->future_part->parts.size()); - ProfileEvents::increment(ProfileEvents::MutationTotalParts); ctx->num_mutations = std::make_unique(CurrentMetrics::PartMutation); auto context_for_reading = Context::createCopy(ctx->context); @@ -2291,6 +2304,7 @@ bool MutateTask::prepare() ctx->new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS; task = std::make_unique(ctx); + ProfileEvents::increment(ProfileEvents::MutationAllPartColumns); } else /// TODO: check that we modify only non-key columns in this case. 
{ @@ -2330,6 +2344,7 @@ bool MutateTask::prepare() ctx->new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::ASK_KEEPER; task = std::make_unique(ctx); + ProfileEvents::increment(ProfileEvents::MutationSomePartColumns); } return true; diff --git a/src/Storages/MergeTree/MutateTask.h b/src/Storages/MergeTree/MutateTask.h index dc22b90f0e9..08427bff6d8 100644 --- a/src/Storages/MergeTree/MutateTask.h +++ b/src/Storages/MergeTree/MutateTask.h @@ -39,6 +39,7 @@ public: bool need_prefix_); bool execute(); + void updateProfileEvents() const; std::future getFuture() { diff --git a/tests/queries/0_stateless/02378_part_log_profile_events.sql b/tests/queries/0_stateless/02378_part_log_profile_events.sql index 38d3f8b4c05..eec76d6f50e 100644 --- a/tests/queries/0_stateless/02378_part_log_profile_events.sql +++ b/tests/queries/0_stateless/02378_part_log_profile_events.sql @@ -39,7 +39,7 @@ SYSTEM FLUSH LOGS; SELECT if(count() == 2, 'Ok', 'Error: ' || toString(count())), - if(SUM(ProfileEvents['MergedRows']) == 512, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MergedRows']))), + if(SUM(ProfileEvents['MutatedRows']) == 512, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['MutatedRows']))), if(SUM(ProfileEvents['FileOpen']) > 1, 'Ok', 'Error: ' || toString(SUM(ProfileEvents['FileOpen']))) FROM system.part_log WHERE event_time > now() - INTERVAL 10 MINUTE diff --git a/tests/queries/0_stateless/03221_merge_profile_events.reference b/tests/queries/0_stateless/03221_merge_profile_events.reference new file mode 100644 index 00000000000..729e53eae79 --- /dev/null +++ b/tests/queries/0_stateless/03221_merge_profile_events.reference @@ -0,0 +1,3 @@ +Horizontal 1 20000 3 0 480000 1 1 1 1 +Vertical 1 20000 1 2 480000 1 1 1 1 1 1 +Vertical 2 20020 4 2 480660 1 1 1 1 1 1 1 1 diff --git a/tests/queries/0_stateless/03221_merge_profile_events.sql b/tests/queries/0_stateless/03221_merge_profile_events.sql new file mode 100644 index 00000000000..787aff93ffc --- /dev/null +++ b/tests/queries/0_stateless/03221_merge_profile_events.sql @@ -0,0 +1,88 @@ +-- Tags: no-random-settings, no-random-merge-tree-settings + +DROP TABLE IF EXISTS t_merge_profile_events_1; + +CREATE TABLE t_merge_profile_events_1 (id UInt64, v1 UInt64, v2 UInt64) +ENGINE = MergeTree ORDER BY id +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO t_merge_profile_events_1 SELECT number, number, number FROM numbers(10000); +INSERT INTO t_merge_profile_events_1 SELECT number, number, number FROM numbers(10000); + +OPTIMIZE TABLE t_merge_profile_events_1 FINAL; +SYSTEM FLUSH LOGS; + +SELECT + merge_algorithm, + ProfileEvents['Merge'], + ProfileEvents['MergedRows'], + ProfileEvents['MergedColumns'], + ProfileEvents['GatheredColumns'], + ProfileEvents['MergedUncompressedBytes'], + ProfileEvents['MergeTotalMilliseconds'] > 0, + ProfileEvents['MergeExecuteMilliseconds'] > 0, + ProfileEvents['MergeHorizontalStageTotalMilliseconds'] > 0, + ProfileEvents['MergeHorizontalStageExecuteMilliseconds'] > 0 +FROM system.part_log WHERE database = currentDatabase() AND table = 't_merge_profile_events_1' AND event_type = 'MergeParts' AND part_name = 'all_1_2_1'; + +DROP TABLE IF EXISTS t_merge_profile_events_1; + +DROP TABLE IF EXISTS t_merge_profile_events_2; + +CREATE TABLE t_merge_profile_events_2 (id UInt64, v1 UInt64, v2 UInt64) +ENGINE = MergeTree ORDER BY id +SETTINGS min_bytes_for_wide_part = 0, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1; + +INSERT INTO 
t_merge_profile_events_2 SELECT number, number, number FROM numbers(10000); +INSERT INTO t_merge_profile_events_2 SELECT number, number, number FROM numbers(10000); + +OPTIMIZE TABLE t_merge_profile_events_2 FINAL; +SYSTEM FLUSH LOGS; + +SELECT + merge_algorithm, + ProfileEvents['Merge'], + ProfileEvents['MergedRows'], + ProfileEvents['MergedColumns'], + ProfileEvents['GatheredColumns'], + ProfileEvents['MergedUncompressedBytes'], + ProfileEvents['MergeTotalMilliseconds'] > 0, + ProfileEvents['MergeExecuteMilliseconds'] > 0, + ProfileEvents['MergeHorizontalStageTotalMilliseconds'] > 0, + ProfileEvents['MergeHorizontalStageExecuteMilliseconds'] > 0, + ProfileEvents['MergeVerticalStageTotalMilliseconds'] > 0, + ProfileEvents['MergeVerticalStageExecuteMilliseconds'] > 0, +FROM system.part_log WHERE database = currentDatabase() AND table = 't_merge_profile_events_2' AND event_type = 'MergeParts' AND part_name = 'all_1_2_1'; + +DROP TABLE IF EXISTS t_merge_profile_events_2; + +DROP TABLE IF EXISTS t_merge_profile_events_3; + +CREATE TABLE t_merge_profile_events_3 (id UInt64, v1 UInt64, v2 UInt64, PROJECTION p (SELECT sum(v1), sum(v2) GROUP BY id % 10)) +ENGINE = MergeTree ORDER BY id +SETTINGS min_bytes_for_wide_part = 0, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1; + +INSERT INTO t_merge_profile_events_3 SELECT number, number, number FROM numbers(10000); +INSERT INTO t_merge_profile_events_3 SELECT number, number, number FROM numbers(10000); + +OPTIMIZE TABLE t_merge_profile_events_3 FINAL; +SYSTEM FLUSH LOGS; + +SELECT + merge_algorithm, + ProfileEvents['Merge'], + ProfileEvents['MergedRows'], + ProfileEvents['MergedColumns'], + ProfileEvents['GatheredColumns'], + ProfileEvents['MergedUncompressedBytes'], + ProfileEvents['MergeTotalMilliseconds'] > 0, + ProfileEvents['MergeExecuteMilliseconds'] > 0, + ProfileEvents['MergeHorizontalStageTotalMilliseconds'] > 0, + ProfileEvents['MergeHorizontalStageExecuteMilliseconds'] > 0, + ProfileEvents['MergeVerticalStageTotalMilliseconds'] > 0, + ProfileEvents['MergeVerticalStageExecuteMilliseconds'] > 0, + ProfileEvents['MergeProjectionStageTotalMilliseconds'] > 0, + ProfileEvents['MergeProjectionStageExecuteMilliseconds'] > 0, +FROM system.part_log WHERE database = currentDatabase() AND table = 't_merge_profile_events_3' AND event_type = 'MergeParts' AND part_name = 'all_1_2_1'; + +DROP TABLE IF EXISTS t_merge_profile_events_3; diff --git a/tests/queries/0_stateless/03221_mutate_profile_events.reference b/tests/queries/0_stateless/03221_mutate_profile_events.reference new file mode 100644 index 00000000000..d094c37ff88 --- /dev/null +++ b/tests/queries/0_stateless/03221_mutate_profile_events.reference @@ -0,0 +1,2 @@ +3 2 1 10000 160000 0 1 1 1 +4 2 1 10000 320000 1 0 1 1 diff --git a/tests/queries/0_stateless/03221_mutate_profile_events.sql b/tests/queries/0_stateless/03221_mutate_profile_events.sql new file mode 100644 index 00000000000..e9f7f9670bd --- /dev/null +++ b/tests/queries/0_stateless/03221_mutate_profile_events.sql @@ -0,0 +1,33 @@ +-- Tags: no-random-settings, no-random-merge-tree-settings + +DROP TABLE IF EXISTS t_mutate_profile_events; + +CREATE TABLE t_mutate_profile_events (key UInt64, id UInt64, v1 UInt64, v2 UInt64) +ENGINE = MergeTree ORDER BY id PARTITION BY key +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO t_mutate_profile_events SELECT 1, number, number, number FROM numbers(10000); +INSERT INTO t_mutate_profile_events SELECT 2, number, number, number FROM 
numbers(10000);
+
+SET mutations_sync = 2;
+
+ALTER TABLE t_mutate_profile_events UPDATE v1 = 1000 WHERE key = 1;
+ALTER TABLE t_mutate_profile_events DELETE WHERE key = 2 AND v2 % 10 = 0;
+
+SYSTEM FLUSH LOGS;
+
+SELECT
+    splitByChar('_', part_name)[-1] AS version,
+    sum(ProfileEvents['MutationTotalParts']),
+    sum(ProfileEvents['MutationUntouchedParts']),
+    sum(ProfileEvents['MutatedRows']),
+    sum(ProfileEvents['MutatedUncompressedBytes']),
+    sum(ProfileEvents['MutationAllPartColumns']),
+    sum(ProfileEvents['MutationSomePartColumns']),
+    sum(ProfileEvents['MutationTotalMilliseconds']) > 0,
+    sum(ProfileEvents['MutationExecuteMilliseconds']) > 0,
+FROM system.part_log
+WHERE database = currentDatabase() AND table = 't_mutate_profile_events' AND event_type = 'MutatePart'
+GROUP BY version ORDER BY version;
+
+DROP TABLE IF EXISTS t_mutate_profile_events

From 1e2eea9f6333b165b1b15acef5f489ad067a57f3 Mon Sep 17 00:00:00 2001
From: kruglov
Date: Fri, 9 Aug 2024 10:16:15 +0300
Subject: [PATCH 173/265] Fixed errors when publication name contains symbols other than [a-z_]

---
 src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
index f632e553a0d..01f78673ed8 100644
--- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
+++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
@@ -659,7 +659,7 @@ void PostgreSQLReplicationHandler::dropReplicationSlot(pqxx::nontransaction & tx

 void PostgreSQLReplicationHandler::dropPublication(pqxx::nontransaction & tx)
 {
-    std::string query_str = fmt::format("DROP PUBLICATION IF EXISTS {}", publication_name);
+    std::string query_str = fmt::format("DROP PUBLICATION IF EXISTS {}", doubleQuoteString(publication_name));
     tx.exec(query_str);
     LOG_DEBUG(log, "Dropped publication: {}", publication_name);
 }
@@ -667,7 +667,7 @@ void PostgreSQLReplicationHandler::dropPublication(pqxx::nontransaction & tx)

 void PostgreSQLReplicationHandler::addTableToPublication(pqxx::nontransaction & ntx, const String & table_name)
 {
-    std::string query_str = fmt::format("ALTER PUBLICATION {} ADD TABLE ONLY {}", publication_name, doubleQuoteWithSchema(table_name));
+    std::string query_str = fmt::format("ALTER PUBLICATION {} ADD TABLE ONLY {}", doubleQuoteString(publication_name), doubleQuoteWithSchema(table_name));
     ntx.exec(query_str);
     LOG_TRACE(log, "Added table {} to publication `{}`", doubleQuoteWithSchema(table_name), publication_name);
 }

From dccb6bdd88ef26244ddb1c9de8d1232140036294 Mon Sep 17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Fri, 9 Aug 2024 18:33:05 +0800
Subject: [PATCH 174/265] fix failed unit tests

---
 .../Formats/Impl/ORCBlockOutputFormat.cpp | 47 +++++++------------
 .../Formats/Impl/ORCBlockOutputFormat.h   |  5 --
 2 files changed, 17 insertions(+), 35 deletions(-)

diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp
index bd89ae0fa86..4a7a23158ff 100644
--- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp
+++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp
@@ -280,20 +280,28 @@ void ORCBlockOutputFormat::writeColumn(
     DataTypePtr & type,
     const PaddedPODArray * null_bytemap)
 {
-    orc_column.numElements = column.size();
+    size_t rows = column.size();
+    orc_column.resize(rows);
+    orc_column.numElements = rows;
+
+    /// Calculate orc_column.hasNulls
     if (null_bytemap)
-    {
        orc_column.hasNulls =
!memoryIsZero(null_bytemap->data(), 0, null_bytemap->size()); - if (orc_column.hasNulls) - { - orc_column.notNull.resize(null_bytemap->size()); - for (size_t i = 0; i < null_bytemap->size(); ++i) - orc_column.notNull[i] = !(*null_bytemap)[i]; - } - } else orc_column.hasNulls = false; + /// Fill orc_column.notNull + if (orc_column.hasNulls) + { + for (size_t i = 0; i < rows; ++i) + orc_column.notNull[i] = !(*null_bytemap)[i]; + } + else + { + for (size_t i = 0; i < rows; ++i) + orc_column.notNull[i] = 1; + } + /// ORC doesn't have unsigned types, so cast everything to signed and sign-extend to Int64 to /// make the ORC library calculate min and max correctly. switch (type->getTypeId()) @@ -516,27 +524,6 @@ void ORCBlockOutputFormat::writeColumn( } } -size_t ORCBlockOutputFormat::getColumnSize(const IColumn & column, DataTypePtr & type) -{ - if (type->getTypeId() == TypeIndex::Array) - { - auto nested_type = assert_cast(*type).getNestedType(); - const IColumn & nested_column = assert_cast(column).getData(); - return std::max(column.size(), getColumnSize(nested_column, nested_type)); - } - - return column.size(); -} - -size_t ORCBlockOutputFormat::getMaxColumnSize(Chunk & chunk) -{ - size_t columns_num = chunk.getNumColumns(); - size_t max_column_size = 0; - for (size_t i = 0; i != columns_num; ++i) - max_column_size = std::max(max_column_size, getColumnSize(*chunk.getColumns()[i], data_types[i])); - return max_column_size; -} - void ORCBlockOutputFormat::consume(Chunk chunk) { if (!writer) diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h index 28837193d1a..06ecac9b820 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h @@ -69,11 +69,6 @@ private: void writeColumn(orc::ColumnVectorBatch & orc_column, const IColumn & column, DataTypePtr & type, const PaddedPODArray * null_bytemap); - /// These two functions are needed to know maximum nested size of arrays to - /// create an ORC Batch with the appropriate size - size_t getColumnSize(const IColumn & column, DataTypePtr & type); - size_t getMaxColumnSize(Chunk & chunk); - void prepareWriter(); const FormatSettings format_settings; From b757522fc4ac545451acc398ab230323fb7c0fd3 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 9 Aug 2024 14:20:57 +0100 Subject: [PATCH 175/265] fix build --- programs/keeper/Keeper.cpp | 2 +- src/Server/HTTPHandlerFactory.cpp | 41 ++----------------------- src/Server/PrometheusRequestHandler.cpp | 4 +-- src/Server/PrometheusRequestHandler.h | 7 +++-- 4 files changed, 10 insertions(+), 44 deletions(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index ae51a62ff9c..a447a9e50f6 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -515,7 +515,7 @@ try "Prometheus: http://" + address.toString(), std::make_unique( std::move(my_http_context), - createKeeperPrometheusHandlerFactory(config_getter(), async_metrics, "PrometheusHandler-factory"), + createKeeperPrometheusHandlerFactory(*this, config_getter(), async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index 0ee45783d52..fc31ad2874e 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -122,7 +122,8 @@ static inline auto createHandlersFactoryFromConfig( } else if (handler_type == "prometheus") { - 
main_handler_factory->addHandler(createPrometheusHandlerFactoryForHTTPRule(config, prefix + "." + key, async_metrics)); + main_handler_factory->addHandler( + createPrometheusHandlerFactoryForHTTPRule(server, config, prefix + "." + key, async_metrics)); } else if (handler_type == "replicas_status") { @@ -199,19 +200,7 @@ HTTPRequestHandlerFactoryPtr createHandlerFactory(IServer & server, const Poco:: else if (name == "InterserverIOHTTPHandler-factory" || name == "InterserverIOHTTPSHandler-factory") return createInterserverHTTPHandlerFactory(server, name); else if (name == "PrometheusHandler-factory") -<<<<<<< HEAD - { - auto metrics_writer = std::make_shared(config, "prometheus", async_metrics); - return createPrometheusMainHandlerFactory(config, metrics_writer, name); - } -||||||| 02b8d563e3a - { - auto metrics_writer = std::make_shared(config, "prometheus", async_metrics); - return createPrometheusMainHandlerFactory(server, config, metrics_writer, name); - } -======= return createPrometheusHandlerFactory(server, config, async_metrics, name); ->>>>>>> master throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown HTTP handler factory name."); } @@ -298,34 +287,8 @@ void addDefaultHandlersFactory( ); factory.addHandler(query_handler); -<<<<<<< HEAD - /// We check that prometheus handler will be served on current (default) port. - /// Otherwise it will be created separately, see createHandlerFactory(...). - if (config.has("prometheus") && config.getInt("prometheus.port", 0) == 0) - { - auto writer = std::make_shared(config, "prometheus", async_metrics); - auto creator - = [writer]() -> std::unique_ptr { return std::make_unique(writer); }; - auto prometheus_handler = std::make_shared>(std::move(creator)); - prometheus_handler->attachStrictPath(config.getString("prometheus.endpoint", "/metrics")); - prometheus_handler->allowGetAndHeadRequest(); -||||||| 02b8d563e3a - /// We check that prometheus handler will be served on current (default) port. - /// Otherwise it will be created separately, see createHandlerFactory(...). - if (config.has("prometheus") && config.getInt("prometheus.port", 0) == 0) - { - auto writer = std::make_shared(config, "prometheus", async_metrics); - auto creator = [&server, writer] () -> std::unique_ptr - { - return std::make_unique(server, writer); - }; - auto prometheus_handler = std::make_shared>(std::move(creator)); - prometheus_handler->attachStrictPath(config.getString("prometheus.endpoint", "/metrics")); - prometheus_handler->allowGetAndHeadRequest(); -======= /// createPrometheusHandlerFactoryForHTTPRuleDefaults() can return nullptr if prometheus protocols must not be served on http port. 
if (auto prometheus_handler = createPrometheusHandlerFactoryForHTTPRuleDefaults(server, config, async_metrics)) ->>>>>>> master factory.addHandler(prometheus_handler); } diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index 52cda92d9b4..ae1fb6d629e 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -95,7 +95,7 @@ public: class PrometheusRequestHandler::ImplWithContext : public Impl { public: - explicit ImplWithContext(PrometheusRequestHandler & parent) : Impl(parent), default_settings(parent.server.context()->getSettingsRef()) { } + explicit ImplWithContext(PrometheusRequestHandler & parent) : Impl(parent), default_settings(server().context()->getSettingsRef()) { } virtual void handlingRequestWithContext(HTTPServerRequest & request, HTTPServerResponse & response) = 0; @@ -353,7 +353,7 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe if (request.getVersion() == HTTPServerRequest::HTTP_1_1) response.setChunkedTransferEncoding(true); - setResponseDefaultHeaders(response); + setResponseDefaultHeaders(response); impl->beforeHandlingRequest(request); impl->handleRequest(request, response); diff --git a/src/Server/PrometheusRequestHandler.h b/src/Server/PrometheusRequestHandler.h index 7aeed11d6b8..281ecf5260e 100644 --- a/src/Server/PrometheusRequestHandler.h +++ b/src/Server/PrometheusRequestHandler.h @@ -15,8 +15,11 @@ class WriteBufferFromHTTPServerResponse; class PrometheusRequestHandler : public HTTPRequestHandler { public: - PrometheusRequestHandler(const PrometheusRequestHandlerConfig & config_, - const AsynchronousMetrics & async_metrics_, std::shared_ptr metrics_writer_); + PrometheusRequestHandler( + IServer & server_, + const PrometheusRequestHandlerConfig & config_, + const AsynchronousMetrics & async_metrics_, + std::shared_ptr metrics_writer_); ~PrometheusRequestHandler() override; void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event_) override; From 43a38fb5f0563f50d38cf5d988db3b181b64f606 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 9 Aug 2024 15:11:08 +0100 Subject: [PATCH 176/265] rm redundant file --- programs/server/config.d/listen.xml | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 programs/server/config.d/listen.xml diff --git a/programs/server/config.d/listen.xml b/programs/server/config.d/listen.xml deleted file mode 100644 index f94e5c88568..00000000000 --- a/programs/server/config.d/listen.xml +++ /dev/null @@ -1,3 +0,0 @@ - - :: - From 97eded0ac7aa41a9320729b418c8ab2ff1821202 Mon Sep 17 00:00:00 2001 From: kruglov Date: Fri, 9 Aug 2024 17:38:24 +0300 Subject: [PATCH 177/265] Fixed test_dependent_loading. 
event_time_microseconds has two dates connected with "\n" --- .../test_postgresql_replica_database_engine_2/test.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 75edb22aab1..7fdd17625a9 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -1127,9 +1127,13 @@ def test_dependent_loading(started_cluster): nested_time = instance.query( f"SELECT event_time_microseconds FROM system.text_log WHERE message like 'Loading table default.{uuid}_nested' and message not like '%like%'" ).strip() - time = instance.query( - f"SELECT event_time_microseconds FROM system.text_log WHERE message like 'Loading table default.{table}' and message not like '%like%'" - ).strip() + time = ( + instance.query( + f"SELECT event_time_microseconds FROM system.text_log WHERE message like 'Loading table default.{table}' and message not like '%like%'" + ) + .strip() + .split("\n")[-1] + ) instance.query( f"SELECT toDateTime64('{nested_time}', 6) < toDateTime64('{time}', 6)" ) From c61eef4a7659e4856cc3266d8d7dd28b4e095d2b Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 9 Aug 2024 21:17:49 +0000 Subject: [PATCH 178/265] Reimplement Dynamic type --- docs/en/sql-reference/data-types/dynamic.md | 123 +- src/Columns/ColumnDynamic.cpp | 881 ++++-- src/Columns/ColumnDynamic.h | 164 +- src/Columns/ColumnVariant.cpp | 29 +- src/Columns/ColumnVariant.h | 11 +- src/Columns/tests/gtest_column_dynamic.cpp | 428 ++- src/DataTypes/DataTypeDynamic.cpp | 79 +- src/DataTypes/DataTypeFactory.cpp | 6 + src/DataTypes/DataTypeFactory.h | 1 + src/DataTypes/DataTypeTuple.cpp | 13 +- src/DataTypes/DataTypesBinaryEncoding.cpp | 2 +- .../Serializations/SerializationDynamic.cpp | 361 ++- .../Serializations/SerializationDynamic.h | 8 +- .../SerializationDynamicElement.cpp | 127 +- .../SerializationDynamicElement.h | 8 +- .../SerializationVariantElement.cpp | 41 +- .../SerializationVariantElement.h | 6 +- src/Formats/JSONExtractTree.cpp | 31 +- src/Functions/FunctionsConversion.cpp | 326 +-- src/Functions/dynamicType.cpp | 91 +- tests/queries/0_stateless/00000_test.sql | 43 + ...03033_dynamic_text_serialization.reference | 26 +- .../03033_dynamic_text_serialization.sql | 4 +- .../03034_dynamic_conversions.reference | 18 +- .../0_stateless/03034_dynamic_conversions.sql | 6 +- .../03035_dynamic_sorting.reference | 715 +++-- .../0_stateless/03035_dynamic_sorting.sql | 67 +- ...ed_subcolumns_compact_merge_tree.reference | 20 + ...d_shared_subcolumns_compact_merge_tree.sql | 43 + ...ic_read_shared_subcolumns_memory.reference | 20 + ..._dynamic_read_shared_subcolumns_memory.sql | 43 + ..._read_shared_subcolumns_small.reference.j2 | 2460 +++++++++++++++++ ...ynamic_read_shared_subcolumns_small.sql.j2 | 46 + ...hared_subcolumns_wide_merge_tree.reference | 20 + ...read_shared_subcolumns_wide_merge_tree.sql | 43 + ..._1_horizontal_compact_merge_tree.reference | 94 +- ...merges_1_horizontal_compact_merge_tree.sql | 29 +- ...s_1_horizontal_compact_wide_tree.reference | 94 +- ..._merges_1_horizontal_compact_wide_tree.sql | 28 +- ...es_1_vertical_compact_merge_tree.reference | 94 +- ...c_merges_1_vertical_compact_merge_tree.sql | 31 +- ...erges_1_vertical_wide_merge_tree.reference | 94 +- ...amic_merges_1_vertical_wide_merge_tree.sql | 28 +- .../03037_dynamic_merges_small.reference.j2 | 376 
++- .../03037_dynamic_merges_small.sql.j2 | 28 +- ...ynamic_merges_compact_horizontal.reference | 84 +- ...sted_dynamic_merges_compact_horizontal.sql | 33 +- ..._dynamic_merges_compact_vertical.reference | 84 +- ...nested_dynamic_merges_compact_vertical.sql | 33 +- ...8_nested_dynamic_merges_small.reference.j2 | 340 ++- .../03038_nested_dynamic_merges_small.sql.j2 | 36 +- ...d_dynamic_merges_wide_horizontal.reference | 84 +- ..._nested_dynamic_merges_wide_horizontal.sql | 33 +- ...ted_dynamic_merges_wide_vertical.reference | 84 +- ...38_nested_dynamic_merges_wide_vertical.sql | 33 +- ...type_alters_1_compact_merge_tree.reference | 123 +- ...namic_type_alters_1_compact_merge_tree.sql | 20 +- ...040_dynamic_type_alters_1_memory.reference | 94 +- ...ic_type_alters_1_wide_merge_tree.reference | 157 +- .../03041_dynamic_type_check_table.sh | 2 +- .../03150_dynamic_type_mv_insert.reference | 50 +- .../03150_dynamic_type_mv_insert.sql | 7 + ...151_dynamic_type_scale_max_types.reference | 48 +- .../03151_dynamic_type_scale_max_types.sql | 8 +- .../03152_dynamic_type_simple.reference | 2 +- .../0_stateless/03152_dynamic_type_simple.sql | 8 + .../0_stateless/03153_dynamic_type_empty.sql | 2 + .../03159_dynamic_type_all_types.sql | 4 +- .../03172_dynamic_binary_serialization.sh | 4 +- ...3200_memory_engine_alter_dynamic.reference | 20 +- .../03200_memory_engine_alter_dynamic.sql | 4 +- 71 files changed, 6725 insertions(+), 1878 deletions(-) create mode 100644 tests/queries/0_stateless/00000_test.sql create mode 100644 tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_compact_merge_tree.reference create mode 100644 tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_compact_merge_tree.sql create mode 100644 tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_memory.reference create mode 100644 tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_memory.sql create mode 100644 tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.reference.j2 create mode 100644 tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.sql.j2 create mode 100644 tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.reference create mode 100644 tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.sql diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index 8be81471377..f9befd166fe 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -14,7 +14,7 @@ To declare a column of `Dynamic` type, use the following syntax: Dynamic(max_types=N) ``` -Where `N` is an optional parameter between `1` and `255` indicating how many different data types can be stored inside a column with type `Dynamic` across single block of data that is stored separately (for example across single data part for MergeTree table). If this limit is exceeded, all new types will be converted to type `String`. Default value of `max_types` is `32`. +Where `N` is an optional parameter between `0` and `254` indicating how many different data types can be stored as separate subcolumns inside a column with type `Dynamic` across single block of data that is stored separately (for example across single data part for MergeTree table). If this limit is exceeded, all values with new types will be stored together in a special shared data structure in binary form. Default value of `max_types` is `32`. 
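For illustration, a minimal sketch of declaring and querying such a column (the table name `example` is arbitrary, and the experimental setting from the note below must be enabled):

```sql
-- Keep at most 4 data types as separate subcolumns; values of rarer types
-- are stored together in the shared data structure.
CREATE TABLE example (d Dynamic(max_types=4)) ENGINE = Memory;
INSERT INTO example VALUES (42), ('Hello, World!'), ([1, 2, 3]);
SELECT d, dynamicType(d) FROM example;
```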
:::note
The Dynamic data type is an experimental feature. To use it, set `allow_experimental_dynamic_type = 1`.
:::
@@ -224,41 +224,43 @@ SELECT d::Dynamic(max_types=5) as d2, dynamicType(d2) FROM test;
 └───────┴────────────────┘
 ```

-If `K < N`, then the values with the rarest types are converted to `String`:
+If `K < N`, then the values with the rarest types will be inserted into a single special subcolumn, but will still be accessible:

 ```text
 CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory;
 INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]);
-SELECT d, dynamicType(d), d::Dynamic(max_types=2) as d2, dynamicType(d2) FROM test;
+SELECT d, dynamicType(d), d::Dynamic(max_types=2) as d2, dynamicType(d2), isDynamicElementInSharedData(d2) FROM test;
 ```

 ```text
-┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐
-│ ᴺᵁᴸᴸ    │ None           │ ᴺᵁᴸᴸ    │ None            │
-│ 42      │ Int64          │ 42      │ Int64           │
-│ 43      │ Int64          │ 43      │ Int64           │
-│ 42.42   │ String         │ 42.42   │ String          │
-│ true    │ Bool           │ true    │ String          │
-│ [1,2,3] │ Array(Int64)   │ [1,2,3] │ String          │
-└─────────┴────────────────┴─────────┴─────────────────┘
+┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┬─isDynamicElementInSharedData(d2)─┐
+│ ᴺᵁᴸᴸ    │ None           │ ᴺᵁᴸᴸ    │ None            │ false                            │
+│ 42      │ Int64          │ 42      │ Int64           │ false                            │
+│ 43      │ Int64          │ 43      │ Int64           │ false                            │
+│ 42.42   │ String         │ 42.42   │ String          │ false                            │
+│ true    │ Bool           │ true    │ Bool            │ true                             │
+│ [1,2,3] │ Array(Int64)   │ [1,2,3] │ Array(Int64)    │ true                             │
+└─────────┴────────────────┴─────────┴─────────────────┴──────────────────────────────────┘
 ```

-If `K=1`, all types are converted to `String`:
+The function `isDynamicElementInSharedData` returns `true` for rows that are stored in the special shared data structure inside `Dynamic`; as we can see, the resulting column contains only 2 types that are not stored in the shared data structure.
+
+If `K=0`, all types will be inserted into a single special subcolumn:

 ```text
 CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory;
 INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]);
-SELECT d, dynamicType(d), d::Dynamic(max_types=1) as d2, dynamicType(d2) FROM test;
+SELECT d, dynamicType(d), d::Dynamic(max_types=0) as d2, dynamicType(d2), isDynamicElementInSharedData(d2) FROM test;
 ```

 ```text
-┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐
-│ ᴺᵁᴸᴸ    │ None           │ ᴺᵁᴸᴸ    │ None            │
-│ 42      │ Int64          │ 42      │ String          │
-│ 43      │ Int64          │ 43      │ String          │
-│ 42.42   │ String         │ 42.42   │ String          │
-│ true    │ Bool           │ true    │ String          │
-│ [1,2,3] │ Array(Int64)   │ [1,2,3] │ String          │
-└─────────┴────────────────┴─────────┴─────────────────┘
+┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┬─isDynamicElementInSharedData(d2)─┐
+│ ᴺᵁᴸᴸ    │ None           │ ᴺᵁᴸᴸ    │ None            │ false                            │
+│ 42      │ Int64          │ 42      │ Int64           │ true                             │
+│ 43      │ Int64          │ 43      │ Int64           │ true                             │
+│ 42.42   │ String         │ 42.42   │ String          │ true                             │
+│ true    │ Bool           │ true    │ Bool            │ true                             │
+│ [1,2,3] │ Array(Int64)   │ [1,2,3] │ Array(Int64)    │ true                             │
+└─────────┴────────────────┴─────────┴─────────────────┴──────────────────────────────────┘
 ```

 ## Reading Dynamic type from the data
@@ -411,17 +413,17 @@ SELECT d, dynamicType(d) FROM test ORDER by d;

 ## Reaching the limit in number of different data types stored inside Dynamic

-`Dynamic` data type can store only limited number of different data types inside.
By default, this limit is 32, but you can change it in type declaration using syntax `Dynamic(max_types=N)` where N is between 1 and 255 (due to implementation details, it's impossible to have more than 255 different data types inside Dynamic).
-When the limit is reached, all new data types inserted to `Dynamic` column will be casted to `String` and stored as `String` values.
+`Dynamic` data type can store only a limited number of different data types as separate subcolumns. By default, this limit is 32, but you can change it in the type declaration using the syntax `Dynamic(max_types=N)` where N is between 0 and 254 (due to implementation details, it's impossible to have more than 254 different data types that can be stored as separate subcolumns inside Dynamic).
+When the limit is reached, all new data types inserted to a `Dynamic` column will be inserted into a single shared data structure that stores values with different data types in binary form.

 Let's see what happens when the limit is reached in different scenarios.

 ### Reaching the limit during data parsing

 During parsing of `Dynamic` values from the data, when the limit is reached for the current block of data, all new values will be inserted into the shared data structure:

 ```sql
-SELECT d, dynamicType(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', '
+SELECT d, dynamicType(d), isDynamicElementInSharedData(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', '
 {"d" : 42}
 {"d" : [1, 2, 3]}
 {"d" : "Hello, World!"}
@@ -432,22 +434,22 @@ SELECT d, dynamicType(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', '
 ```

 ```text
-┌─d──────────────────────────┬─dynamicType(d)─┐
-│ 42                         │ Int64          │
-│ [1,2,3]                    │ Array(Int64)   │
-│ Hello, World!              │ String         │
-│ 2020-01-01                 │ String         │
-│ ["str1", "str2", "str3"]   │ String         │
-│ {"a" : 1, "b" : [1, 2, 3]} │ String         │
-└────────────────────────────┴────────────────┘
+┌─d──────────────────────┬─dynamicType(d)─────────────────┬─isDynamicElementInSharedData(d)─┐
+│ 42                     │ Int64                          │ false                           │
+│ [1,2,3]                │ Array(Int64)                   │ false                           │
+│ Hello, World!          │ String                         │ false                           │
+│ 2020-01-01             │ Date                           │ true                            │
+│ ['str1','str2','str3'] │ Array(String)                  │ true                            │
+│ (1,[1,2,3])            │ Tuple(a Int64, b Array(Int64)) │ true                            │
+└────────────────────────┴────────────────────────────────┴─────────────────────────────────┘
 ```

-As we can see, after inserting 3 different data types `Int64`, `Array(Int64)` and `String` all new types were converted to `String`.
+As we can see, after inserting 3 different data types `Int64`, `Array(Int64)` and `String`, all new types were inserted into the special shared data structure.

 ### During merges of data parts in MergeTree table engines

-During merge of several data parts in MergeTree table the `Dynamic` column in the resulting data part can reach the limit of different data types inside and won't be able to store all types from source parts.
-In this case ClickHouse chooses what types will remain after merge and what types will be casted to `String`. In most cases ClickHouse tries to keep the most frequent types and cast the rarest types to `String`, but it depends on the implementation.
+During a merge of several data parts in a MergeTree table, the `Dynamic` column in the resulting data part can reach the limit of different data types that can be stored in separate subcolumns inside and won't be able to store all types as subcolumns from source parts.
+In this case ClickHouse chooses what types will remain as separate subcolumns after the merge and what types will be inserted into the shared data structure. In most cases ClickHouse tries to keep the most frequent types and store the rarest types in the shared data structure, but it depends on the implementation.

 Let's see an example of such merge. First, let's create a table with `Dynamic` column, set the limit of different data types to `3` and insert values with `5` different types:

 ```sql
 CREATE TABLE test (id UInt64, d Dynamic(max_types=3)) ENGINE=MergeTree ORDER BY id;
 SYSTEM STOP MERGES test;
 INSERT INTO test SELECT number, number FROM numbers(5);
 INSERT INTO test SELECT number, range(number) FROM numbers(4);
 INSERT INTO test SELECT number, toDate(number) FROM numbers(3);
 INSERT INTO test SELECT number, map(number, number) FROM numbers(2);
 INSERT INTO test SELECT number, 'str_' || toString(number) FROM numbers(1);
 ```

 Each insert will create a separate data part with `Dynamic` column containing a single type:

 ```sql
-SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part;
+SELECT count(), dynamicType(d), isDynamicElementInSharedData(d), _part FROM test GROUP BY _part, dynamicType(d), isDynamicElementInSharedData(d) ORDER BY _part, count();
 ```

 ```text
-┌─count()─┬─dynamicType(d)──────┬─_part─────┐
-│       5 │ UInt64              │ all_1_1_0 │
-│       4 │ Array(UInt64)       │ all_2_2_0 │
-│       3 │ Date                │ all_3_3_0 │
-│       2 │ Map(UInt64, UInt64) │ all_4_4_0 │
-│       1 │ String              │ all_5_5_0 │
-└─────────┴─────────────────────┴───────────┘
+┌─count()─┬─dynamicType(d)──────┬─isDynamicElementInSharedData(d)─┬─_part─────┐
+│       5 │ UInt64              │ false                           │ all_1_1_0 │
+│       4 │ Array(UInt64)       │ false                           │ all_2_2_0 │
+│       3 │ Date                │ false                           │ all_3_3_0 │
+│       2 │ Map(UInt64, UInt64) │ false                           │ all_4_4_0 │
+│       1 │ String              │ false                           │ all_5_5_0 │
+└─────────┴─────────────────────┴─────────────────────────────────┴───────────┘
 ```

 Now, let's merge all parts into one and see what will happen:

 ```sql
 SYSTEM START MERGES test;
 OPTIMIZE TABLE test FINAL;
-SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part;
+SELECT count(), dynamicType(d), isDynamicElementInSharedData(d), _part FROM test GROUP BY _part, dynamicType(d), isDynamicElementInSharedData(d) ORDER BY _part, count() desc;
 ```

 ```text
-┌─count()─┬─dynamicType(d)─┬─_part─────┐
-│       5 │ UInt64         │ all_1_5_2 │
-│       6 │ String         │ all_1_5_2 │
-│       4 │ Array(UInt64)  │ all_1_5_2 │
-└─────────┴────────────────┴───────────┘
+┌─count()─┬─dynamicType(d)──────┬─isDynamicElementInSharedData(d)─┬─_part─────┐
+│       5 │ UInt64              │ false                           │ all_1_5_2 │
+│       4 │ Array(UInt64)       │ false                           │ all_1_5_2 │
+│       3 │ Date                │ false                           │ all_1_5_2 │
+│       2 │ Map(UInt64, UInt64) │ true                            │ all_1_5_2 │
+│       1 │ String              │ true                            │ all_1_5_2 │
+└─────────┴─────────────────────┴─────────────────────────────────┴───────────┘
 ```

-As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` and casted all other types to `String`.
+As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` as subcolumns and inserted all other types into the shared data structure.
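Values that end up in the shared data structure stay fully readable. As a minimal sketch (reusing the merged `test` table above), typed subcolumn access works the same way whether a value is stored as a separate subcolumn or in shared data:

```sql
-- d.Date comes back as Nullable(Date) even though the Date values
-- were moved into the shared data structure by the merge.
SELECT d.Date, d.`Map(UInt64, UInt64)` FROM test;
```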
## JSONExtract functions with Dynamic @@ -509,22 +513,23 @@ SELECT JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Dynamic') AS dynamic, dynamicType( ``` ```sql -SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))') AS map_of_dynamics, mapApply((k, v) -> (k, variantType(v)), map_of_dynamics) AS map_of_dynamic_types``` +SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Dynamic)') AS map_of_dynamics, mapApply((k, v) -> (k, dynamicType(v)), map_of_dynamics) AS map_of_dynamic_types +``` ```text -┌─map_of_dynamics──────────────────┬─map_of_dynamic_types────────────────────────────┐ -│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'UInt32','b':'String','c':'Array(UInt32)'} │ -└──────────────────────────────────┴─────────────────────────────────────────────────┘ +┌─map_of_dynamics──────────────────┬─map_of_dynamic_types────────────────────────────────────┐ +│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'Int64','b':'String','c':'Array(Nullable(Int64))'} │ +└──────────────────────────────────┴─────────────────────────────────────────────────────────┘ ``` ```sql -SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') AS dynamics, arrayMap(x -> (x.1, variantType(x.2)), dynamics) AS dynamic_types``` +SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Dynamic') AS dynamics, arrayMap(x -> (x.1, dynamicType(x.2)), dynamics) AS dynamic_types``` ``` ```text -┌─dynamics───────────────────────────────┬─dynamic_types─────────────────────────────────────────┐ -│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','UInt32'),('b','String'),('c','Array(UInt32)')] │ -└────────────────────────────────────────┴───────────────────────────────────────────────────────┘ +┌─dynamics───────────────────────────────┬─dynamic_types─────────────────────────────────────────────────┐ +│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','Int64'),('b','String'),('c','Array(Nullable(Int64))')] │ +└────────────────────────────────────────┴───────────────────────────────────────────────────────────────┘ ``` ### Binary output format diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index a92d54dd675..454f7956f48 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -1,16 +1,21 @@ #include #include +#include #include #include #include +#include #include #include #include #include #include #include -#include +#include +#include +#include +#include #include namespace DB @@ -22,31 +27,77 @@ namespace ErrorCodes extern const int PARAMETER_OUT_OF_BOUND; } - -ColumnDynamic::ColumnDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_) +namespace { - /// Create empty Variant. - variant_info.variant_type = std::make_shared(DataTypes{}); - variant_info.variant_name = variant_info.variant_type->getName(); - variant_column = variant_info.variant_type->createColumn(); + +/// Static default format settings to avoid creating it every time. +const FormatSettings & getFormatSettings() +{ + static const FormatSettings settings; + return settings; +} + +} + +/// Shared variant will contain String values but we cannot use usual String type +/// because we can have regular variant with type String. +/// To solve it, we use String type with custom name for shared variant. 
+DataTypePtr ColumnDynamic::getSharedVariantDataType() +{ + return DataTypeFactory::instance().getCustom("String", std::make_unique(std::make_unique(getSharedVariantTypeName()))); +} + +ColumnDynamic::ColumnDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_), global_max_dynamic_types(max_dynamic_types) +{ + /// Create Variant with shared variant. + setVariantType(std::make_shared(DataTypes{getSharedVariantDataType()})); } ColumnDynamic::ColumnDynamic( - MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_) + MutableColumnPtr variant_column_, const DataTypePtr & variant_type_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_) + : variant_column(std::move(variant_column_)) + , max_dynamic_types(max_dynamic_types_) + , global_max_dynamic_types(global_max_dynamic_types_) + , statistics(statistics_) +{ + createVariantInfo(variant_type_); +} + +ColumnDynamic::ColumnDynamic( + MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_) : variant_column(std::move(variant_column_)) , variant_info(variant_info_) , max_dynamic_types(max_dynamic_types_) + , global_max_dynamic_types(global_max_dynamic_types_) , statistics(statistics_) { } -ColumnDynamic::MutablePtr ColumnDynamic::create(MutableColumnPtr variant_column, const DataTypePtr & variant_type, size_t max_dynamic_types_, const Statistics & statistics_) +void ColumnDynamic::setVariantType(const DataTypePtr & variant_type) +{ + if (variant_column && !empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Setting specific variant type is allowed only for empty dynamic column"); + + variant_column = variant_type->createColumn(); + createVariantInfo(variant_type); +} + +void ColumnDynamic::setMaxDynamicPaths(size_t max_dynamic_type_) +{ + if (variant_column && !empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Setting specific max_dynamic_type parameter is allowed only for empty dynamic column"); + + max_dynamic_types = max_dynamic_type_; +} + +void ColumnDynamic::createVariantInfo(const DataTypePtr & variant_type) { - VariantInfo variant_info; variant_info.variant_type = variant_type; variant_info.variant_name = variant_type->getName(); const auto & variants = assert_cast(*variant_type).getVariants(); + variant_info.variant_names.clear(); variant_info.variant_names.reserve(variants.size()); + variant_info.variant_name_to_discriminator.clear(); variant_info.variant_name_to_discriminator.reserve(variants.size()); for (ColumnVariant::Discriminator discr = 0; discr != variants.size(); ++discr) { @@ -54,30 +105,26 @@ ColumnDynamic::MutablePtr ColumnDynamic::create(MutableColumnPtr variant_column, variant_info.variant_name_to_discriminator[variant_name] = discr; } - return create(std::move(variant_column), variant_info, max_dynamic_types_, statistics_); + if (!variant_info.variant_name_to_discriminator.contains(getSharedVariantTypeName())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Variant in Dynamic column doesn't contain shared variant"); } -bool ColumnDynamic::addNewVariant(const DB::DataTypePtr & new_variant) +bool ColumnDynamic::addNewVariant(const DataTypePtr & new_variant, const String & new_variant_name) { /// Check if we already have such variant. 
- if (variant_info.variant_name_to_discriminator.contains(new_variant->getName())) + if (variant_info.variant_name_to_discriminator.contains(new_variant_name)) return true; - /// Check if we reached maximum number of variants. - if (variant_info.variant_names.size() >= max_dynamic_types) + /// Check if we reached maximum number of variants (don't count shared variant). + if (variant_info.variant_names.size() - 1 == max_dynamic_types) { - /// ColumnDynamic can have max_dynamic_types number of variants only when it has String as a variant. - /// Otherwise we won't be able to cast new variants to Strings. - if (!variant_info.variant_name_to_discriminator.contains("String")) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Maximum number of variants reached, but no String variant exists"); + /// Dynamic column should always have shared variant. + if (!variant_info.variant_name_to_discriminator.contains(getSharedVariantTypeName())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Maximum number of variants reached, but no shared variant exists"); return false; } - /// If we have (max_dynamic_types - 1) number of variants and don't have String variant, we can add only String variant. - if (variant_info.variant_names.size() == max_dynamic_types - 1 && new_variant->getName() != "String" && !variant_info.variant_name_to_discriminator.contains("String")) - return false; - const DataTypes & current_variants = assert_cast(*variant_info.variant_type).getVariants(); DataTypes all_variants = current_variants; all_variants.push_back(new_variant); @@ -86,21 +133,15 @@ bool ColumnDynamic::addNewVariant(const DB::DataTypePtr & new_variant) return true; } -void ColumnDynamic::addStringVariant() +void extendVariantColumn( + IColumn & variant_column, + const DataTypePtr & old_variant_type, + const DataTypePtr & new_variant_type, + std::unordered_map old_variant_name_to_discriminator) { - if (!addNewVariant(std::make_shared())) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add String variant to Dynamic column, it's a bug"); -} - -void ColumnDynamic::updateVariantInfoAndExpandVariantColumn(const DB::DataTypePtr & new_variant_type) -{ - const DataTypes & current_variants = assert_cast(variant_info.variant_type.get())->getVariants(); + const DataTypes & current_variants = assert_cast(old_variant_type.get())->getVariants(); const DataTypes & new_variants = assert_cast(new_variant_type.get())->getVariants(); - Names new_variant_names; - new_variant_names.reserve(new_variants.size()); - std::unordered_map new_variant_name_to_discriminator; - new_variant_name_to_discriminator.reserve(new_variants.size()); std::vector> new_variant_columns_and_discriminators_to_add; new_variant_columns_and_discriminators_to_add.reserve(new_variants.size() - current_variants.size()); std::vector current_to_new_discriminators; @@ -108,26 +149,26 @@ void ColumnDynamic::updateVariantInfoAndExpandVariantColumn(const DB::DataTypePt for (ColumnVariant::Discriminator discr = 0; discr != new_variants.size(); ++discr) { - const auto & name = new_variant_names.emplace_back(new_variants[discr]->getName()); - new_variant_name_to_discriminator[name] = discr; - - auto current_it = variant_info.variant_name_to_discriminator.find(name); - if (current_it == variant_info.variant_name_to_discriminator.end()) + auto current_it = old_variant_name_to_discriminator.find(new_variants[discr]->getName()); + if (current_it == old_variant_name_to_discriminator.end()) new_variant_columns_and_discriminators_to_add.emplace_back(new_variants[discr]->createColumn(), 
discr); else current_to_new_discriminators[current_it->second] = discr; } - variant_info.variant_type = new_variant_type; - variant_info.variant_name = new_variant_type->getName(); - variant_info.variant_names = new_variant_names; - variant_info.variant_name_to_discriminator = new_variant_name_to_discriminator; - assert_cast(*variant_column).extend(current_to_new_discriminators, std::move(new_variant_columns_and_discriminators_to_add)); + assert_cast(variant_column).extend(current_to_new_discriminators, std::move(new_variant_columns_and_discriminators_to_add)); +} + +void ColumnDynamic::updateVariantInfoAndExpandVariantColumn(const DataTypePtr & new_variant_type) +{ + extendVariantColumn(*variant_column, variant_info.variant_type, new_variant_type, variant_info.variant_name_to_discriminator); + createVariantInfo(new_variant_type); + /// Clear mappings cache because now with new Variant we will have new mappings. variant_mappings_cache.clear(); } -std::vector * ColumnDynamic::combineVariants(const DB::ColumnDynamic::VariantInfo & other_variant_info) +std::vector * ColumnDynamic::combineVariants(const ColumnDynamic::VariantInfo & other_variant_info) { /// Check if we already have global discriminators mapping for other Variant in cache. /// It's used to not calculate the same mapping each call of insertFrom with the same columns. @@ -153,21 +194,14 @@ std::vector * ColumnDynamic::combineVariants(const { const DataTypes & current_variants = assert_cast(*variant_info.variant_type).getVariants(); - /// We cannot combine Variants if total number of variants exceeds max_dynamic_types. - if (current_variants.size() + num_new_variants > max_dynamic_types) + /// We cannot combine Variants if total number of variants exceeds max_dynamic_types (don't count shared variant). + if (current_variants.size() + num_new_variants - 1 > max_dynamic_types) { /// Remember that we cannot combine our variant with this one, so we will not try to do it again. variants_with_failed_combination.insert(other_variant_info.variant_name); return nullptr; } - /// We cannot combine Variants if total number of variants reaches max_dynamic_types and we don't have String variant. - if (current_variants.size() + num_new_variants == max_dynamic_types && !variant_info.variant_name_to_discriminator.contains("String") && !other_variant_info.variant_name_to_discriminator.contains("String")) - { - variants_with_failed_combination.insert(other_variant_info.variant_name); - return nullptr; - } - DataTypes all_variants = current_variants; all_variants.insert(all_variants.end(), other_variants.begin(), other_variants.end()); auto new_variant_type = std::make_shared(all_variants); @@ -185,40 +219,93 @@ std::vector * ColumnDynamic::combineVariants(const return &it->second; } -void ColumnDynamic::insert(const DB::Field & x) +void ColumnDynamic::insert(const Field & x) { - /// Check if we can insert field without Variant extension. - if (variant_column->tryInsert(x)) + if (x.isNull()) + { + insertDefault(); return; + } + + auto & variant_col = getVariantColumn(); + auto shared_variant_discr = getSharedVariantDiscriminator(); + /// Check if we can insert field into existing variants and avoid Variant extension. 
+    for (size_t i = 0; i != variant_col.getNumVariants(); ++i)
+    {
+        if (i != shared_variant_discr && variant_col.getVariantByGlobalDiscriminator(i).tryInsert(x))
+        {
+            variant_col.getLocalDiscriminators().push_back(variant_col.localDiscriminatorByGlobal(i));
+            variant_col.getOffsets().push_back(variant_col.getVariantByGlobalDiscriminator(i).size() - 1);
+            return;
+        }
+    }
 
     /// If we cannot insert field into current variant column, extend it with new variant for this field from its type.
-    if (addNewVariant(applyVisitor(FieldToDataType(), x)))
+    auto field_data_type = applyVisitor(FieldToDataType(), x);
+    auto field_data_type_name = field_data_type->getName();
+    if (addNewVariant(field_data_type, field_data_type_name))
     {
-        /// Now we should be able to insert this field into extended variant column.
-        variant_column->insert(x);
+        /// Insert this field into newly added variant.
+        auto discr = variant_info.variant_name_to_discriminator[field_data_type_name];
+        variant_col.getVariantByGlobalDiscriminator(discr).insert(x);
+        variant_col.getLocalDiscriminators().push_back(variant_col.localDiscriminatorByGlobal(discr));
+        variant_col.getOffsets().push_back(variant_col.getVariantByGlobalDiscriminator(discr).size() - 1);
     }
     else
     {
         /// We reached maximum number of variants and couldn't add new variant.
-        /// This case should be really rare in real use cases.
-        /// We should always be able to add String variant and cast inserted value to String.
-        addStringVariant();
-        variant_column->insert(toString(x));
+        /// In this case we add the value of this new variant into special shared variant.
+        /// We store values in shared variant in binary form with binary encoded type.
+        auto & shared_variant = getSharedVariant();
+        auto & chars = shared_variant.getChars();
+        WriteBufferFromVector<ColumnString::Chars> value_buf(chars, AppendModeTag());
+        encodeDataType(field_data_type, value_buf);
+        getVariantSerialization(field_data_type, field_data_type_name)->serializeBinary(x, value_buf, getFormatSettings());
+        value_buf.finalize();
+        chars.push_back(0);
+        shared_variant.getOffsets().push_back(chars.size());
+        variant_col.getLocalDiscriminators().push_back(variant_col.localDiscriminatorByGlobal(shared_variant_discr));
+        variant_col.getOffsets().push_back(shared_variant.size() - 1);
     }
 }
 
-bool ColumnDynamic::tryInsert(const DB::Field & x)
+bool ColumnDynamic::tryInsert(const Field & x)
 {
     /// We can insert any value into Dynamic column.
     insert(x);
     return true;
 }
 
+Field ColumnDynamic::operator[](size_t n) const
+{
+    Field res;
+    get(n, res);
+    return res;
+}
+
+void ColumnDynamic::get(size_t n, Field & res) const
+{
+    const auto & variant_col = getVariantColumn();
+    /// Check if value is not in shared variant.
+    if (variant_col.globalDiscriminatorAt(n) != getSharedVariantDiscriminator())
+    {
+        variant_col.get(n, res);
+        return;
+    }
+
+    /// We should deserialize value from shared variant.
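+    /// Each shared variant row is a ColumnString cell with the layout
+    ///     [binary encoded data type][value in that type's binary serialization]
+    /// plus the terminating zero byte appended by ColumnString, so the reader below first
+    /// decodes the type and then uses that type's serialization for the remaining bytes.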
+ const auto & shared_variant = getSharedVariant(); + auto value_data = shared_variant.getDataAt(variant_col.offsetAt(n)); + ReadBufferFromMemory buf(value_data.data, value_data.size); + auto type = decodeDataType(buf); + getVariantSerialization(type)->deserializeBinary(res, buf, getFormatSettings()); +} + #if !defined(DEBUG_OR_SANITIZER_BUILD) -void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) +void ColumnDynamic::insertFrom(const IColumn & src_, size_t n) #else -void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) +void ColumnDynamic::doInsertFrom(const IColumn & src_, size_t n) #endif { const auto & dynamic_src = assert_cast(src_); @@ -231,6 +318,28 @@ void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) } auto & variant_col = assert_cast(*variant_column); + const auto & src_variant_col = assert_cast(*dynamic_src.variant_column); + auto src_global_discr = src_variant_col.globalDiscriminatorAt(n); + auto src_offset = src_variant_col.offsetAt(n); + + /// Check if we insert from shared variant and process it separately. + if (src_global_discr == dynamic_src.getSharedVariantDiscriminator()) + { + auto & src_shared_variant = dynamic_src.getSharedVariant(); + auto value = src_shared_variant.getDataAt(src_offset); + /// Decode data type of this value. + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + auto type_name = type->getName(); + /// Check if we have this variant and deserialize value into variant from shared variant data. + if (auto it = variant_info.variant_name_to_discriminator.find(type_name); it != variant_info.variant_name_to_discriminator.end()) + variant_col.deserializeBinaryIntoVariant(it->second, getVariantSerialization(type, type_name), buf, getFormatSettings()); + /// Otherwise just insert it into our shared variant. + else + variant_col.insertIntoVariantFrom(getSharedVariantDiscriminator(), src_shared_variant, src_offset); + + return; + } /// If variants are different, we need to extend our variant with new variants. if (auto * global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) @@ -241,8 +350,6 @@ void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) /// We cannot combine 2 Variant types as total number of variants exceeds the limit. /// We need to insert single value, try to add only corresponding variant. - const auto & src_variant_col = assert_cast(*dynamic_src.variant_column); - auto src_global_discr = src_variant_col.globalDiscriminatorAt(n); /// NULL doesn't require Variant extension. if (src_global_discr == ColumnVariant::NULL_DISCRIMINATOR) @@ -260,19 +367,18 @@ void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n) } /// We reached maximum number of variants and couldn't add new variant. - /// We should always be able to add String variant and cast inserted value to String. - addStringVariant(); - auto tmp_variant_column = src_variant_col.getVariantByGlobalDiscriminator(src_global_discr).cloneEmpty(); - tmp_variant_column->insertFrom(src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), src_variant_col.offsetAt(n)); - auto tmp_string_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); - auto string_variant_discr = variant_info.variant_name_to_discriminator["String"]; - variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0); + /// Insert this value into shared variant. 
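+    /// insertValueIntoSharedVariant() serializes the value in the shared variant layout
+    /// described above and also appends the matching discriminator and offset rows.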
+    insertValueIntoSharedVariant(
+        src_variant_col.getVariantByGlobalDiscriminator(src_global_discr),
+        variant_type,
+        dynamic_src.variant_info.variant_names[src_global_discr],
+        src_offset);
 }
 
 #if !defined(DEBUG_OR_SANITIZER_BUILD)
-void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length)
+void ColumnDynamic::insertRangeFrom(const IColumn & src_, size_t start, size_t length)
 #else
-void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, size_t length)
+void ColumnDynamic::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length)
 #endif
 {
     if (start + length > src_.size())
@@ -293,156 +399,206 @@ void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, si
     /// If variants are different, we need to extend our variant with new variants.
     if (auto * global_discriminators_mapping = combineVariants(dynamic_src.variant_info))
     {
-        variant_col.insertRangeFrom(*dynamic_src.variant_column, start, length, *global_discriminators_mapping);
+        size_t prev_size = variant_col.size();
+        auto shared_variant_discr = getSharedVariantDiscriminator();
+        variant_col.insertRangeFrom(*dynamic_src.variant_column, start, length, *global_discriminators_mapping, shared_variant_discr);
+
+        /// We should process insertion from src shared variant separately, because it can contain
+        /// values that should be extracted into our variants. insertRangeFrom above didn't insert
+        /// values into our shared variant (we specified shared_variant_discr as special skip discriminator).
+
+        /// Check if src shared variant is empty, nothing to do in this case.
+        if (dynamic_src.getSharedVariant().empty())
+            return;
+
+        /// Iterate over src discriminators and process insertion from src shared variant.
+        const auto & src_variant_column = dynamic_src.getVariantColumn();
+        const auto src_shared_variant_discr = dynamic_src.getSharedVariantDiscriminator();
+        const auto src_shared_variant_local_discr = src_variant_column.localDiscriminatorByGlobal(src_shared_variant_discr);
+        const auto & src_local_discriminators = src_variant_column.getLocalDiscriminators();
+        const auto & src_offsets = src_variant_column.getOffsets();
+        const auto & src_shared_variant = assert_cast<const ColumnString &>(src_variant_column.getVariantByLocalDiscriminator(src_shared_variant_local_discr));
+
+        auto & local_discriminators = variant_col.getLocalDiscriminators();
+        auto & offsets = variant_col.getOffsets();
+        const auto shared_variant_local_discr = variant_col.localDiscriminatorByGlobal(shared_variant_discr);
+        auto & shared_variant = assert_cast<ColumnString &>(variant_col.getVariantByLocalDiscriminator(shared_variant_local_discr));
+        for (size_t i = 0; i != length; ++i)
+        {
+            if (src_local_discriminators[start + i] == src_shared_variant_local_discr)
+            {
+                chassert(local_discriminators[prev_size + i] == shared_variant_local_discr);
+                auto value = src_shared_variant.getDataAt(src_offsets[start + i]);
+                ReadBufferFromMemory buf(value.data, value.size);
+                auto type = decodeDataType(buf);
+                auto type_name = type->getName();
+                /// Check if we have variant with this type. In this case we should extract
+                /// the value from src shared variant and insert it into this variant.
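+                /// This keeps values typed whenever possible: e.g. if src kept an Int64 in its
+                /// shared variant only because src was over its own limit, but this column does
+                /// have an Int64 variant, the row is decoded into that variant instead of being
+                /// copied as opaque bytes.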
+ if (auto it = variant_info.variant_name_to_discriminator.find(type_name); it != variant_info.variant_name_to_discriminator.end()) + { + auto local_discr = variant_col.localDiscriminatorByGlobal(it->second); + auto & variant = variant_col.getVariantByLocalDiscriminator(local_discr); + getVariantSerialization(type, type_name)->deserializeBinary(variant, buf, getFormatSettings()); + /// Local discriminators were already filled in ColumnVariant::insertRangeFrom and this row should contain + /// shared_variant_local_discr. Change it to local discriminator of the found variant and update offsets. + local_discriminators[prev_size + i] = local_discr; + offsets[prev_size + i] = variant.size() - 1; + } + /// Otherwise, insert this value into shared variant. + else + { + shared_variant.insertData(value.data, value.size); + /// Update variant offset. + offsets[prev_size + i] = shared_variant.size() - 1; + } + } + } + return; } /// We cannot combine 2 Variant types as total number of variants exceeds the limit. - /// In this case we will add most frequent variants from this range and insert them as usual, - /// all other variants will be converted to String. - /// TODO: instead of keeping all current variants and just adding new most frequent variants - /// from source columns we can also try to replace rarest existing variants with frequent - /// variants from source column (so we will avoid casting new frequent variants to String - /// and keeping rare existing ones). It will require rewriting of existing data in Variant - /// column but will improve usability of Dynamic column for example during squashing blocks - /// during insert. - - const auto & src_variant_column = dynamic_src.getVariantColumn(); - - /// Calculate ranges for each variant in current range. - std::vector> variants_ranges(dynamic_src.variant_info.variant_names.size(), {0, 0}); - /// If we insert the whole column, no need to iterate through the range, we can just take variant sizes. - if (start == 0 && length == dynamic_src.size()) - { - for (size_t i = 0; i != dynamic_src.variant_info.variant_names.size(); ++i) - variants_ranges[i] = {0, src_variant_column.getVariantByGlobalDiscriminator(i).size()}; - } - /// Otherwise we need to iterate through discriminators and calculate the range for each variant. - else - { - const auto & local_discriminators = src_variant_column.getLocalDiscriminators(); - const auto & offsets = src_variant_column.getOffsets(); - size_t end = start + length; - for (size_t i = start; i != end; ++i) - { - auto discr = src_variant_column.globalDiscriminatorByLocal(local_discriminators[i]); - if (discr != ColumnVariant::NULL_DISCRIMINATOR) - { - if (!variants_ranges[discr].second) - variants_ranges[discr].first = offsets[i]; - ++variants_ranges[discr].second; - } - } - } - + /// In this case we will add most frequent variants and insert them as usual, + /// all other variants will be inserted into shared variant. const auto & src_variants = assert_cast(*dynamic_src.variant_info.variant_type).getVariants(); - /// List of variants that will be converted to String. - std::vector variants_to_convert_to_string; /// Mapping from global discriminators of src_variant to the new variant we will create. std::vector other_to_new_discriminators; other_to_new_discriminators.reserve(dynamic_src.variant_info.variant_names.size()); - /// Check if we cannot add any more new variants. In this case we will convert all new variants to String. 
- if (variant_info.variant_names.size() == max_dynamic_types || (variant_info.variant_names.size() == max_dynamic_types - 1 && !variant_info.variant_name_to_discriminator.contains("String"))) + /// Check if we cannot add any more new variants. In this case we will insert all new variants into shared variant. + if (variant_info.variant_names.size() - 1 == max_dynamic_types) { - addStringVariant(); + auto shared_variant_discr = getSharedVariantDiscriminator(); for (size_t i = 0; i != dynamic_src.variant_info.variant_names.size(); ++i) { auto it = variant_info.variant_name_to_discriminator.find(dynamic_src.variant_info.variant_names[i]); if (it == variant_info.variant_name_to_discriminator.end()) - { - variants_to_convert_to_string.push_back(i); - other_to_new_discriminators.push_back(variant_info.variant_name_to_discriminator["String"]); - } + other_to_new_discriminators.push_back(shared_variant_discr); else - { other_to_new_discriminators.push_back(it->second); - } } } - /// We still can add some new variants, but not all of them. Let's choose the most frequent variants in specified range. + /// We still can add some new variants, but not all of them. Let's choose the most frequent variants. else { + /// Create list of pairs and sort it. std::vector> new_variants_with_sizes; new_variants_with_sizes.reserve(dynamic_src.variant_info.variant_names.size()); - for (size_t i = 0; i != dynamic_src.variant_info.variant_names.size(); ++i) + const auto & src_variant_column = dynamic_src.getVariantColumn(); + for (const auto & [name, discr] : dynamic_src.variant_info.variant_name_to_discriminator) { - const auto & variant_name = dynamic_src.variant_info.variant_names[i]; - if (variant_name != "String" && !variant_info.variant_name_to_discriminator.contains(variant_name)) - new_variants_with_sizes.emplace_back(variants_ranges[i].second, i); + if (!variant_info.variant_name_to_discriminator.contains(name)) + new_variants_with_sizes.emplace_back(src_variant_column.getVariantByGlobalDiscriminator(discr).size(), discr); } std::sort(new_variants_with_sizes.begin(), new_variants_with_sizes.end(), std::greater()); DataTypes new_variants = assert_cast(*variant_info.variant_type).getVariants(); - if (!variant_info.variant_name_to_discriminator.contains("String")) - new_variants.push_back(std::make_shared()); - + /// Add new variants from sorted list until we reach max_dynamic_types. for (const auto & [_, discr] : new_variants_with_sizes) { - if (new_variants.size() != max_dynamic_types) - new_variants.push_back(src_variants[discr]); - else - variants_to_convert_to_string.push_back(discr); + if (new_variants.size() - 1 == max_dynamic_types) + break; + new_variants.push_back(src_variants[discr]); } auto new_variant_type = std::make_shared(new_variants); updateVariantInfoAndExpandVariantColumn(new_variant_type); - auto string_variant_discriminator = variant_info.variant_name_to_discriminator.at("String"); + auto shared_variant_discr = getSharedVariantDiscriminator(); for (const auto & variant_name : dynamic_src.variant_info.variant_names) { auto it = variant_info.variant_name_to_discriminator.find(variant_name); if (it == variant_info.variant_name_to_discriminator.end()) - other_to_new_discriminators.push_back(string_variant_discriminator); + other_to_new_discriminators.push_back(shared_variant_discr); else other_to_new_discriminators.push_back(it->second); } } - /// Convert to String all variants that couldn't be added. 
- std::unordered_map variants_converted_to_string; - variants_converted_to_string.reserve(variants_to_convert_to_string.size()); - for (auto discr : variants_to_convert_to_string) - { - auto [variant_start, variant_length] = variants_ranges[discr]; - const auto & variant = src_variant_column.getVariantPtrByGlobalDiscriminator(discr); - if (variant_start == 0 && variant_length == variant->size()) - variants_converted_to_string[discr] = castColumn(ColumnWithTypeAndName(variant, src_variants[discr], ""), std::make_shared()); - else - variants_converted_to_string[discr] = castColumn(ColumnWithTypeAndName(variant->cut(variant_start, variant_length), src_variants[discr], ""), std::make_shared()); - } - + /// Iterate over the range and perform insertion. + const auto & src_variant_column = dynamic_src.getVariantColumn(); const auto & src_local_discriminators = src_variant_column.getLocalDiscriminators(); const auto & src_offsets = src_variant_column.getOffsets(); const auto & src_variant_columns = src_variant_column.getVariants(); + const auto src_shared_variant_discr = dynamic_src.getSharedVariantDiscriminator(); + const auto src_shared_variant_local_discr = src_variant_column.localDiscriminatorByGlobal(src_shared_variant_discr); + const auto & src_shared_variant = assert_cast(*src_variant_columns[src_shared_variant_local_discr]); + auto & local_discriminators = variant_col.getLocalDiscriminators(); + local_discriminators.reserve(local_discriminators.size() + length); + auto & offsets = variant_col.getOffsets(); + offsets.reserve(offsets.size() + length); + auto & variant_columns = variant_col.getVariants(); + const auto shared_variant_discr = getSharedVariantDiscriminator(); + const auto shared_variant_local_discr = variant_col.localDiscriminatorByGlobal(shared_variant_discr); + auto & shared_variant = assert_cast(*variant_columns[shared_variant_local_discr]); size_t end = start + length; for (size_t i = start; i != end; ++i) { - auto local_discr = src_local_discriminators[i]; - if (local_discr == ColumnVariant::NULL_DISCRIMINATOR) + auto src_local_discr = src_local_discriminators[i]; + auto src_offset = src_offsets[i]; + if (src_local_discr == ColumnVariant::NULL_DISCRIMINATOR) { - variant_col.insertDefault(); + local_discriminators.push_back(ColumnVariant::NULL_DISCRIMINATOR); + offsets.emplace_back(); } else { - auto global_discr = src_variant_column.globalDiscriminatorByLocal(local_discr); - auto to_global_discr = other_to_new_discriminators[global_discr]; - auto it = variants_converted_to_string.find(global_discr); - if (it == variants_converted_to_string.end()) + /// Process insertion from src shared variant separately. + if (src_local_discr == src_shared_variant_local_discr) { - variant_col.insertIntoVariantFrom(to_global_discr, *src_variant_columns[local_discr], src_offsets[i]); + auto value = src_shared_variant.getDataAt(src_offset); + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + auto type_name = type->getName(); + /// Check if we have variant with this type. In this case we should extract + /// the value from src shared variant and insert it into this variant. 
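+                /// Same dispatch as in doInsertFrom(): rows whose type is known to this column
+                /// are re-typed into the matching variant, the rest stay serialized in the
+                /// shared variant.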
+ if (auto it = variant_info.variant_name_to_discriminator.find(type_name); it != variant_info.variant_name_to_discriminator.end()) + { + auto local_discr = variant_col.localDiscriminatorByGlobal(it->second); + getVariantSerialization(type, type_name)->deserializeBinary(*variant_columns[local_discr], buf, getFormatSettings()); + local_discriminators.push_back(local_discr); + offsets.push_back(variant_columns[local_discr]->size() - 1); + } + /// Otherwise, insert this value into shared variant. + else + { + shared_variant.insertData(value.data, value.size); + local_discriminators.push_back(shared_variant_local_discr); + offsets.push_back(shared_variant.size() - 1); + } } + /// Insertion from usual variant. else { - variant_col.insertIntoVariantFrom(to_global_discr, *it->second, src_offsets[i] - variants_ranges[global_discr].first); + auto src_global_discr = src_variant_column.globalDiscriminatorByLocal(src_local_discr); + auto global_discr = other_to_new_discriminators[src_global_discr]; + /// Check if we need to insert this value into shared variant. + if (global_discr == shared_variant_discr) + { + serializeValueIntoSharedVariant( + shared_variant, + *src_variant_columns[src_local_discr], + src_variants[src_global_discr], + getVariantSerialization(src_variants[src_global_discr], dynamic_src.variant_info.variant_names[src_global_discr]), + src_offset); + local_discriminators.push_back(shared_variant_local_discr); + offsets.push_back(shared_variant.size() - 1); + } + else + { + auto local_discr = variant_col.localDiscriminatorByGlobal(global_discr); + variant_columns[local_discr]->insertFrom(*src_variant_columns[src_local_discr], src_offset); + local_discriminators.push_back(local_discr); + offsets.push_back(variant_columns[local_discr]->size() - 1); + } } } } } #if !defined(DEBUG_OR_SANITIZER_BUILD) -void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +void ColumnDynamic::insertManyFrom(const IColumn & src_, size_t position, size_t length) #else -void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +void ColumnDynamic::doInsertManyFrom(const IColumn & src_, size_t position, size_t length) #endif { const auto & dynamic_src = assert_cast(src_); @@ -455,6 +611,36 @@ void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, } auto & variant_col = assert_cast(*variant_column); + const auto & src_variant_col = assert_cast(*dynamic_src.variant_column); + auto src_global_discr = src_variant_col.globalDiscriminatorAt(position); + auto src_offset = src_variant_col.offsetAt(position); + + /// Check if we insert from shared variant and process it separately. + if (src_global_discr == dynamic_src.getSharedVariantDiscriminator()) + { + auto & src_shared_variant = dynamic_src.getSharedVariant(); + auto value = src_shared_variant.getDataAt(src_offset); + /// Decode data type of this value. + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + auto type_name = type->getName(); + /// Check if we have this variant and deserialize value into variant from shared variant data. + if (auto it = variant_info.variant_name_to_discriminator.find(type_name); it != variant_info.variant_name_to_discriminator.end()) + { + /// Deserialize value into temporary column and use it in insertManyIntoVariantFrom. 
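+            /// A temporary column is needed because the shared variant stores raw bytes, not a
+            /// typed column: one row is decoded into tmp_column and insertManyIntoVariantFrom
+            /// then replicates that single row `length` times.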
+ auto tmp_column = type->createColumn(); + tmp_column->reserve(1); + getVariantSerialization(type, type_name)->deserializeBinary(*tmp_column, buf, getFormatSettings()); + variant_col.insertManyIntoVariantFrom(it->second, *tmp_column, 0, length); + } + /// Otherwise just insert it into our shared variant. + else + { + variant_col.insertManyIntoVariantFrom(getSharedVariantDiscriminator(), src_shared_variant, src_offset, length); + } + + return; + } /// If variants are different, we need to extend our variant with new variants. if (auto * global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) @@ -465,8 +651,6 @@ void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, /// We cannot combine 2 Variant types as total number of variants exceeds the limit. /// We need to insert single value, try to add only corresponding variant. - const auto & src_variant_col = assert_cast(*dynamic_src.variant_column); - auto src_global_discr = src_variant_col.globalDiscriminatorAt(position); if (src_global_discr == ColumnVariant::NULL_DISCRIMINATOR) { insertDefault(); @@ -481,21 +665,51 @@ void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, return; } - addStringVariant(); - auto tmp_variant_column = src_variant_col.getVariantByGlobalDiscriminator(src_global_discr).cloneEmpty(); - tmp_variant_column->insertFrom(src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), src_variant_col.offsetAt(position)); - auto tmp_string_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); - auto string_variant_discr = variant_info.variant_name_to_discriminator["String"]; - variant_col.insertManyIntoVariantFrom(string_variant_discr, *tmp_string_column, 0, length); + /// We reached maximum number of variants and couldn't add new variant. + /// Insert this value into shared variant. + /// Create temporary string column, serialize value into it and use it in insertManyIntoVariantFrom. 
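+    /// The temporary ColumnString acts as a one-row shared variant: the value is serialized
+    /// once and then referenced `length` times by insertManyIntoVariantFrom().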
+ auto tmp_shared_variant = ColumnString::create(); + serializeValueIntoSharedVariant( + *tmp_shared_variant, + src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), + variant_type, + getVariantSerialization(variant_type, dynamic_src.variant_info.variant_names[src_global_discr]), + src_offset); + + variant_col.insertManyIntoVariantFrom(getSharedVariantDiscriminator(), *tmp_shared_variant, 0, length); } +void ColumnDynamic::insertValueIntoSharedVariant(const IColumn & src, const DataTypePtr & type, const String & type_name, size_t n) +{ + auto & variant_col = getVariantColumn(); + auto & shared_variant = getSharedVariant(); + serializeValueIntoSharedVariant(shared_variant, src, type, getVariantSerialization(type, type_name), n); + variant_col.getLocalDiscriminators().push_back(variant_col.localDiscriminatorByGlobal(getSharedVariantDiscriminator())); + variant_col.getOffsets().push_back(shared_variant.size() - 1); +} -StringRef ColumnDynamic::serializeValueIntoArena(size_t n, DB::Arena & arena, const char *& begin) const +void ColumnDynamic::serializeValueIntoSharedVariant( + ColumnString & shared_variant, + const IColumn & src, + const DataTypePtr & type, + const SerializationPtr & serialization, + size_t n) +{ + auto & chars = shared_variant.getChars(); + WriteBufferFromVector value_buf(chars, AppendModeTag()); + encodeDataType(type, value_buf); + serialization->serializeBinary(src, n, value_buf, getFormatSettings()); + value_buf.finalize(); + chars.push_back(0); + shared_variant.getOffsets().push_back(chars.size()); +} + +StringRef ColumnDynamic::serializeValueIntoArena(size_t n, Arena & arena, const char *& begin) const { /// We cannot use Variant serialization here as it serializes discriminator + value, /// but Dynamic doesn't have fixed mapping discriminator <-> variant type /// as different Dynamic column can have different Variants. - /// Instead, we serialize null bit + variant type in binary format (size + bytes) + value. + /// Instead, we serialize null bit + variant type and value in binary format (size + data). const auto & variant_col = assert_cast(*variant_column); auto discr = variant_col.globalDiscriminatorAt(n); StringRef res; @@ -509,19 +723,29 @@ StringRef ColumnDynamic::serializeValueIntoArena(size_t n, DB::Arena & arena, co return res; } - const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(discr); - String variant_type_binary_data = encodeDataType(variant_type); - size_t variant_type_binary_data_size = variant_type_binary_data.size(); - char * pos = arena.allocContinue(sizeof(UInt8) + sizeof(size_t) + variant_type_binary_data.size(), begin); - memcpy(pos, &null_bit, sizeof(UInt8)); - memcpy(pos + sizeof(UInt8), &variant_type_binary_data_size, sizeof(size_t)); - memcpy(pos + sizeof(UInt8) + sizeof(size_t), variant_type_binary_data.data(), variant_type_binary_data.size()); - res.data = pos; - res.size = sizeof(UInt8) + sizeof(size_t) + variant_type_binary_data.size(); + WriteBufferFromOwnString buf; + StringRef type_and_value; + /// If we have value from shared variant, it's already stored in the desired format. + if (discr == getSharedVariantDiscriminator()) + { + type_and_value = getSharedVariant().getDataAt(variant_col.offsetAt(n)); + } + /// For regular variants serialize its type and value in binary format. 
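+    /// Either way the arena record ends up with the layout
+    ///     [null_bit UInt8][size_t length][encoded type + binary value]
+    /// which deserializeAndInsertFromArena() and skipSerializedInArena() read back below.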
+ else + { + const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(discr); + encodeDataType(variant_type, buf); + getVariantSerialization(variant_type, variant_info.variant_names[discr]) + ->serializeBinary(variant_col.getVariantByGlobalDiscriminator(discr), variant_col.offsetAt(n), buf, getFormatSettings()); + type_and_value = buf.str(); + } - auto value_ref = variant_col.getVariantByGlobalDiscriminator(discr).serializeValueIntoArena(variant_col.offsetAt(n), arena, begin); - res.data = value_ref.data - res.size; - res.size += value_ref.size; + char * pos = arena.allocContinue(sizeof(UInt8) + sizeof(size_t) + type_and_value.size, begin); + memcpy(pos, &null_bit, sizeof(UInt8)); + memcpy(pos + sizeof(UInt8), &type_and_value.size, sizeof(size_t)); + memcpy(pos + sizeof(UInt8) + sizeof(size_t), type_and_value.data, type_and_value.size); + res.data = pos; + res.size = sizeof(UInt8) + sizeof(size_t) + type_and_value.size; return res; } @@ -536,39 +760,36 @@ const char * ColumnDynamic::deserializeAndInsertFromArena(const char * pos) return pos; } - /// Read variant type in binary format. - const size_t variant_type_binary_data_size = unalignedLoad(pos); - pos += sizeof(variant_type_binary_data_size); - String variant_type_binary_data; - variant_type_binary_data.resize(variant_type_binary_data_size); - memcpy(variant_type_binary_data.data(), pos, variant_type_binary_data_size); - pos += variant_type_binary_data_size; - auto variant_type = decodeDataType(variant_type_binary_data); + /// Read variant type and value in binary format. + const size_t type_and_value_size = unalignedLoad(pos); + pos += sizeof(type_and_value_size); + std::string_view type_and_value(pos, type_and_value_size); + pos += type_and_value_size; + + ReadBufferFromMemory buf(type_and_value.data(), type_and_value.size()); + auto variant_type = decodeDataType(buf); auto variant_name = variant_type->getName(); /// If we already have such variant, just deserialize it into corresponding variant column. auto it = variant_info.variant_name_to_discriminator.find(variant_name); if (it != variant_info.variant_name_to_discriminator.end()) { - auto discr = it->second; - return variant_col.deserializeVariantAndInsertFromArena(discr, pos); + variant_col.deserializeBinaryIntoVariant(it->second, getVariantSerialization(variant_type, variant_name), buf, getFormatSettings()); } - - /// If we don't have such variant, add it. - if (likely(addNewVariant(variant_type))) + /// If we don't have such variant, try to add it. + else if (likely(addNewVariant(variant_type))) { auto discr = variant_info.variant_name_to_discriminator[variant_name]; - return variant_col.deserializeVariantAndInsertFromArena(discr, pos); + variant_col.deserializeBinaryIntoVariant(discr, getVariantSerialization(variant_type, variant_name), buf, getFormatSettings()); + } + /// Otherwise insert this value into shared variant. + else + { + auto & shared_variant = getSharedVariant(); + shared_variant.insertData(type_and_value.data(), type_and_value.size()); + variant_col.getLocalDiscriminators().push_back(variant_col.localDiscriminatorByGlobal(getSharedVariantDiscriminator())); + variant_col.getOffsets().push_back(shared_variant.size() - 1); } - /// We reached maximum number of variants and couldn't add new variant. - /// We should always be able to add String variant and cast inserted value to String. - addStringVariant(); - /// Create temporary column of this variant type and deserialize value into it. 
- auto tmp_variant_column = variant_type->createColumn(); - pos = tmp_variant_column->deserializeAndInsertFromArena(pos); - /// Cast temporary column to String and insert this value into String variant. - auto str_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); - variant_col.insertIntoVariantFrom(variant_info.variant_name_to_discriminator["String"], *str_column, 0); return pos; } @@ -579,14 +800,10 @@ const char * ColumnDynamic::skipSerializedInArena(const char * pos) const if (null_bit) return pos; - const size_t variant_type_binary_data_size = unalignedLoad(pos); - pos += sizeof(variant_type_binary_data_size); - String variant_type_binary_data; - variant_type_binary_data.resize(variant_type_binary_data_size); - memcpy(variant_type_binary_data.data(), pos, variant_type_binary_data_size); - pos += variant_type_binary_data_size; - auto tmp_variant_column = decodeDataType(variant_type_binary_data)->createColumn(); - return tmp_variant_column->skipSerializedInArena(pos); + const size_t type_and_value_size = unalignedLoad(pos); + pos += sizeof(type_and_value_size); + pos += type_and_value_size; + return pos; } void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const @@ -604,9 +821,9 @@ void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const } #if !defined(DEBUG_OR_SANITIZER_BUILD) -int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const +int ColumnDynamic::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #else -int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const +int ColumnDynamic::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const #endif { const auto & left_variant = assert_cast(*variant_column); @@ -614,7 +831,9 @@ int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int const auto & right_variant = assert_cast(*right_dynamic.variant_column); auto left_discr = left_variant.globalDiscriminatorAt(n); + auto left_shared_variant_discr = getSharedVariantDiscriminator(); auto right_discr = right_variant.globalDiscriminatorAt(m); + auto right_shared_variant_discr = right_dynamic.getSharedVariantDiscriminator(); /// Check if we have NULLs and return result based on nan_direction_hint. if (left_discr == ColumnVariant::NULL_DISCRIMINATOR && right_discr == ColumnVariant::NULL_DISCRIMINATOR) @@ -624,12 +843,125 @@ int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int else if (right_discr == ColumnVariant::NULL_DISCRIMINATOR) return -nan_direction_hint; - /// If rows have different types, we compare type names. - if (variant_info.variant_names[left_discr] != right_dynamic.variant_info.variant_names[right_discr]) - return variant_info.variant_names[left_discr] < right_dynamic.variant_info.variant_names[right_discr] ? -1 : 1; + /// Check if both values are in shared variant. + if (left_discr == left_shared_variant_discr && right_discr == right_shared_variant_discr) + { + /// Extract type names from both values. + auto left_value = getSharedVariant().getDataAt(left_variant.offsetAt(n)); + ReadBufferFromMemory buf_left(left_value.data, left_value.size); + auto left_data_type = decodeDataType(buf_left); + auto left_data_type_name = left_data_type->getName(); - /// If rows have the same types, compare actual values from corresponding variants. 
- return left_variant.getVariantByGlobalDiscriminator(left_discr).compareAt(left_variant.offsetAt(n), right_variant.offsetAt(m), right_variant.getVariantByGlobalDiscriminator(right_discr), nan_direction_hint); + auto right_value = right_dynamic.getSharedVariant().getDataAt(right_variant.offsetAt(m)); + ReadBufferFromMemory buf_right(right_value.data, right_value.size); + auto right_data_type = decodeDataType(buf_right); + auto right_data_type_name = right_data_type->getName(); + + /// If rows have different types, we compare type names. + if (left_data_type_name != right_data_type_name) + return left_data_type_name < right_data_type_name ? -1 : 1; + + /// If rows have the same type, we compare actual values. + /// We have both values serialized in binary format, so we need to + /// create temporary column, insert both values into it and compare. + auto tmp_column = left_data_type->createColumn(); + const auto & serialization = getVariantSerialization(left_data_type, left_data_type_name); + serialization->deserializeBinary(*tmp_column, buf_left, getFormatSettings()); + serialization->deserializeBinary(*tmp_column, buf_right, getFormatSettings()); + return tmp_column->compareAt(0, 1, *tmp_column, nan_direction_hint); + } + /// Check if only left value is in shared data. + else if (left_discr == left_shared_variant_discr) + { + /// Extract left type name from the value. + auto left_value = getSharedVariant().getDataAt(left_variant.offsetAt(n)); + ReadBufferFromMemory buf_left(left_value.data, left_value.size); + auto left_data_type = decodeDataType(buf_left); + auto left_data_type_name = left_data_type->getName(); + + /// If rows have different types, we compare type names. + if (left_data_type_name != right_dynamic.variant_info.variant_names[right_discr]) + return left_data_type_name < right_dynamic.variant_info.variant_names[right_discr] ? -1 : 1; + + /// If rows have the same type, we compare actual values. + /// We have left value serialized in binary format, we need to + /// create temporary column, insert the value into it and compare. + auto tmp_column = left_data_type->createColumn(); + getVariantSerialization(left_data_type, left_data_type_name)->deserializeBinary(*tmp_column, buf_left, getFormatSettings()); + return tmp_column->compareAt(0, right_variant.offsetAt(m), right_variant.getVariantByGlobalDiscriminator(right_discr), nan_direction_hint); + } + /// Check if only right value is in shared data. + else if (right_discr == right_shared_variant_discr) + { + /// Extract right type name from the value. + auto right_value = right_dynamic.getSharedVariant().getDataAt(right_variant.offsetAt(m)); + ReadBufferFromMemory buf_right(right_value.data, right_value.size); + auto right_data_type = decodeDataType(buf_right); + auto right_data_type_name = right_data_type->getName(); + + /// If rows have different types, we compare type names. + if (variant_info.variant_names[left_discr] != right_data_type_name) + return variant_info.variant_names[left_discr] < right_data_type_name ? -1 : 1; + + /// If rows have the same type, we compare actual values. + /// We have right value serialized in binary format, we need to + /// create temporary column, insert the value into it and compare. 
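+        /// As in the other branches, type names are compared first, so mixed-type data still
+        /// has a total order: e.g. all Int64 rows sort before all String rows simply because
+        /// "Int64" < "String"; values are only compared when the types match.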
+ auto tmp_column = right_data_type->createColumn(); + getVariantSerialization(right_data_type, right_data_type_name)->deserializeBinary(*tmp_column, buf_right, getFormatSettings()); + return left_variant.getVariantByGlobalDiscriminator(left_discr).compareAt(left_variant.offsetAt(n), 0, *tmp_column, nan_direction_hint); + } + /// Otherwise both values are regular variants. + else + { + /// If rows have different types, we compare type names. + if (variant_info.variant_names[left_discr] != right_dynamic.variant_info.variant_names[right_discr]) + return variant_info.variant_names[left_discr] < right_dynamic.variant_info.variant_names[right_discr] ? -1 : 1; + + /// If rows have the same types, compare actual values from corresponding variants. + return left_variant.getVariantByGlobalDiscriminator(left_discr).compareAt(left_variant.offsetAt(n), right_variant.offsetAt(m), right_variant.getVariantByGlobalDiscriminator(right_discr), nan_direction_hint); + } +} + +struct ColumnDynamic::ComparatorBase +{ + const ColumnDynamic & parent; + int nan_direction_hint; + + ComparatorBase(const ColumnDynamic & parent_, int nan_direction_hint_) + : parent(parent_), nan_direction_hint(nan_direction_hint_) + { + } + + ALWAYS_INLINE int compare(size_t lhs, size_t rhs) const + { + return parent.compareAt(lhs, rhs, parent, nan_direction_hint); + } +}; + +void ColumnDynamic::getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const +{ + if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable) + getPermutationImpl(limit, res, ComparatorAscendingUnstable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort()); + else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable) + getPermutationImpl(limit, res, ComparatorAscendingStable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort()); + else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Unstable) + getPermutationImpl(limit, res, ComparatorDescendingUnstable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort()); + else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Stable) + getPermutationImpl(limit, res, ComparatorDescendingStable(*this, nan_direction_hint), DefaultSort(), DefaultPartialSort()); +} + +void ColumnDynamic::updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res, DB::EqualRanges & equal_ranges) const +{ + auto comparator_equal = ComparatorEqual(*this, nan_direction_hint); + + if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Unstable) + updatePermutationImpl(limit, res, equal_ranges, ComparatorAscendingUnstable(*this, nan_direction_hint), comparator_equal, DefaultSort(), DefaultPartialSort()); + else if (direction == IColumn::PermutationSortDirection::Ascending && stability == IColumn::PermutationSortStability::Stable) + updatePermutationImpl(limit, res, equal_ranges, ComparatorAscendingStable(*this, nan_direction_hint), comparator_equal, DefaultSort(), DefaultPartialSort()); + else if (direction == IColumn::PermutationSortDirection::Descending && stability == 
IColumn::PermutationSortStability::Unstable) + updatePermutationImpl(limit, res, equal_ranges, ComparatorDescendingUnstable(*this, nan_direction_hint), comparator_equal, DefaultSort(), DefaultPartialSort()); + else if (direction == IColumn::PermutationSortDirection::Descending && stability == IColumn::PermutationSortStability::Stable) + updatePermutationImpl(limit, res, equal_ranges, ComparatorDescendingStable(*this, nan_direction_hint), comparator_equal, DefaultSort(), DefaultPartialSort()); } ColumnPtr ColumnDynamic::compress() const @@ -637,14 +969,16 @@ ColumnPtr ColumnDynamic::compress() const ColumnPtr variant_compressed = variant_column->compress(); size_t byte_size = variant_compressed->byteSize(); return ColumnCompressed::create(size(), byte_size, - [my_variant_compressed = std::move(variant_compressed), my_variant_info = variant_info, my_max_dynamic_types = max_dynamic_types, my_statistics = statistics]() mutable + [my_variant_compressed = std::move(variant_compressed), my_variant_info = variant_info, my_max_dynamic_types = max_dynamic_types, my_global_max_dynamic_types = global_max_dynamic_types, my_statistics = statistics]() mutable { - return ColumnDynamic::create(my_variant_compressed->decompress(), my_variant_info, my_max_dynamic_types, my_statistics); + return ColumnDynamic::create(my_variant_compressed->decompress(), my_variant_info, my_max_dynamic_types, my_global_max_dynamic_types, my_statistics); }); } void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { + LOG_DEBUG(getLogger("ColumnDynamic"), "takeDynamicStructureFromSourceColumns"); + if (!empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "takeDynamicStructureFromSourceColumns should be called only on empty Dynamic column"); @@ -663,6 +997,9 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source /// First, collect all variants from all source columns and calculate total sizes. std::unordered_map total_sizes; DataTypes all_variants; + /// Add shared variant type in advance; + all_variants.push_back(getSharedVariantDataType()); + total_sizes[getSharedVariantTypeName()] = 0; for (const auto & source_column : source_columns) { @@ -671,7 +1008,7 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source const auto & source_variant_info = source_dynamic.getVariantInfo(); const auto & source_variants = assert_cast(*source_variant_info.variant_type).getVariants(); /// During deserialization from MergeTree we will have variant sizes statistics from the whole data part. - const auto & source_statistics = source_dynamic.getStatistics(); + const auto & source_statistics = source_dynamic.getStatistics(); for (size_t i = 0; i != source_variants.size(); ++i) { const auto & variant_name = source_variant_info.variant_names[i]; @@ -682,37 +1019,67 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source all_variants.push_back(source_variants[i]); it = total_sizes.emplace(variant_name, 0).first; } - auto statistics_it = source_statistics.data.find(variant_name); - size_t size = statistics_it == source_statistics.data.end() ? 
source_variant_column.getVariantByGlobalDiscriminator(i).size() : statistics_it->second;
+            size_t size = source_variant_column.getVariantByGlobalDiscriminator(i).size();
+            if (source_statistics)
+            {
+                auto statistics_it = source_statistics->variants_statistics.find(variant_name);
+                if (statistics_it != source_statistics->variants_statistics.end())
+                    size = statistics_it->second;
+            }
+
             it->second += size;
         }
+
+        /// Also add variants from shared variant statistics. It can help extract
+        /// frequent variants from the shared variant into usual variants.
+        if (source_statistics)
+        {
+            for (const auto & [variant_name, size] : source_statistics->shared_variants_statistics)
+            {
+                auto it = total_sizes.find(variant_name);
+                /// Add this variant to the list of all variants if we didn't see it yet.
+                if (it == total_sizes.end())
+                {
+                    all_variants.push_back(DataTypeFactory::instance().get(variant_name));
+                    it = total_sizes.emplace(variant_name, 0).first;
+                }
+                it->second += size;
+            }
+        }
     }
 
     DataTypePtr result_variant_type;
+    Statistics new_statistics(Statistics::Source::MERGE);
-    /// Check if the number of all variants exceeds the limit.
-    if (all_variants.size() > max_dynamic_types || (all_variants.size() == max_dynamic_types && !total_sizes.contains("String")))
+    /// Check if the number of all dynamic types exceeds the limit.
+    if (all_variants.size() - 1 > global_max_dynamic_types)
     {
         /// Create list of variants with their sizes and sort it.
         std::vector<std::pair<size_t, DataTypePtr>> variants_with_sizes;
         variants_with_sizes.reserve(all_variants.size());
         for (const auto & variant : all_variants)
-            variants_with_sizes.emplace_back(total_sizes[variant->getName()], variant);
+        {
+            if (variant->getName() != getSharedVariantTypeName())
+                variants_with_sizes.emplace_back(total_sizes[variant->getName()], variant);
+        }
         std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater());
 
-        /// Take first max_dynamic_types variants from sorted list.
+        /// Take first max_dynamic_types variants from sorted list and fill shared_variants_statistics with the rest.
         DataTypes result_variants;
-        result_variants.reserve(max_dynamic_types);
-        /// Add String variant in advance.
-        result_variants.push_back(std::make_shared<DataTypeString>());
-        for (const auto & [_, variant] : variants_with_sizes)
+        result_variants.reserve(global_max_dynamic_types + 1);
+        for (const auto & [size, variant] : variants_with_sizes)
         {
-            if (result_variants.size() == max_dynamic_types)
-                break;
-
-            if (variant->getName() != "String")
+            /// Add variant to the resulting variants list until we reach max_dynamic_types.
+            if (result_variants.size() < global_max_dynamic_types)
                 result_variants.push_back(variant);
+            /// Add all remaining variants into shared_variants_statistics until we reach its max size.
+            else if (new_statistics.shared_variants_statistics.size() < Statistics::MAX_SHARED_VARIANT_STATISTICS_SIZE)
+                new_statistics.shared_variants_statistics[variant->getName()] = size;
+            else
+                break;
         }
+
+        /// Add shared variant.
+        result_variants.push_back(getSharedVariantDataType());
         result_variant_type = std::make_shared<DataTypeVariant>(result_variants);
     }
     else
@@ -720,26 +1087,16 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source
         result_variant_type = std::make_shared<DataTypeVariant>(all_variants);
     }
 
-    /// Now we have resulting Variant and can fill variant info.
-    variant_info.variant_type = result_variant_type;
-    variant_info.variant_name = result_variant_type->getName();
-    const auto & result_variants = assert_cast<const DataTypeVariant &>(*result_variant_type).getVariants();
-    variant_info.variant_names.clear();
-    variant_info.variant_names.reserve(result_variants.size());
-    variant_info.variant_name_to_discriminator.clear();
-    variant_info.variant_name_to_discriminator.reserve(result_variants.size());
-    statistics.data.clear();
-    statistics.data.reserve(result_variants.size());
-    statistics.source = Statistics::Source::MERGE;
-    for (size_t i = 0; i != result_variants.size(); ++i)
-    {
-        auto variant_name = result_variants[i]->getName();
-        variant_info.variant_names.push_back(variant_name);
-        variant_info.variant_name_to_discriminator[variant_name] = i;
-        statistics.data[variant_name] = total_sizes[variant_name];
-    }
+    /// Now we have resulting Variant and can fill variant info and create merge statistics.
+    setVariantType(result_variant_type);
+    new_statistics.variants_statistics.reserve(variant_info.variant_names.size());
+    for (const auto & variant_name : variant_info.variant_names)
+        new_statistics.variants_statistics[variant_name] = total_sizes[variant_name];
+    statistics = std::make_shared(std::move(new_statistics));
 
-    variant_column = variant_info.variant_type->createColumn();
+    /// Reduce max_dynamic_types to the number of selected variants (without shared variant), so there will be no possibility
+    /// to extend selected variants on inserts into this column during merges.
+    max_dynamic_types = variant_info.variant_names.size() - 1;
 
     /// Now we have the resulting Variant that will be used in all merged columns.
     /// Variants can also contain Dynamic columns inside, we should collect
diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h
index e92cabd3db9..8b815e2b015 100644
--- a/src/Columns/ColumnDynamic.h
+++ b/src/Columns/ColumnDynamic.h
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -19,11 +20,15 @@ namespace DB
  *
  * When new values are inserted into Dynamic column, the internal Variant
  * type and column are extended if the inserted value has new type.
+ * When the limit on number of dynamic types is exceeded, all values
+ * with new types are inserted into special shared variant with type String
+ * that contains values and their types in binary format.
  */
 class ColumnDynamic final : public COWHelper, ColumnDynamic>
 {
 public:
-    ///
+    static constexpr const char * SHARED_VARIANT_TYPE_NAME = "SharedVariant";
+
     struct Statistics
     {
         enum class Source
@@ -32,12 +37,27 @@ public:
             MERGE, /// Statistics were calculated during merge of several MergeTree parts.
         };
 
+        Statistics(Source source_) : source(source_) {}
+
         /// Source of the statistics.
         Source source;
-        /// Statistics data: (variant name) -> (total variant size in data part).
-        std::unordered_map<String, size_t> data;
+        /// Statistics data for usual variants: (variant name) -> (total variant size in data part).
+        std::unordered_map<String, size_t> variants_statistics;
+        /// Statistics data for variants from shared variant: (variant name) -> (total variant size in data part).
+        /// For shared variant we store statistics only for first 256 variants (should cover almost all cases and it's not expensive).
+        static constexpr const size_t MAX_SHARED_VARIANT_STATISTICS_SIZE = 256;
+        std::unordered_map<String, size_t> shared_variants_statistics;
     };
+    using StatisticsPtr = std::shared_ptr<const Statistics>;
+
+    struct ComparatorBase;
+    using ComparatorAscendingUnstable = ComparatorAscendingUnstableImpl<ColumnDynamic>;
+    using ComparatorAscendingStable = ComparatorAscendingStableImpl<ColumnDynamic>;
+    using ComparatorDescendingUnstable = ComparatorDescendingUnstableImpl<ColumnDynamic>;
+    using ComparatorDescendingStable = ComparatorDescendingStableImpl<ColumnDynamic>;
+    using ComparatorEqual = ComparatorEqualImpl<ColumnDynamic>;
+
 private:
     friend class COWHelper<IColumnHelper<ColumnDynamic>, ColumnDynamic>;
@@ -54,28 +74,32 @@ private:
     };
 
     explicit ColumnDynamic(size_t max_dynamic_types_);
-    ColumnDynamic(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {});
+    ColumnDynamic(MutableColumnPtr variant_column_, const DataTypePtr & variant_type_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {});
+    ColumnDynamic(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {});
 
 public:
     /** Create immutable column using immutable arguments. This arguments may be shared with other columns.
       * Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
       */
     using Base = COWHelper<IColumnHelper<ColumnDynamic>, ColumnDynamic>;
-    static Ptr create(const ColumnPtr & variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {})
+    static Ptr create(const ColumnPtr & variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {})
     {
-        return ColumnDynamic::create(variant_column_->assumeMutable(), variant_info_, max_dynamic_types_, statistics_);
+        return ColumnDynamic::create(variant_column_->assumeMutable(), variant_info_, max_dynamic_types_, global_max_dynamic_types_, statistics_);
     }
 
-    static MutablePtr create(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {})
+    static MutablePtr create(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {})
     {
-        return Base::create(std::move(variant_column_), variant_info_, max_dynamic_types_, statistics_);
+        return Base::create(std::move(variant_column_), variant_info_, max_dynamic_types_, global_max_dynamic_types_, statistics_);
     }
 
-    static MutablePtr create(MutableColumnPtr variant_column_, const DataTypePtr & variant_type, size_t max_dynamic_types_, const Statistics & statistics_ = {});
-
-    static ColumnPtr create(ColumnPtr variant_column_, const DataTypePtr & variant_type, size_t max_dynamic_types_, const Statistics & statistics_ = {})
+    static MutablePtr create(MutableColumnPtr variant_column_, const DataTypePtr & variant_type_, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {})
     {
-        return create(variant_column_->assumeMutable(), variant_type, max_dynamic_types_, statistics_);
+        return Base::create(std::move(variant_column_), variant_type_, max_dynamic_types_, global_max_dynamic_types_, statistics_);
+    }
+
+    static ColumnPtr create(ColumnPtr variant_column_, const DataTypePtr & variant_type, size_t max_dynamic_types_, size_t global_max_dynamic_types_, const StatisticsPtr & statistics_ = {})
+    {
+        return create(variant_column_->assumeMutable(), variant_type, max_dynamic_types_, global_max_dynamic_types_, statistics_);
     }
 
     static MutablePtr create(size_t max_dynamic_types_)
@@ -83,7 +107,7 @@ public:
         return Base::create(max_dynamic_types_);
     }
 
-    std::string getName() const override { return "Dynamic(max_types=" + std::to_string(max_dynamic_types) + ")"; }
+    std::string getName() const override { return "Dynamic(max_types=" + std::to_string(global_max_dynamic_types) + ")"; }
 
     const char * getFamilyName() const override
     {
@@ -98,12 +122,12 @@ public:
     MutableColumnPtr cloneEmpty() const override
     {
         /// Keep current dynamic structure
-        return Base::create(variant_column->cloneEmpty(), variant_info, max_dynamic_types, statistics);
+        return Base::create(variant_column->cloneEmpty(), variant_info, max_dynamic_types, global_max_dynamic_types, statistics);
     }
 
     MutableColumnPtr cloneResized(size_t size) const override
     {
-        return Base::create(variant_column->cloneResized(size), variant_info, max_dynamic_types, statistics);
+        return Base::create(variant_column->cloneResized(size), variant_info, max_dynamic_types, global_max_dynamic_types, statistics);
     }
 
     size_t size() const override
@@ -111,15 +135,9 @@ public:
         return variant_column->size();
     }
 
-    Field operator[](size_t n) const override
-    {
-        return (*variant_column)[n];
-    }
+    Field operator[](size_t n) const override;
 
-    void get(size_t n, Field & res) const override
-    {
-        variant_column->get(n, res);
-    }
+    void get(size_t n, Field & res) const override;
 
     bool isDefaultAt(size_t n) const override
     {
@@ -187,7 +205,7 @@ public:
     ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override
     {
-        return create(variant_column->filter(filt, result_size_hint), variant_info, max_dynamic_types);
+        return create(variant_column->filter(filt, result_size_hint), variant_info, max_dynamic_types, global_max_dynamic_types);
     }
 
     void expand(const Filter & mask, bool inverted) override
@@ -197,17 +215,17 @@ public:
     ColumnPtr permute(const Permutation & perm, size_t limit) const override
     {
-        return create(variant_column->permute(perm, limit), variant_info, max_dynamic_types);
+        return create(variant_column->permute(perm, limit), variant_info, max_dynamic_types, global_max_dynamic_types);
     }
 
     ColumnPtr index(const IColumn & indexes, size_t limit) const override
     {
-        return create(variant_column->index(indexes, limit), variant_info, max_dynamic_types);
+        return create(variant_column->index(indexes, limit), variant_info, max_dynamic_types, global_max_dynamic_types);
     }
 
     ColumnPtr replicate(const Offsets & replicate_offsets) const override
     {
-        return create(variant_column->replicate(replicate_offsets), variant_info, max_dynamic_types);
+        return create(variant_column->replicate(replicate_offsets), variant_info, max_dynamic_types, global_max_dynamic_types);
     }
 
     MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override
@@ -216,7 +234,7 @@ public:
         MutableColumns scattered_columns;
         scattered_columns.reserve(num_columns);
         for (auto & scattered_variant_column : scattered_variant_columns)
-            scattered_columns.emplace_back(create(std::move(scattered_variant_column), variant_info, max_dynamic_types));
+            scattered_columns.emplace_back(create(std::move(scattered_variant_column), variant_info, max_dynamic_types, global_max_dynamic_types));
 
         return scattered_columns;
     }
@@ -238,16 +256,10 @@ public:
     }
 
     void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
-                        size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override
-    {
-        variant_column->getPermutation(direction, stability, limit, nan_direction_hint, res);
-    }
+                        size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
 
     void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
-                           size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override
-    {
-        variant_column->updatePermutation(direction, stability, limit, nan_direction_hint, res, equal_ranges);
-    }
+                           size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override;
 
     void reserve(size_t n) override
     {
@@ -293,7 +305,7 @@ public:
     bool structureEquals(const IColumn & rhs) const override
     {
         if (const auto * rhs_concrete = typeid_cast<const ColumnDynamic *>(&rhs))
-            return max_dynamic_types == rhs_concrete->max_dynamic_types;
+            return global_max_dynamic_types == rhs_concrete->global_max_dynamic_types;
         return false;
     }
 
@@ -336,17 +348,67 @@ public:
     const ColumnVariant & getVariantColumn() const { return assert_cast<const ColumnVariant &>(*variant_column); }
     ColumnVariant & getVariantColumn() { return assert_cast<ColumnVariant &>(*variant_column); }
 
-    bool addNewVariant(const DataTypePtr & new_variant);
-    void addStringVariant();
+    bool addNewVariant(const DataTypePtr & new_variant, const String & new_variant_name);
+    bool addNewVariant(const DataTypePtr & new_variant) { return addNewVariant(new_variant, new_variant->getName()); }
 
     bool hasDynamicStructure() const override { return true; }
     void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override;
 
-    const Statistics & getStatistics() const { return statistics; }
+    const StatisticsPtr & getStatistics() const { return statistics; }
+    void setStatistics(const StatisticsPtr & statistics_) { statistics = statistics_; }
 
     size_t getMaxDynamicTypes() const { return max_dynamic_types; }
 
+    void setVariantType(const DataTypePtr & variant_type);
+    void setMaxDynamicPaths(size_t max_dynamic_type_);
+
+    static const String & getSharedVariantTypeName()
+    {
+        static const String name = SHARED_VARIANT_TYPE_NAME;
+        return name;
+    }
+
+    static DataTypePtr getSharedVariantDataType();
+
+    ColumnVariant::Discriminator getSharedVariantDiscriminator() const
+    {
+        return variant_info.variant_name_to_discriminator.at(getSharedVariantTypeName());
+    }
+
+    ColumnString & getSharedVariant()
+    {
+        return assert_cast<ColumnString &>(getVariantColumn().getVariantByGlobalDiscriminator(getSharedVariantDiscriminator()));
+    }
+
+    const ColumnString & getSharedVariant() const
+    {
+        return assert_cast<const ColumnString &>(getVariantColumn().getVariantByGlobalDiscriminator(getSharedVariantDiscriminator()));
+    }
+
+    /// Serializes type and value in binary format into provided shared variant. Doesn't update Variant discriminators and offsets.
+    static void serializeValueIntoSharedVariant(ColumnString & shared_variant, const IColumn & src, const DataTypePtr & type, const SerializationPtr & serialization, size_t n);
+
+    /// Insert value into shared variant. Also updates Variant discriminators and offsets.
+    void insertValueIntoSharedVariant(const IColumn & src, const DataTypePtr & type, const String & type_name, size_t n);
+
+    const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type, const String & variant_name) const
+    {
+        /// Get serialization for provided data type.
+        /// To avoid calling type->getDefaultSerialization() every time we use simple cache with max size.
+        /// When max size is reached, just clear the cache.
+        if (serialization_cache.size() == SERIALIZATION_CACHE_MAX_SIZE)
+            serialization_cache.clear();
+
+        if (auto it = serialization_cache.find(variant_name); it != serialization_cache.end())
+            return it->second;
+
+        return serialization_cache.emplace(variant_name, variant_type->getDefaultSerialization()).first->second;
+    }
+
+    const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type) const { return getVariantSerialization(variant_type, variant_type->getName()); }
+
 private:
+    void createVariantInfo(const DataTypePtr & variant_type);
+
     /// Combine current variant with the other variant and return global discriminators mapping
     /// from other variant to the combined one. It's used for inserting from
     /// different variants.
@@ -359,12 +421,19 @@ private:
     /// Store the type of current variant with some additional information.
     VariantInfo variant_info;
     /// The maximum number of different types that can be stored in this Dynamic column.
-    /// If exceeded, all new variants will be converted to String.
+    /// If exceeded, all new variants will be added to a special shared variant with type String
+    /// in binary format. This limit can be different for different instances of Dynamic column.
+    /// When max_dynamic_types = 0, we will have only shared variant and insert all values into it.
     size_t max_dynamic_types;
+    /// The types limit specified in the data type by the user Dynamic(max_types=N).
+    /// max_dynamic_types in all column instances of this Dynamic type can be only smaller
+    /// (for example, max_dynamic_types can be reduced in takeDynamicStructureFromSourceColumns
+    /// before merge of different Dynamic columns).
+    size_t global_max_dynamic_types;
 
     /// Size statistics of each variants from MergeTree data part.
     /// Used in takeDynamicStructureFromSourceColumns and set during deserialization.
-    Statistics statistics;
+    StatisticsPtr statistics;
 
     /// Cache (Variant name) -> (global discriminators mapping from this variant to current variant in Dynamic column).
     /// Used to avoid mappings recalculation in combineVariants for the same Variant types.
@@ -372,6 +441,17 @@ private:
     /// Cache of Variant types that couldn't be combined with current variant in Dynamic column.
     /// Used to avoid checking if combination is possible for the same Variant types.
     std::unordered_set<String> variants_with_failed_combination;
+
+    /// We can use serializations of different data types to serialize values into shared variant.
+    /// To avoid creating the same serialization multiple times, use simple cache.
+    static const size_t SERIALIZATION_CACHE_MAX_SIZE = 256;
+    mutable std::unordered_map<String, SerializationPtr> serialization_cache;
 };
 
+void extendVariantColumn(
+    IColumn & variant_column,
+    const DataTypePtr & old_variant_type,
+    const DataTypePtr & new_variant_type,
+    std::unordered_map<String, ColumnVariant::Discriminator> old_variant_name_to_discriminator);
+
 }
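Illustration (not part of the patch): each row that overflows into the shared variant becomes a single String cell holding the value's type followed by the value itself in binary format; the subcolumn-extraction code later in this patch reads it back with decodeDataType() + deserializeBinary(). A minimal sketch of producing such a cell, assuming the encodeDataType() helper and the default serializations used elsewhere in this patch:

    /// Sketch: append src[n] of the given type to the shared variant column.
    /// Assumed row layout (matching the read path below): binary-encoded data type, then the value in binary format.
    WriteBufferFromOwnString value_buf;
    encodeDataType(type, value_buf);
    serialization->serializeBinary(src, n, value_buf, FormatSettings{});
    const auto & row = value_buf.str();
    shared_variant.insertData(row.data(), row.size());
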
diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp
index de7efb41d19..7531e976926 100644
--- a/src/Columns/ColumnVariant.cpp
+++ b/src/Columns/ColumnVariant.cpp
@@ -476,7 +476,7 @@ void ColumnVariant::insertFromImpl(const DB::IColumn & src_, size_t n, const std::vector<ColumnVariant::Discriminator> * global_discriminators_mapping)
     }
 }
 
-void ColumnVariant::insertRangeFromImpl(const DB::IColumn & src_, size_t start, size_t length, const std::vector<ColumnVariant::Discriminator> * global_discriminators_mapping)
+void ColumnVariant::insertRangeFromImpl(const DB::IColumn & src_, size_t start, size_t length, const std::vector<ColumnVariant::Discriminator> * global_discriminators_mapping, Discriminator * skip_discriminator)
 {
     const size_t num_variants = variants.size();
     const auto & src = assert_cast<const ColumnVariant &>(src_);
@@ -557,9 +557,12 @@ void ColumnVariant::insertRangeFromImpl(const DB::IColumn & src_, size_t start, size_t length, const std::vector<ColumnVariant::Discriminator> * global_discriminators_mapping)
         Discriminator global_discr = src_global_discr;
         if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR)
             global_discr = (*global_discriminators_mapping)[src_global_discr];
-        Discriminator local_discr = localDiscriminatorByGlobal(global_discr);
-        if (nested_length)
-            variants[local_discr]->insertRangeFrom(*src.variants[src_local_discr], nested_start, nested_length);
+        if (!skip_discriminator || global_discr != *skip_discriminator)
+        {
+            Discriminator local_discr = localDiscriminatorByGlobal(global_discr);
+            if (nested_length)
+                variants[local_discr]->insertRangeFrom(*src.variants[src_local_discr], nested_start, nested_length);
+        }
     }
 }
 
@@ -610,7 +613,7 @@ void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length)
 void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length)
 #endif
 {
-    insertRangeFromImpl(src_, start, length, nullptr);
+    insertRangeFromImpl(src_, start, length, nullptr, nullptr);
 }
 
 #if !defined(DEBUG_OR_SANITIZER_BUILD)
@@ -627,9 +630,9 @@ void ColumnVariant::insertFrom(const DB::IColumn & src_, size_t n, const std::vector<Discriminator> & global_discriminators_mapping)
     insertFromImpl(src_, n, &global_discriminators_mapping);
 }
 
-void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector<Discriminator> & global_discriminators_mapping)
+void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector<Discriminator> & global_discriminators_mapping, Discriminator skip_discriminator)
 {
-    insertRangeFromImpl(src_, start, length, &global_discriminators_mapping);
+    insertRangeFromImpl(src_, start, length, &global_discriminators_mapping, &skip_discriminator);
 }
 
 void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length, const std::vector<Discriminator> & global_discriminators_mapping)
@@ -673,6 +676,14 @@ void ColumnVariant::insertManyIntoVariantFrom(DB::ColumnVariant::Discriminator global_discr, const IColumn & src_, size_t position, size_t length)
     variants[local_discr]->insertManyFrom(src_, position, length);
 }
 
+void ColumnVariant::deserializeBinaryIntoVariant(ColumnVariant::Discriminator global_discr, const SerializationPtr & serialization, ReadBuffer & buf, const FormatSettings & format_settings)
+{
+    auto local_discr = localDiscriminatorByGlobal(global_discr);
+    serialization->deserializeBinary(*variants[local_discr], buf, format_settings);
+    getLocalDiscriminators().push_back(local_discr);
+    getOffsets().push_back(variants[local_discr]->size() - 1);
+}
+
 void ColumnVariant::insertDefault()
 {
     getLocalDiscriminators().push_back(NULL_DISCRIMINATOR);
@@ -1213,9 +1224,7 @@ struct ColumnVariant::ComparatorBase
     ALWAYS_INLINE int compare(size_t lhs, size_t rhs) const
     {
-        int res = parent.compareAt(lhs, rhs, parent, nan_direction_hint);
-
-        return res;
+        return parent.compareAt(lhs, rhs, parent, nan_direction_hint);
     }
 };
 
diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h
index 34c24b5428d..571a843d113 100644
--- a/src/Columns/ColumnVariant.h
+++ b/src/Columns/ColumnVariant.h
@@ -2,6 +2,8 @@
 
 #include
 #include
+#include
+#include
 
 
 namespace DB
@@ -196,13 +198,15 @@ public:
 
     /// Methods for insertion from another Variant but with known mapping between global discriminators.
     void insertFrom(const IColumn & src_, size_t n, const std::vector<Discriminator> & global_discriminators_mapping);
-    void insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector<Discriminator> & global_discriminators_mapping);
+    /// Don't insert data into variant with skip_discriminator global discriminator, it will be processed separately.
+    void insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector<Discriminator> & global_discriminators_mapping, Discriminator skip_discriminator);
     void insertManyFrom(const IColumn & src_, size_t position, size_t length, const std::vector<Discriminator> & global_discriminators_mapping);
 
     /// Methods for insertion into a specific variant.
     void insertIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t n);
     void insertRangeIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t start, size_t length);
     void insertManyIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t position, size_t length);
+    void deserializeBinaryIntoVariant(Discriminator global_discr, const SerializationPtr & serialization, ReadBuffer & buf, const FormatSettings & format_settings);
 
     void insertDefault() override;
     void insertManyDefaults(size_t length) override;
@@ -263,6 +267,7 @@ public:
     ColumnPtr & getVariantPtrByGlobalDiscriminator(size_t discr) { return variants[global_to_local_discriminators.at(discr)]; }
 
     const NestedColumns & getVariants() const { return variants; }
+    NestedColumns & getVariants() { return variants; }
 
     const IColumn & getLocalDiscriminatorsColumn() const { return *local_discriminators; }
     IColumn & getLocalDiscriminatorsColumn() { return *local_discriminators; }
@@ -302,6 +307,8 @@ public:
         return true;
     }
 
+    std::vector<Discriminator> getLocalToGlobalDiscriminatorsMapping() const { return local_to_global_discriminators; }
+
     /// Check if we have only 1 non-empty variant and no NULL values,
     /// and if so, return the discriminator of this non-empty column.
     std::optional<Discriminator> getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls() const;
@@ -322,7 +329,7 @@ public:
 
 private:
     void insertFromImpl(const IColumn & src_, size_t n, const std::vector<Discriminator> * global_discriminators_mapping);
-    void insertRangeFromImpl(const IColumn & src_, size_t start, size_t length, const std::vector<Discriminator> * global_discriminators_mapping);
+    void insertRangeFromImpl(const IColumn & src_, size_t start, size_t length, const std::vector<Discriminator> * global_discriminators_mapping, Discriminator * skip_discriminator);
     void insertManyFromImpl(const IColumn & src_, size_t position, size_t length, const std::vector<Discriminator> * global_discriminators_mapping);
 
     void initIdentityGlobalToLocalDiscriminatorsMapping();
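Illustration (not part of the patch): the new skip_discriminator parameter lets ColumnDynamic bulk-copy a range from another Variant while leaving out the rows that belong to the source's shared variant, because those rows may hold types that must be re-routed individually on the destination side. A sketch of the assumed caller-side pattern (variable names are hypothetical):

    /// Copy all rows except those whose mapped global discriminator equals `skip`;
    /// the caller then re-inserts each skipped row separately, e.g. via
    /// deserializeBinaryIntoVariant() or insertValueIntoSharedVariant().
    auto skip = shared_variant_discr_after_mapping;  /// assumption: computed by the caller
    variant_col.insertRangeFrom(src_variant_col, start, length, global_discriminators_mapping, skip);
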
diff --git a/src/Columns/tests/gtest_column_dynamic.cpp b/src/Columns/tests/gtest_column_dynamic.cpp
index a2862b09de1..5445bd525d9 100644
--- a/src/Columns/tests/gtest_column_dynamic.cpp
+++ b/src/Columns/tests/gtest_column_dynamic.cpp
@@ -9,9 +9,12 @@ TEST(ColumnDynamic, CreateEmpty)
 {
     auto column = ColumnDynamic::create(255);
     ASSERT_TRUE(column->empty());
-    ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant()");
-    ASSERT_TRUE(column->getVariantInfo().variant_names.empty());
-    ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.empty());
+    ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(SharedVariant)");
+    ASSERT_EQ(column->getVariantInfo().variant_names.size(), 1);
+    ASSERT_EQ(column->getVariantInfo().variant_names[0], "SharedVariant");
+    ASSERT_EQ(column->getVariantInfo().variant_name_to_discriminator.size(), 1);
+    ASSERT_EQ(column->getVariantInfo().variant_name_to_discriminator.at("SharedVariant"), 0);
+    ASSERT_TRUE(column->getVariantColumn().getVariantByGlobalDiscriminator(0).empty());
 }
 
 TEST(ColumnDynamic, InsertDefault)
@@ -19,9 +22,12 @@ TEST(ColumnDynamic, InsertDefault)
     auto column = ColumnDynamic::create(255);
     column->insertDefault();
     ASSERT_TRUE(column->size() == 1);
-    ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant()");
-    ASSERT_TRUE(column->getVariantInfo().variant_names.empty());
-    ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.empty());
+    ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(SharedVariant)");
+    ASSERT_EQ(column->getVariantInfo().variant_names.size(), 1);
+    ASSERT_EQ(column->getVariantInfo().variant_names[0], "SharedVariant");
+    ASSERT_EQ(column->getVariantInfo().variant_name_to_discriminator.size(), 1);
+    ASSERT_EQ(column->getVariantInfo().variant_name_to_discriminator.at("SharedVariant"), 0);
+    ASSERT_TRUE(column->getVariantColumn().getVariantByGlobalDiscriminator(0).empty());
     ASSERT_TRUE(column->isNullAt(0));
     ASSERT_EQ((*column)[0], Field(Null()));
 }
@@ -41,10 +47,10 @@ TEST(ColumnDynamic, InsertFields)
     column->insert(Field(43.43));
     ASSERT_TRUE(column->size() == 10);
 
-    ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(Float64, Int8, String)");
-    std::vector<String> expected_names = {"Float64", "Int8", "String"};
+    ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(Float64, Int8, SharedVariant, String)");
+    std::vector<String> expected_names = {"Float64", "Int8", "SharedVariant", "String"};
     ASSERT_EQ(column->getVariantInfo().variant_names, expected_names);
-    std::unordered_map<String, UInt8> expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"String", 2}};
+    std::unordered_map<String, UInt8> expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}};
     ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator);
 }
 
@@ -66,56 +72,66 @@ TEST(ColumnDynamic, InsertFieldsOverflow1)
 {
     auto column = getDynamicWithManyVariants(253);
 
-    ASSERT_EQ(column->getVariantInfo().variant_names.size(), 253);
+    ASSERT_EQ(column->getVariantInfo().variant_names.size(), 254);
 
     column->insert(Field(42.42));
-    ASSERT_EQ(column->getVariantInfo().variant_names.size(), 254);
+    ASSERT_EQ(column->size(), 254);
+    ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255);
     ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
 
     column->insert(Field(42));
+    ASSERT_EQ(column->size(), 255);
     ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255);
     ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
-    ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_EQ(column->getSharedVariant().size(), 1);
    Field field = (*column)[column->size() - 1];
-    ASSERT_EQ(field, "42");
+    ASSERT_EQ(field, 42);
 
     column->insert(Field(43));
+    ASSERT_EQ(column->size(), 256);
     ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255);
     ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
-    ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_EQ(column->getSharedVariant().size(), 2);
     field = (*column)[column->size() - 1];
-    ASSERT_EQ(field, "43");
+    ASSERT_EQ(field, 43);
 
     column->insert(Field("str1"));
+    ASSERT_EQ(column->size(), 257);
     ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255);
     ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
-    ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_EQ(column->getSharedVariant().size(), 3);
     field = (*column)[column->size() - 1];
     ASSERT_EQ(field, "str1");
 
     column->insert(Field(Array({Field(42), Field(43)})));
     ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255);
     ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)"));
-    ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_EQ(column->getSharedVariant().size(), 4);
     field = (*column)[column->size() - 1];
-    ASSERT_EQ(field, "[42, 43]");
+    ASSERT_EQ(field, Field(Array({Field(42), Field(43)})));
 }
 
 TEST(ColumnDynamic, InsertFieldsOverflow2)
 {
     auto column = getDynamicWithManyVariants(254);
 
-    ASSERT_EQ(column->getVariantInfo().variant_names.size(), 254);
+    ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255);
 
     column->insert(Field("str1"));
     ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255);
-    ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_EQ(column->getSharedVariant().size(), 1);
+    Field field = (*column)[column->size() - 1];
+    ASSERT_EQ(field, "str1");
 
     column->insert(Field(42));
     ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255);
     ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
-    ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
-    Field field = (*column)[column->size() - 1];
-    ASSERT_EQ(field, "42");
+    ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_EQ(column->getSharedVariant().size(), 2);
+    field = (*column)[column->size() - 1];
+    ASSERT_EQ(field, 42);
 }
 
 ColumnDynamic::MutablePtr getInsertFromColumn(size_t num = 1)
@@ -155,7 +171,7 @@ void checkInsertFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector<String> & expected_names, const std::unordered_map<String, UInt8> & expected_variant_name_to_discriminator)
 TEST(ColumnDynamic, InsertFrom1)
 {
     auto column_to = ColumnDynamic::create(255);
-    checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}});
+    checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}});
 }
 
 TEST(ColumnDynamic, InsertFrom2)
@@ -165,7 +181,7 @@ TEST(ColumnDynamic, InsertFrom2)
     column_to->insert(Field(42.42));
     column_to->insert(Field("str"));
 
-    checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}});
+    checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}});
 }
 
 TEST(ColumnDynamic, InsertFrom3)
@@ -176,7 +192,7 @@ TEST(ColumnDynamic, InsertFrom3)
     column_to->insert(Field("str"));
     column_to->insert(Array({Field(42)}));
 
-    checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}});
+    checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, SharedVariant, String)", {"Array(Int8)", "Float64", "Int8", "SharedVariant", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"SharedVariant", 3}, {"String", 4}});
 }
 
 TEST(ColumnDynamic, InsertFromOverflow1)
@@ -188,7 +204,7 @@ TEST(ColumnDynamic, InsertFromOverflow1)
     auto column_to = getDynamicWithManyVariants(253);
     column_to->insertFrom(*column_from, 0);
-    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254);
+    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
     ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
     auto field = (*column_to)[column_to->size() - 1];
     ASSERT_EQ(field, 42);
@@ -196,13 +212,15 @@ TEST(ColumnDynamic, InsertFromOverflow1)
     column_to->insertFrom(*column_from, 1);
     ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
     ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
-    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 1);
     field = (*column_to)[column_to->size() - 1];
-    ASSERT_EQ(field, "42.42");
+    ASSERT_EQ(field, 42.42);
 
     column_to->insertFrom(*column_from, 2);
     ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
-    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 2);
     field = (*column_to)[column_to->size() - 1];
     ASSERT_EQ(field, "str");
 }
 
@@ -221,9 +239,32 @@ TEST(ColumnDynamic, InsertFromOverflow2)
     column_to->insertFrom(*column_from, 1);
     ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
-    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 1);
     field = (*column_to)[column_to->size() - 1];
-    ASSERT_EQ(field, "42.42");
+    ASSERT_EQ(field, 42.42);
+}
+
+TEST(ColumnDynamic, InsertFromOverflow3)
+{
+    auto column_from = ColumnDynamic::create(1);
+    column_from->insert(Field(42));
+    column_from->insert(Field(42.42));
+
+    auto column_to = ColumnDynamic::create(255);
+    column_to->insert(Field(41));
+
+    column_to->insertFrom(*column_from, 0);
+    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 0);
+    auto field = (*column_to)[column_to->size() - 1];
+    ASSERT_EQ(field, 42);
+
+    column_to->insertFrom(*column_from, 1);
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 1);
+    field = (*column_to)[column_to->size() - 1];
+    ASSERT_EQ(field, 42.42);
 }
 
 void checkInsertManyFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector<String> & expected_names, const std::unordered_map<String, UInt8> & expected_variant_name_to_discriminator)
@@ -257,7 +298,7 @@ void checkInsertManyFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector<String> & expected_names, const std::unordered_map<String, UInt8> & expected_variant_name_to_discriminator)
 TEST(ColumnDynamic, InsertManyFrom1)
 {
     auto column_to = ColumnDynamic::create(255);
-    checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}});
+    checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}});
 }
 
 TEST(ColumnDynamic, InsertManyFrom2)
@@ -267,7 +308,7 @@ TEST(ColumnDynamic, InsertManyFrom2)
     column_to->insert(Field(42.42));
     column_to->insert(Field("str"));
 
-    checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}});
+    checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}});
 }
 
 TEST(ColumnDynamic, InsertManyFrom3)
@@ -278,7 +319,7 @@ TEST(ColumnDynamic, InsertManyFrom3)
     column_to->insert(Field("str"));
     column_to->insert(Array({Field(42)}));
 
-    checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}});
+    checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, SharedVariant, String)", {"Array(Int8)", "Float64", "Int8", "SharedVariant", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"SharedVariant", 3}, {"String", 4}});
 }
 
 TEST(ColumnDynamic, InsertManyFromOverflow1)
@@ -290,8 +331,9 @@ TEST(ColumnDynamic, InsertManyFromOverflow1)
     auto column_to = getDynamicWithManyVariants(253);
     column_to->insertManyFrom(*column_from, 0, 2);
-    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254);
+    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
     ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 0);
     auto field = (*column_to)[column_to->size() - 2];
     ASSERT_EQ(field, 42);
     field = (*column_to)[column_to->size() - 1];
@@ -300,15 +342,17 @@ TEST(ColumnDynamic, InsertManyFromOverflow1)
     column_to->insertManyFrom(*column_from, 1, 2);
     ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
     ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
-    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 2);
     field = (*column_to)[column_to->size() - 2];
-    ASSERT_EQ(field, "42.42");
+    ASSERT_EQ(field, 42.42);
     field = (*column_to)[column_to->size() - 1];
-    ASSERT_EQ(field, "42.42");
+    ASSERT_EQ(field, 42.42);
 
     column_to->insertManyFrom(*column_from, 2, 2);
     ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
-    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 4);
     field = (*column_to)[column_to->size() - 1];
     ASSERT_EQ(field, "str");
     field = (*column_to)[column_to->size() - 2];
@@ -323,8 +367,9 @@ TEST(ColumnDynamic, InsertManyFromOverflow2)
     auto column_to = getDynamicWithManyVariants(253);
     column_to->insertManyFrom(*column_from, 0, 2);
-    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254);
+    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
     ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 0);
     auto field = (*column_to)[column_to->size() - 2];
     ASSERT_EQ(field, 42);
     field = (*column_to)[column_to->size() - 1];
@@ -333,11 +378,39 @@ TEST(ColumnDynamic, InsertManyFromOverflow2)
     column_to->insertManyFrom(*column_from, 1, 2);
     ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
     ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
-    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 2);
     field = (*column_to)[column_to->size() - 2];
-    ASSERT_EQ(field, "42.42");
+    ASSERT_EQ(field, 42.42);
     field = (*column_to)[column_to->size() - 1];
-    ASSERT_EQ(field, "42.42");
+    ASSERT_EQ(field, 42.42);
+}
+
+
+TEST(ColumnDynamic, InsertManyFromOverflow3)
+{
+    auto column_from = ColumnDynamic::create(1);
+    column_from->insert(Field(42));
+    column_from->insert(Field(42.42));
+
+    auto column_to = ColumnDynamic::create(255);
+    column_to->insert(Field(41));
+
+    column_to->insertManyFrom(*column_from, 0, 2);
+    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 0);
+    auto field = (*column_to)[column_to->size() - 2];
+    ASSERT_EQ(field, 42);
+    field = (*column_to)[column_to->size() - 1];
+    ASSERT_EQ(field, 42);
+
+    column_to->insertManyFrom(*column_from, 1, 2);
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 2);
+    field = (*column_to)[column_to->size() - 2];
+    ASSERT_EQ(field, 42.42);
+    field = (*column_to)[column_to->size() - 1];
+    ASSERT_EQ(field, 42.42);
 }
 
 void checkInsertRangeFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector<String> & expected_names, const std::unordered_map<String, UInt8> & expected_variant_name_to_discriminator)
@@ -369,7 +442,7 @@ void checkInsertRangeFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector<String> & expected_names, const std::unordered_map<String, UInt8> & expected_variant_name_to_discriminator)
 TEST(ColumnDynamic, InsertRangeFrom1)
 {
     auto column_to = ColumnDynamic::create(255);
-    checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}});
+    checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}});
 }
 
 TEST(ColumnDynamic, InsertRangeFrom2)
@@ -379,7 +452,7 @@ TEST(ColumnDynamic, InsertRangeFrom2)
     column_to->insert(Field(42.42));
     column_to->insert(Field("str1"));
 
-    checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}});
+    checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}});
 }
 
 TEST(ColumnDynamic, InsertRangeFrom3)
@@ -390,7 +463,7 @@ TEST(ColumnDynamic, InsertRangeFrom3)
     column_to->insert(Field("str1"));
     column_to->insert(Array({Field(42)}));
 
-    checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}});
+    checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Array(Int8), Float64, Int8, SharedVariant, String)", {"Array(Int8)", "Float64", "Int8", "SharedVariant", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"SharedVariant", 3}, {"String", 4}});
 }
 
 TEST(ColumnDynamic, InsertRangeFromOverflow1)
@@ -403,16 +476,18 @@ TEST(ColumnDynamic, InsertRangeFromOverflow1)
     auto column_to = getDynamicWithManyVariants(253);
     column_to->insertRangeFrom(*column_from, 0, 4);
+    ASSERT_EQ(column_to->size(), 257);
     ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
     ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
-    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
     ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 2);
     auto field = (*column_to)[column_to->size() - 4];
     ASSERT_EQ(field, Field(42));
     field = (*column_to)[column_to->size() - 3];
     ASSERT_EQ(field, Field(43));
     field = (*column_to)[column_to->size() - 2];
-    ASSERT_EQ(field, Field("42.42"));
+    ASSERT_EQ(field, Field(42.42));
     field = (*column_to)[column_to->size() - 1];
     ASSERT_EQ(field, Field("str"));
 }
@@ -428,14 +503,15 @@ TEST(ColumnDynamic, InsertRangeFromOverflow2)
     column_to->insertRangeFrom(*column_from, 0, 3);
     ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
     ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
-    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
     ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 1);
     auto field = (*column_to)[column_to->size() - 3];
     ASSERT_EQ(field, Field(42));
     field = (*column_to)[column_to->size() - 2];
     ASSERT_EQ(field, Field(43));
     field = (*column_to)[column_to->size() - 1];
-    ASSERT_EQ(field, Field("42.42"));
+    ASSERT_EQ(field, Field(42.42));
 }
 
 TEST(ColumnDynamic, InsertRangeFromOverflow3)
@@ -449,15 +525,16 @@ TEST(ColumnDynamic, InsertRangeFromOverflow3)
     column_to->insert(Field("Str"));
     column_to->insertRangeFrom(*column_from, 0, 3);
     ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
-    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
     ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
     ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 3);
     auto field = (*column_to)[column_to->size() - 3];
     ASSERT_EQ(field, Field(42));
     field = (*column_to)[column_to->size() - 2];
     ASSERT_EQ(field, Field(43));
     field = (*column_to)[column_to->size() - 1];
-    ASSERT_EQ(field, Field("42.42"));
+    ASSERT_EQ(field, Field(42.42));
 }
 
 TEST(ColumnDynamic, InsertRangeFromOverflow4)
@@ -471,12 +548,13 @@ TEST(ColumnDynamic, InsertRangeFromOverflow4)
     column_to->insertRangeFrom(*column_from, 0, 3);
     ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
     ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
-    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
     ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 3);
     auto field = (*column_to)[column_to->size() - 3];
-    ASSERT_EQ(field, Field("42"));
+    ASSERT_EQ(field, Field(42));
     field = (*column_to)[column_to->size() - 2];
-    ASSERT_EQ(field, Field("42.42"));
+    ASSERT_EQ(field, Field(42.42));
     field = (*column_to)[column_to->size() - 1];
     ASSERT_EQ(field, Field("str"));
 }
@@ -493,15 +571,16 @@ TEST(ColumnDynamic, InsertRangeFromOverflow5)
     column_to->insert(Field("str"));
     column_to->insertRangeFrom(*column_from, 0, 4);
     ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
-    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
     ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
     ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 3);
     auto field = (*column_to)[column_to->size() - 4];
     ASSERT_EQ(field, Field(42));
     field = (*column_to)[column_to->size() - 3];
     ASSERT_EQ(field, Field(43));
     field = (*column_to)[column_to->size() - 2];
-    ASSERT_EQ(field, Field("42.42"));
+    ASSERT_EQ(field, Field(42.42));
     field = (*column_to)[column_to->size() - 1];
     ASSERT_EQ(field, Field("str"));
 }
@@ -520,13 +599,14 @@ TEST(ColumnDynamic, InsertRangeFromOverflow6)
     auto column_to = getDynamicWithManyVariants(253);
     column_to->insertRangeFrom(*column_from, 2, 5);
     ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255);
-    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
-    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
-    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
     ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 4);
 
     auto field = (*column_to)[column_to->size() - 5];
-    ASSERT_EQ(field, Field("44"));
+    ASSERT_EQ(field, Field(44));
     field = (*column_to)[column_to->size() - 4];
     ASSERT_EQ(field, Field(42.42));
     field = (*column_to)[column_to->size() - 3];
@@ -534,7 +614,131 @@ TEST(ColumnDynamic, InsertRangeFromOverflow6)
     field = (*column_to)[column_to->size() - 2];
     ASSERT_EQ(field, Field("str"));
     field = (*column_to)[column_to->size() - 1];
-    ASSERT_EQ(field, Field("[42]"));
+    ASSERT_EQ(field, Field(Array({Field(42)})));
+}
+
+TEST(ColumnDynamic, InsertRangeFromOverflow7)
+{
+    auto column_from = ColumnDynamic::create(3);
+    column_from->insert(Field(42.42));
+    column_from->insert(Field("str1"));
+    column_from->insert(Field(42));
+    column_from->insert(Field(43.43));
+    column_from->insert(Field(Array({Field(41)})));
+    column_from->insert(Field(43));
+    column_from->insert(Field("str2"));
+    column_from->insert(Field(Array({Field(42)})));
+
+    auto column_to = ColumnDynamic::create(255);
+    column_to->insert(Field(42));
+
+    column_to->insertRangeFrom(*column_from, 0, 8);
+    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 4);
+    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
+    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 2);
+    auto field = (*column_to)[column_to->size() - 8];
+    ASSERT_EQ(field, Field(42.42));
+    field = (*column_to)[column_to->size() - 7];
+    ASSERT_EQ(field, Field("str1"));
+    field = (*column_to)[column_to->size() - 6];
+    ASSERT_EQ(field, Field(42));
+    field = (*column_to)[column_to->size() - 5];
+    ASSERT_EQ(field, Field(43.43));
+    field = (*column_to)[column_to->size() - 4];
+    ASSERT_EQ(field, Field(Array({Field(41)})));
+    field = (*column_to)[column_to->size() - 3];
+    ASSERT_EQ(field, Field(43));
+    field = (*column_to)[column_to->size() - 2];
+    ASSERT_EQ(field, Field("str2"));
+    field = (*column_to)[column_to->size() - 1];
+    ASSERT_EQ(field, Field(Array({Field(42)})));
+}
+
+TEST(ColumnDynamic, InsertRangeFromOverflow8)
+{
+    auto column_from = ColumnDynamic::create(3);
+    column_from->insert(Field(42.42));
+    column_from->insert(Field("str1"));
+    column_from->insert(Field(42));
+    column_from->insert(Field(43.43));
+    column_from->insert(Field(Array({Field(41)})));
+    column_from->insert(Field(43));
+    column_from->insert(Field("str2"));
+    column_from->insert(Field(Array({Field(42)})));
+
+    auto column_to = ColumnDynamic::create(3);
+    column_to->insert(Field(42));
+    column_from->insert(Field("str1"));
+
+    column_to->insertRangeFrom(*column_from, 0, 8);
+    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 3);
+    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
+    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 4);
+    auto field = (*column_to)[column_to->size() - 8];
+    ASSERT_EQ(field, Field(42.42));
+    field = (*column_to)[column_to->size() - 7];
+    ASSERT_EQ(field, Field("str1"));
+    field = (*column_to)[column_to->size() - 6];
+    ASSERT_EQ(field, Field(42));
+    field = (*column_to)[column_to->size() - 5];
+    ASSERT_EQ(field, Field(43.43));
+    field = (*column_to)[column_to->size() - 4];
+    ASSERT_EQ(field, Field(Array({Field(41)})));
+    field = (*column_to)[column_to->size() - 3];
+    ASSERT_EQ(field, Field(43));
+    field = (*column_to)[column_to->size() - 2];
+    ASSERT_EQ(field, Field("str2"));
+    field = (*column_to)[column_to->size() - 1];
+    ASSERT_EQ(field, Field(Array({Field(42)})));
+}
+
+TEST(ColumnDynamic, InsertRangeFromOverflow9)
+{
+    auto column_from = ColumnDynamic::create(3);
+    column_from->insert(Field("str1"));
+    column_from->insert(Field(42.42));
+    column_from->insert(Field("str2"));
+    column_from->insert(Field(42));
+    column_from->insert(Field(43.43));
+    column_from->insert(Field(Array({Field(41)})));
+    column_from->insert(Field(43));
+    column_from->insert(Field("str2"));
+    column_from->insert(Field(Array({Field(42)})));
+
+    auto column_to = ColumnDynamic::create(3);
+    column_to->insert(Field(42));
+
+    column_to->insertRangeFrom(*column_from, 0, 9);
+    ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 3);
+    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
+    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 4);
+    auto field = (*column_to)[column_to->size() - 9];
+    ASSERT_EQ(field, Field("str1"));
+    field = (*column_to)[column_to->size() - 8];
+    ASSERT_EQ(field, Field(42.42));
+    field = (*column_to)[column_to->size() - 7];
+    ASSERT_EQ(field, Field("str2"));
+    field = (*column_to)[column_to->size() - 6];
+    ASSERT_EQ(field, Field(42));
+    field = (*column_to)[column_to->size() - 5];
+    ASSERT_EQ(field, Field(43.43));
+    field = (*column_to)[column_to->size() - 4];
+    ASSERT_EQ(field, Field(Array({Field(41)})));
+    field = (*column_to)[column_to->size() - 3];
+    ASSERT_EQ(field, Field(43));
+    field = (*column_to)[column_to->size() - 2];
+    ASSERT_EQ(field, Field("str2"));
+    field = (*column_to)[column_to->size() - 1];
+    ASSERT_EQ(field, Field(Array({Field(42)})));
 }
 
 TEST(ColumnDynamic, SerializeDeserializeFromArena1)
@@ -583,18 +787,18 @@ TEST(ColumnDynamic, SerializeDeserializeFromArena2)
     pos = column_to->deserializeAndInsertFromArena(pos);
     column_to->deserializeAndInsertFromArena(pos);
 
-    ASSERT_EQ((*column_from)[column_from->size() - 4], 42);
-    ASSERT_EQ((*column_from)[column_from->size() - 3], 42.42);
-    ASSERT_EQ((*column_from)[column_from->size() - 2], "str");
-    ASSERT_EQ((*column_from)[column_from->size() - 1], Null());
-    ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), "Variant(Float64, Int8, String)");
-    std::vector<String> expected_names = {"Float64", "Int8", "String"};
+    ASSERT_EQ((*column_to)[column_to->size() - 4], 42);
+    ASSERT_EQ((*column_to)[column_to->size() - 3], 42.42);
+    ASSERT_EQ((*column_to)[column_to->size() - 2], "str");
+    ASSERT_EQ((*column_to)[column_to->size() - 1], Null());
+    ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), "Variant(Float64, Int8, SharedVariant, String)");
+    std::vector<String> expected_names = {"Float64", "Int8", "SharedVariant", "String"};
     ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names);
-    std::unordered_map<String, UInt8> expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"String", 2}};
+    std::unordered_map<String, UInt8> expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}};
     ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator);
 }
 
-TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow)
+TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow1)
 {
     auto column_from = ColumnDynamic::create(255);
     column_from->insert(Field(42));
@@ -615,18 +819,56 @@ TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow1)
     pos = column_to->deserializeAndInsertFromArena(pos);
     column_to->deserializeAndInsertFromArena(pos);
 
-    ASSERT_EQ((*column_from)[column_from->size() - 4], 42);
-    ASSERT_EQ((*column_from)[column_from->size() - 3], 42.42);
-    ASSERT_EQ((*column_from)[column_from->size() - 2], "str");
-    ASSERT_EQ((*column_from)[column_from->size() - 1], Null());
+    ASSERT_EQ((*column_to)[column_to->size() - 4], 42);
+    ASSERT_EQ((*column_to)[column_to->size() - 3], 42.42);
+    ASSERT_EQ((*column_to)[column_to->size() - 2], "str");
+    ASSERT_EQ((*column_to)[column_to->size() - 1], Null());
     ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
     ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
-    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 2);
+}
+
+TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow2)
+{
+    auto column_from = ColumnDynamic::create(2);
+    column_from->insert(Field(42));
+    column_from->insert(Field(42.42));
+    column_from->insert(Field("str"));
+    column_from->insert(Field(Null()));
+    column_from->insert(Field(Array({Field(42)})));
+
+    Arena arena;
+    const char * pos = nullptr;
+    auto ref1 = column_from->serializeValueIntoArena(0, arena, pos);
+    column_from->serializeValueIntoArena(1, arena, pos);
+    column_from->serializeValueIntoArena(2, arena, pos);
+    column_from->serializeValueIntoArena(3, arena, pos);
+    column_from->serializeValueIntoArena(4, arena, pos);
+
+    auto column_to = ColumnDynamic::create(3);
+    column_to->insert(Field(42.42));
+    pos = column_to->deserializeAndInsertFromArena(ref1.data);
+    pos = column_to->deserializeAndInsertFromArena(pos);
+    pos = column_to->deserializeAndInsertFromArena(pos);
+    pos = column_to->deserializeAndInsertFromArena(pos);
+    column_to->deserializeAndInsertFromArena(pos);
+
+    ASSERT_EQ((*column_to)[column_to->size() - 5], 42);
+    ASSERT_EQ((*column_to)[column_to->size() - 4], 42.42);
+    ASSERT_EQ((*column_to)[column_to->size() - 3], "str");
+    ASSERT_EQ((*column_to)[column_to->size() - 2], Null());
+    ASSERT_EQ((*column_to)[column_to->size() - 1], Field(Array({Field(42)})));
+    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8"));
+    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String"));
+    ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)"));
+    ASSERT_EQ(column_to->getSharedVariant().size(), 2);
 }
 
 TEST(ColumnDynamic, skipSerializedInArena)
 {
-    auto column_from = ColumnDynamic::create(255);
+    auto column_from = ColumnDynamic::create(3);
     column_from->insert(Field(42));
     column_from->insert(Field(42.42));
     column_from->insert(Field("str"));
@@ -647,6 +889,34 @@ TEST(ColumnDynamic, skipSerializedInArena)
     pos = column_to->skipSerializedInArena(pos);
     ASSERT_EQ(pos, end);
 
-    ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.empty());
-    ASSERT_TRUE(column_to->getVariantInfo().variant_names.empty());
+    ASSERT_EQ(column_to->getVariantInfo().variant_name_to_discriminator.at("SharedVariant"), 0);
+    ASSERT_EQ(column_to->getVariantInfo().variant_names, Names{"SharedVariant"});
+}
+
+TEST(ColumnDynamic, compare)
+{
+    auto column_from = ColumnDynamic::create(3);
+    column_from->insert(Field(42));
+    column_from->insert(Field(42.42));
+    column_from->insert(Field("str"));
+    column_from->insert(Field(Null()));
+    column_from->insert(Field(Array({Field(42)})));
+
+    ASSERT_EQ(column_from->compareAt(0, 0, *column_from, -1), 0);
+    ASSERT_EQ(column_from->compareAt(0, 1, *column_from, -1), 1);
+    ASSERT_EQ(column_from->compareAt(1, 1, *column_from, -1), 0);
+    ASSERT_EQ(column_from->compareAt(0, 2, *column_from, -1), -1);
+    ASSERT_EQ(column_from->compareAt(2, 0, *column_from, -1), 1);
+    ASSERT_EQ(column_from->compareAt(2, 4, *column_from, -1), 1);
+    ASSERT_EQ(column_from->compareAt(4, 2, *column_from, -1), -1);
+    ASSERT_EQ(column_from->compareAt(4, 4, *column_from, -1), 0);
+    ASSERT_EQ(column_from->compareAt(0, 3, *column_from, -1), 1);
+    ASSERT_EQ(column_from->compareAt(1, 3, *column_from, -1), 1);
+    ASSERT_EQ(column_from->compareAt(2, 3, *column_from, -1), 1);
+    ASSERT_EQ(column_from->compareAt(3, 3, *column_from, -1), 0);
+    ASSERT_EQ(column_from->compareAt(4, 3, *column_from, -1), 1);
+    ASSERT_EQ(column_from->compareAt(3, 0, *column_from, -1), -1);
+    ASSERT_EQ(column_from->compareAt(3, 1, *column_from, -1), -1);
+    ASSERT_EQ(column_from->compareAt(3, 2, *column_from, -1), -1);
+    ASSERT_EQ(column_from->compareAt(3, 4, *column_from, -1), -1);
+}
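Taken together, the asserts in the new compare test pin down an ordering convention (inferred here from the asserts alone, not stated in the patch): with nan_direction_hint = -1, NULL rows order before all values, and rows of different types compare by data type name before value, giving Null < Array(Int8) < Float64 < Int8 < String for this column (lexicographic type names). Two of the asserts above restated with that reading:

    ASSERT_EQ(column_from->compareAt(0, 1, *column_from, -1), 1);   /// Int8 42 > Float64 42.42, since "Int8" > "Float64"
    ASSERT_EQ(column_from->compareAt(3, 2, *column_from, -1), -1);  /// Null < String "str" with nan_direction_hint = -1
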
diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp
index a1b1f8325f0..e00638a50ab 100644
--- a/src/DataTypes/DataTypeDynamic.cpp
+++ b/src/DataTypes/DataTypeDynamic.cpp
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -14,6 +15,7 @@
 #include
 #include
 #include
+#include
 
 namespace DB
 {
@@ -71,8 +73,8 @@ static DataTypePtr create(const ASTPtr & arguments)
 
     auto * literal = argument->arguments->children[1]->as<ASTLiteral>();
 
-    if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get<UInt64>() == 0 || literal->value.get<UInt64>() > ColumnVariant::MAX_NESTED_COLUMNS)
-        throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 1 and 255");
+    if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get<UInt64>() > ColumnVariant::MAX_NESTED_COLUMNS - 1)
+        throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 0 and 254");
 
     return std::make_shared<DataTypeDynamic>(literal->value.get<UInt64>());
 }
@@ -84,30 +86,72 @@ void registerDataTypeDynamic(DataTypeFactory & factory)
 
 std::unique_ptr<IDataType::SubstreamData> DataTypeDynamic::getDynamicSubcolumnData(std::string_view subcolumn_name, const DB::IDataType::SubstreamData & data, bool throw_if_null) const
 {
-    auto [subcolumn_type_name, subcolumn_nested_name] = Nested::splitName(subcolumn_name);
+    auto [type_subcolumn_name, subcolumn_nested_name] = Nested::splitName(subcolumn_name);
     /// Check if requested subcolumn is a valid data type.
-    auto subcolumn_type = DataTypeFactory::instance().tryGet(String(subcolumn_type_name));
+    auto subcolumn_type = DataTypeFactory::instance().tryGet(String(type_subcolumn_name));
     if (!subcolumn_type)
     {
         if (throw_if_null)
-            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Dynamic type doesn't have subcolumn '{}'", subcolumn_type_name);
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Dynamic type doesn't have subcolumn '{}'", type_subcolumn_name);
         return nullptr;
     }
 
     std::unique_ptr<SubstreamData> res = std::make_unique<SubstreamData>(subcolumn_type->getDefaultSerialization());
     res->type = subcolumn_type;
     std::optional<size_t> discriminator;
+    ColumnPtr null_map_for_variant_from_shared_variant;
     if (data.column)
     {
         /// If column was provided, we should extract subcolumn from Dynamic column.
         const auto & dynamic_column = assert_cast<const ColumnDynamic &>(*data.column);
         const auto & variant_info = dynamic_column.getVariantInfo();
+        const auto & variant_column = dynamic_column.getVariantColumn();
+        const auto & shared_variant = dynamic_column.getSharedVariant();
         /// Check if provided Dynamic column has subcolumn of this type.
-        auto it = variant_info.variant_name_to_discriminator.find(subcolumn_type->getName());
+        String subcolumn_type_name = subcolumn_type->getName();
+        auto it = variant_info.variant_name_to_discriminator.find(subcolumn_type_name);
         if (it != variant_info.variant_name_to_discriminator.end())
         {
             discriminator = it->second;
-            res->column = dynamic_column.getVariantColumn().getVariantPtrByGlobalDiscriminator(*discriminator);
+            res->column = variant_column.getVariantPtrByGlobalDiscriminator(*discriminator);
+        }
+        /// Otherwise if there is data in shared variant try to find requested type there.
+        else if (!shared_variant.empty())
+        {
+            /// Create null map for resulting subcolumn to make it Nullable.
+            auto null_map_column = ColumnUInt8::create();
+            NullMap & null_map = assert_cast<ColumnUInt8 &>(*null_map_column).getData();
+            null_map.reserve(variant_column.size());
+            auto subcolumn = subcolumn_type->createColumn();
+            auto shared_variant_local_discr = variant_column.localDiscriminatorByGlobal(dynamic_column.getSharedVariantDiscriminator());
+            const auto & local_discriminators = variant_column.getLocalDiscriminators();
+            const auto & offsets = variant_column.getOffsets();
+            const FormatSettings format_settings;
+            for (size_t i = 0; i != local_discriminators.size(); ++i)
+            {
+                if (local_discriminators[i] == shared_variant_local_discr)
+                {
+                    auto value = shared_variant.getDataAt(offsets[i]);
+                    ReadBufferFromMemory buf(value.data, value.size);
+                    auto type = decodeDataType(buf);
+                    if (type->getName() == subcolumn_type_name)
+                    {
+                        dynamic_column.getVariantSerialization(subcolumn_type, subcolumn_type_name)->deserializeBinary(*subcolumn, buf, format_settings);
+                        null_map.push_back(0);
+                    }
+                    else
+                    {
+                        null_map.push_back(1);
+                    }
+                }
+                else
+                {
+                    null_map.push_back(1);
+                }
+            }
+
+            res->column = std::move(subcolumn);
+            null_map_for_variant_from_shared_variant = std::move(null_map_column);
         }
     }
 
@@ -125,7 +169,7 @@ std::unique_ptr<IDataType::SubstreamData> DataTypeDynamic::getDynamicSubcolumnData(std::string_view subcolumn_name, const DB::IDataType::SubstreamData & data, bool throw_if_null) const
         return nullptr;
     }
 
-    res->serialization = std::make_shared<SerializationDynamicElement>(res->serialization, subcolumn_type->getName(), is_null_map_subcolumn);
+    res->serialization = std::make_shared<SerializationDynamicElement>(res->serialization, subcolumn_type->getName(), String(subcolumn_nested_name), is_null_map_subcolumn);
     /// Make resulting subcolumn Nullable only if type subcolumn can be inside Nullable or can be LowCardinality(Nullable()).
     bool make_subcolumn_nullable = subcolumn_type->canBeInsideNullable() || subcolumn_type->lowCardinality();
     if (!is_null_map_subcolumn && make_subcolumn_nullable)
@@ -133,10 +177,10 @@ std::unique_ptr<IDataType::SubstreamData> DataTypeDynamic::getDynamicSubcolumnData(std::string_view subcolumn_name, const DB::IDataType::SubstreamData & data, bool throw_if_null) const
 
     if (data.column)
     {
+        /// Check if provided Dynamic column has subcolumn of this type. In this case we should use VariantSubcolumnCreator/VariantNullMapSubcolumnCreator to
+        /// create full subcolumn from variant according to discriminators.
         if (discriminator)
         {
-            /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator/VariantNullMapSubcolumnCreator to
-            /// create full subcolumn from variant according to discriminators.
             const auto & variant_column = assert_cast<const ColumnDynamic &>(*data.column).getVariantColumn();
             std::unique_ptr<ISerialization::ISubcolumnCreator> creator;
             if (is_null_map_subcolumn)
@@ -154,6 +198,21 @@ std::unique_ptr<IDataType::SubstreamData> DataTypeDynamic::getDynamicSubcolumnData(std::string_view subcolumn_name, const DB::IDataType::SubstreamData & data, bool throw_if_null) const
                     make_subcolumn_nullable);
             res->column = creator->create(res->column);
         }
+        /// Check if requested type was extracted from shared variant. In this case we should use
+        /// VariantSubcolumnCreator to create full subcolumn from variant according to created null map.
+        else if (null_map_for_variant_from_shared_variant)
+        {
+            if (is_null_map_subcolumn)
+            {
+                res->column = null_map_for_variant_from_shared_variant;
+            }
+            else
+            {
+                SerializationVariantElement::VariantSubcolumnCreator creator(
+                    null_map_for_variant_from_shared_variant, "", 0, 0, make_subcolumn_nullable, null_map_for_variant_from_shared_variant);
+                res->column = creator.create(res->column);
+            }
+        }
         /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values.
         else if (is_null_map_subcolumn)
         {
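Illustration (not part of the patch): the extraction loop above produces two parallel pieces, the deserialized values and a null map with 0 only on rows whose shared-variant cell held the requested type. The patch glues them together through SerializationVariantElement::VariantSubcolumnCreator; as a rough mental model (an assumption for illustration, not what the code does literally), the pairing is equivalent to building a Nullable column by hand:

    /// Hypothetical names; sketch of the Nullable shape the creator produces.
    auto nullable_subcolumn = ColumnNullable::create(
        std::move(deserialized_values),   /// one value per row, default where null
        std::move(null_map_column));      /// 0 = row's shared-variant cell matched the requested type
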
else if (is_null_map_subcolumn) { diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index 6f7dcd65b83..ca2ebdfbdbb 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -150,6 +150,12 @@ DataTypePtr DataTypeFactory::getCustom(DataTypeCustomDescPtr customization) cons return type; } +DataTypePtr DataTypeFactory::getCustom(const String & base_name, DataTypeCustomDescPtr customization) const +{ + auto type = get(base_name); + type->setCustomization(std::move(customization)); + return type; +} void DataTypeFactory::registerDataType(const String & family_name, Value creator, Case case_sensitiveness) { diff --git a/src/DataTypes/DataTypeFactory.h b/src/DataTypes/DataTypeFactory.h index edba9886d1c..a8324341691 100644 --- a/src/DataTypes/DataTypeFactory.h +++ b/src/DataTypes/DataTypeFactory.h @@ -34,6 +34,7 @@ public: DataTypePtr get(const String & family_name, const ASTPtr & parameters) const; DataTypePtr get(const ASTPtr & ast) const; DataTypePtr getCustom(DataTypeCustomDescPtr customization) const; + DataTypePtr getCustom(const String & base_name, DataTypeCustomDescPtr customization) const; /// Return nullptr in case of error. DataTypePtr tryGet(const String & full_name) const; diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index e96937d522d..67b4a0a5e31 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -192,17 +192,12 @@ MutableColumnPtr DataTypeTuple::createColumn() const MutableColumnPtr DataTypeTuple::createColumn(const ISerialization & serialization) const { - /// If we read Tuple as Variant subcolumn, it may be wrapped to SerializationVariantElement. - /// Here we don't need it, so we drop this wrapper. - const auto * current_serialization = &serialization; - while (const auto * serialization_variant_element = typeid_cast(current_serialization)) - current_serialization = serialization_variant_element->getNested().get(); - - /// If we read subcolumn of nested Tuple, it may be wrapped to SerializationNamed + /// If we read subcolumn of nested Tuple or this Tuple is a subcolumn, it may be wrapped to SerializationWrapper /// several times to allow to reconstruct the substream path name. /// Here we don't need substream path name, so we drop first several wrapper serializations. - while (const auto * serialization_named = typeid_cast(current_serialization)) - current_serialization = serialization_named->getNested().get(); + const auto * current_serialization = &serialization; + while (const auto * serialization_wrapper = dynamic_cast(current_serialization)) + current_serialization = serialization_wrapper->getNested().get(); const auto * serialization_tuple = typeid_cast(current_serialization); if (!serialization_tuple) diff --git a/src/DataTypes/DataTypesBinaryEncoding.cpp b/src/DataTypes/DataTypesBinaryEncoding.cpp index bd994e313ba..610f246265e 100644 --- a/src/DataTypes/DataTypesBinaryEncoding.cpp +++ b/src/DataTypes/DataTypesBinaryEncoding.cpp @@ -444,7 +444,7 @@ void encodeDataType(const DataTypePtr & type, WriteBuffer & buf) case BinaryTypeIndex::Dynamic: { const auto & dynamic_type = assert_cast(*type); - /// Maximum number of dynamic types is 255, we can write it as 1 byte. + /// Maximum number of dynamic types is 254, we can write it as 1 byte. 
writeBinary(UInt8(dynamic_type.getMaxDynamicTypes()), buf);
             break;
         }
diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp
index 7609ffc91ca..67b29750948 100644
--- a/src/DataTypes/Serializations/SerializationDynamic.cpp
+++ b/src/DataTypes/Serializations/SerializationDynamic.cpp
@@ -27,15 +27,21 @@ namespace ErrorCodes
 struct SerializeBinaryBulkStateDynamic : public ISerialization::SerializeBinaryBulkState
 {
     SerializationDynamic::DynamicStructureSerializationVersion structure_version;
+    size_t max_dynamic_types;
     DataTypePtr variant_type;
     Names variant_names;
     SerializationPtr variant_serialization;
     ISerialization::SerializeBinaryBulkStatePtr variant_state;
 
-    /// Variants statistics. Map (Variant name) -> (Variant size).
-    ColumnDynamic::Statistics statistics = { .source = ColumnDynamic::Statistics::Source::READ, .data = {} };
+    /// Variants statistics.
+    ColumnDynamic::Statistics statistics;
+    /// If true, statistics will be recalculated during serialization.
+    bool recalculate_statistics = false;
 
-    explicit SerializeBinaryBulkStateDynamic(UInt64 structure_version_) : structure_version(structure_version_) {}
+    explicit SerializeBinaryBulkStateDynamic(UInt64 structure_version_)
+        : structure_version(structure_version_), statistics(ColumnDynamic::Statistics::Source::READ)
+    {
+    }
 };
 
 struct DeserializeBinaryBulkStateDynamic : public ISerialization::DeserializeBinaryBulkState
@@ -106,20 +112,41 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix(
     writeBinaryLittleEndian(structure_version, *stream);
     auto dynamic_state = std::make_shared<SerializeBinaryBulkStateDynamic>(structure_version);
 
+    dynamic_state->max_dynamic_types = column_dynamic.getMaxDynamicTypes();
+    /// Write max_dynamic_types parameter, because it can differ from the max_dynamic_types
+    /// that is specified in the Dynamic type (we could decrease it before merge).
+    writeBinaryLittleEndian(dynamic_state->max_dynamic_types, *stream);
+
     dynamic_state->variant_type = variant_info.variant_type;
     dynamic_state->variant_names = variant_info.variant_names;
     const auto & variant_column = column_dynamic.getVariantColumn();
 
-    /// Write internal Variant type name.
+    /// Write information about variants.
+    size_t num_variants = dynamic_state->variant_names.size() - 1; /// Don't write shared variant, Dynamic column should always have it.
+    writeBinaryLittleEndian(num_variants, *stream);
     if (settings.data_types_binary_encoding)
-        encodeDataType(dynamic_state->variant_type, *stream);
+    {
+        const auto & variants = assert_cast<const DataTypeVariant &>(*dynamic_state->variant_type).getVariants();
+        for (const auto & variant : variants)
+        {
+            if (variant->getName() != ColumnDynamic::getSharedVariantTypeName())
+                encodeDataType(variant, *stream);
+        }
+    }
     else
-        writeStringBinary(dynamic_state->variant_type->getName(), *stream);
+    {
+        for (const auto & name : dynamic_state->variant_names)
+        {
+            if (name != ColumnDynamic::getSharedVariantTypeName())
+                writeStringBinary(name, *stream);
+        }
+    }
 
     /// Write statistics in prefix if needed.
     if (settings.dynamic_write_statistics == SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX)
     {
         const auto & statistics = column_dynamic.getStatistics();
+        /// First, write statistics for usual variants.
         for (size_t i = 0; i != variant_info.variant_names.size(); ++i)
         {
             size_t size = 0;
@@ -129,13 +156,55 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix(
             /// - statistics read from the data part during deserialization of Dynamic column (Statistics::Source::READ).
/// We can rely only on statistics calculated during the merge, because column with statistics that was read /// during deserialization from some data part could be filtered/limited/transformed/etc and so the statistics can be outdated. - if (!statistics.data.empty() && statistics.source == ColumnDynamic::Statistics::Source::MERGE) - size = statistics.data.at(variant_info.variant_names[i]); + if (statistics && statistics->source == ColumnDynamic::Statistics::Source::MERGE) + size = statistics->variants_statistics.at(variant_info.variant_names[i]); /// Otherwise we can use only variant sizes from current column. else size = variant_column.getVariantByGlobalDiscriminator(i).size(); writeVarUInt(size, *stream); } + + /// Second, write statistics for variants in shared variant. + /// Check if we have statistics calculated during merge of some data parts (Statistics::Source::MERGE). + if (statistics && statistics->source == ColumnDynamic::Statistics::Source::MERGE) + { + writeVarUInt(statistics->shared_variants_statistics.size(), *stream); + for (const auto & [variant_name, size] : statistics->shared_variants_statistics) + { + writeStringBinary(variant_name, *stream); + writeVarUInt(size, *stream); + } + } + /// If we don't have statistics for shared variants from merge, calculate it from the column. + else + { + std::unordered_map shared_variants_statistics; + const auto & shared_variant = column_dynamic.getSharedVariant(); + for (size_t i = 0; i != shared_variant.size(); ++i) + { + auto value = shared_variant.getDataAt(i); + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + auto type_name = type->getName(); + if (auto it = shared_variants_statistics.find(type_name); it != shared_variants_statistics.end()) + ++it->second; + else if (shared_variants_statistics.size() < ColumnDynamic::Statistics::MAX_SHARED_VARIANT_STATISTICS_SIZE) + shared_variants_statistics.emplace(type_name, 1); + } + + writeVarUInt(shared_variants_statistics.size(), *stream); + for (const auto & [variant_name, size] : shared_variants_statistics) + { + writeStringBinary(variant_name, *stream); + writeVarUInt(size, *stream); + } + } + } + /// Otherwise statistics will be written in the suffix, in this case we will recalculate + /// statistics during serialization to make it more precise. + else + { + dynamic_state->recalculate_statistics = true; } dynamic_state->variant_serialization = dynamic_state->variant_type->getDefaultSerialization(); @@ -182,33 +251,58 @@ ISerialization::DeserializeBinaryBulkStatePtr SerializationDynamic::deserializeD UInt64 structure_version; readBinaryLittleEndian(structure_version, *structure_stream); auto structure_state = std::make_shared(structure_version); - /// Read internal Variant type name. + /// Read max_dynamic_types parameter. + readBinaryLittleEndian(structure_state->max_dynamic_types, *structure_stream); + /// Read information about variants. + DataTypes variants; + size_t num_variants; + readBinaryLittleEndian(num_variants, *structure_stream); + variants.reserve(num_variants + 1); /// +1 for shared variant. 
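+    /// For reference, the structure prefix read below mirrors what serializeBinaryBulkStatePrefix
+    /// writes above (a sketch of the layout as implied by this patch, not a formal spec):
+    ///
+    ///     structure_version        UInt64, little endian
+    ///     max_dynamic_types        little endian
+    ///     num_variants             little endian, shared variant excluded
+    ///     num_variants entries     binary-encoded types or type names, per data_types_binary_encoding
+    ///     statistics               per-variant sizes plus (count, name, size) pairs for shared
+    ///                              variants, present only when statistics are stored in the prefix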
if (settings.data_types_binary_encoding)
     {
-        structure_state->variant_type = decodeDataType(*structure_stream);
+        for (size_t i = 0; i != num_variants; ++i)
+            variants.push_back(decodeDataType(*structure_stream));
     }
     else
     {
         String data_type_name;
-        readStringBinary(data_type_name, *structure_stream);
-        structure_state->variant_type = DataTypeFactory::instance().get(data_type_name);
+        for (size_t i = 0; i != num_variants; ++i)
+        {
+            readStringBinary(data_type_name, *structure_stream);
+            variants.push_back(DataTypeFactory::instance().get(data_type_name));
+        }
     }
-    const auto * variant_type = typeid_cast<const DataTypeVariant *>(structure_state->variant_type.get());
-    if (!variant_type)
-        throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type of Dynamic nested column, expected Variant, got {}", structure_state->variant_type->getName());
+    /// Add shared variant, Dynamic column should always have it.
+    variants.push_back(ColumnDynamic::getSharedVariantDataType());
+    auto variant_type = std::make_shared<DataTypeVariant>(variants);
 
     /// Read statistics.
     if (settings.dynamic_read_statistics)
     {
-        const auto & variants = variant_type->getVariants();
+        ColumnDynamic::Statistics statistics(ColumnDynamic::Statistics::Source::READ);
+        /// First, read statistics for usual variants.
         size_t variant_size;
-        for (const auto & variant : variants)
+        for (const auto & variant : variant_type->getVariants())
         {
             readVarUInt(variant_size, *structure_stream);
-            structure_state->statistics.data[variant->getName()] = variant_size;
+            statistics.variants_statistics[variant->getName()] = variant_size;
         }
+
+        /// Second, read statistics for shared variants.
+        size_t statistics_size;
+        readVarUInt(statistics_size, *structure_stream);
+        String variant_name;
+        for (size_t i = 0; i != statistics_size; ++i)
+        {
+            readStringBinary(variant_name, *structure_stream);
+            readVarUInt(variant_size, *structure_stream);
+            statistics.shared_variants_statistics[variant_name] = variant_size;
+        }
+
+        structure_state->statistics = std::make_shared<const ColumnDynamic::Statistics>(std::move(statistics));
     }
 
+    structure_state->variant_type = std::move(variant_type);
     state = structure_state;
     addToSubstreamsDeserializeStatesCache(cache, settings.path, state);
 }
@@ -231,8 +325,16 @@ void SerializationDynamic::serializeBinaryBulkStateSuffix(
     /// Write statistics in suffix if needed.
     if (settings.dynamic_write_statistics == SerializeBinaryBulkSettings::DynamicStatisticsMode::SUFFIX)
     {
+        /// First, write statistics for usual variants.
         for (const auto & variant_name : dynamic_state->variant_names)
-            writeVarUInt(dynamic_state->statistics.data[variant_name], *stream);
+            writeVarUInt(dynamic_state->statistics.variants_statistics[variant_name], *stream);
+        /// Second, write statistics for shared variants.
+        writeVarUInt(dynamic_state->statistics.shared_variants_statistics.size(), *stream);
+        for (const auto & [variant_name, size] : dynamic_state->statistics.shared_variants_statistics)
+        {
+            writeStringBinary(variant_name, *stream);
+            writeVarUInt(size, *stream);
+        }
     }
 
     settings.path.push_back(Substream::DynamicData);
@@ -255,9 +357,42 @@ void SerializationDynamic::serializeBinaryBulkWithMultipleStreams(
     if (!variant_info.variant_type->equals(*dynamic_state->variant_type))
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of internal columns of Dynamic.
Expected: {}, Got: {}", dynamic_state->variant_type->getName(), variant_info.variant_type->getName()); + if (column_dynamic.getMaxDynamicTypes() != dynamic_state->max_dynamic_types) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of max_dynamic_types parameter of Dynamic. Expected: {}, Got: {}", dynamic_state->max_dynamic_types, column_dynamic.getMaxDynamicTypes()); + settings.path.push_back(Substream::DynamicData); - assert_cast(*dynamic_state->variant_serialization) - .serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(*variant_column, offset, limit, settings, dynamic_state->variant_state, dynamic_state->statistics.data); + if (dynamic_state->recalculate_statistics) + { + assert_cast(*dynamic_state->variant_serialization) + .serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(*variant_column, offset, limit, settings, dynamic_state->variant_state, dynamic_state->statistics.variants_statistics); + /// Calculate statistics for shared variants. + const auto & shared_variant = column_dynamic.getSharedVariant(); + if (!shared_variant.empty()) + { + const auto & local_discriminators = variant_column->getLocalDiscriminators(); + const auto & offsets = variant_column->getOffsets(); + const auto shared_variant_discr = variant_column->localDiscriminatorByGlobal(column_dynamic.getSharedVariantDiscriminator()); + size_t end = limit == 0 || offset + limit > local_discriminators.size() ? local_discriminators.size() : offset + limit; + for (size_t i = offset; i != end; ++i) + { + if (local_discriminators[i] == shared_variant_discr) + { + auto value = shared_variant.getDataAt(offsets[i]); + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + auto type_name = type->getName(); + if (auto it = dynamic_state->statistics.shared_variants_statistics.find(type_name); it != dynamic_state->statistics.shared_variants_statistics.end()) + ++it->second; + else if (dynamic_state->statistics.shared_variants_statistics.size() < ColumnDynamic::Statistics::MAX_SHARED_VARIANT_STATISTICS_SIZE) + dynamic_state->statistics.shared_variants_statistics.emplace(type_name, 1); + } + } + } + } + else + { + assert_cast(*dynamic_state->variant_serialization).serializeBinaryBulkWithMultipleStreams(*variant_column, offset, limit, settings, dynamic_state->variant_state); + } settings.path.pop_back(); } @@ -272,13 +407,17 @@ void SerializationDynamic::deserializeBinaryBulkWithMultipleStreams( return; auto mutable_column = column->assumeMutable(); + auto & column_dynamic = assert_cast(*mutable_column); auto * dynamic_state = checkAndGetState(state); auto * structure_state = checkAndGetState(dynamic_state->structure_state); if (mutable_column->empty()) - mutable_column = ColumnDynamic::create(structure_state->variant_type->createColumn(), structure_state->variant_type, max_dynamic_types, structure_state->statistics); + { + column_dynamic.setMaxDynamicPaths(structure_state->max_dynamic_types); + column_dynamic.setVariantType(structure_state->variant_type); + column_dynamic.setStatistics(structure_state->statistics); + } - auto & column_dynamic = assert_cast(*mutable_column); const auto & variant_info = column_dynamic.getVariantInfo(); if (!variant_info.variant_type->equals(*structure_state->variant_type)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of internal columns of Dynamic. 
Expected: {}, Got: {}", structure_state->variant_type->getName(), variant_info.variant_type->getName()); @@ -329,24 +468,42 @@ void SerializationDynamic::serializeBinary(const IColumn & column, size_t row_nu encodeDataType(std::make_shared(), ostr); return; } + /// Check if this value is in shared variant. In this case it's already + /// in desired binary format. + else if (global_discr == dynamic_column.getSharedVariantDiscriminator()) + { + auto value = dynamic_column.getSharedVariant().getDataAt(variant_column.offsetAt(row_num)); + ostr.write(value.data, value.size); + return; + } const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(global_discr); + const auto & variant_type_name = variant_info.variant_names[global_discr]; encodeDataType(variant_type, ostr); - variant_type->getDefaultSerialization()->serializeBinary(variant_column.getVariantByGlobalDiscriminator(global_discr), variant_column.offsetAt(row_num), ostr, settings); + dynamic_column.getVariantSerialization(variant_type, variant_type_name)->serializeBinary(variant_column.getVariantByGlobalDiscriminator(global_discr), variant_column.offsetAt(row_num), ostr, settings); } -template -static void deserializeVariant( +template +static ReturnType deserializeVariant( ColumnVariant & variant_column, - const DataTypePtr & variant_type, + const SerializationPtr & variant_serialization, ColumnVariant::Discriminator global_discr, ReadBuffer & istr, DeserializeFunc deserialize) { auto & variant = variant_column.getVariantByGlobalDiscriminator(global_discr); - deserialize(*variant_type->getDefaultSerialization(), variant, istr); + if constexpr (std::is_same_v) + { + if (!deserialize(*variant_serialization, variant, istr)) + return ReturnType(false); + } + else + { + deserialize(*variant_serialization, variant, istr); + } variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(global_discr)); variant_column.getOffsets().push_back(variant.size() - 1); + return ReturnType(true); } void SerializationDynamic::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -360,11 +517,12 @@ void SerializationDynamic::deserializeBinary(IColumn & column, ReadBuffer & istr } auto variant_type_name = variant_type->getName(); + const auto & variant_serialization = dynamic_column.getVariantSerialization(variant_type, variant_type_name); const auto & variant_info = dynamic_column.getVariantInfo(); auto it = variant_info.variant_name_to_discriminator.find(variant_type_name); if (it != variant_info.variant_name_to_discriminator.end()) { - deserializeVariant(dynamic_column.getVariantColumn(), variant_type, it->second, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); }); + deserializeVariant(dynamic_column.getVariantColumn(), variant_serialization, it->second, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); }); return; } @@ -372,25 +530,15 @@ void SerializationDynamic::deserializeBinary(IColumn & column, ReadBuffer & istr if (dynamic_column.addNewVariant(variant_type)) { auto discr = variant_info.variant_name_to_discriminator.at(variant_type_name); - deserializeVariant(dynamic_column.getVariantColumn(), variant_type, discr, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, 
settings); }); + deserializeVariant(dynamic_column.getVariantColumn(), variant_serialization, discr, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); }); return; } /// We reached maximum number of variants and couldn't add new variant. - /// This case should be really rare in real use cases. - /// We should always be able to add String variant and insert value as String. - dynamic_column.addStringVariant(); + /// In this case we insert this value into shared variant in binary form. auto tmp_variant_column = variant_type->createColumn(); - variant_type->getDefaultSerialization()->deserializeBinary(*tmp_variant_column, istr, settings); - auto string_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); - auto & variant_column = dynamic_column.getVariantColumn(); - variant_column.insertIntoVariantFrom(variant_info.variant_name_to_discriminator.at("String"), *string_column, 0); -} - -void SerializationDynamic::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const -{ - const auto & dynamic_column = assert_cast(column); - dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextCSV(dynamic_column.getVariantColumn(), row_num, ostr, settings); + variant_serialization->deserializeBinary(*tmp_variant_column, istr, settings); + dynamic_column.insertValueIntoSharedVariant(*tmp_variant_column, variant_type, variant_type_name, 0); } template @@ -406,6 +554,7 @@ static void deserializeTextImpl( auto & dynamic_column = assert_cast(column); auto & variant_column = dynamic_column.getVariantColumn(); const auto & variant_info = dynamic_column.getVariantInfo(); + const auto & variant_types = assert_cast(*variant_info.variant_type).getVariants(); String field = read_field(istr); auto field_buf = std::make_unique(field); JSONInferenceInfo json_info; @@ -413,27 +562,81 @@ static void deserializeTextImpl( if (escaping_rule == FormatSettings::EscapingRule::JSON) transformFinalInferredJSONTypeIfNeeded(variant_type, settings, &json_info); - if (checkIfTypeIsComplete(variant_type) && dynamic_column.addNewVariant(variant_type)) + /// If inferred type is not complete, we cannot add it as a new variant. + /// Let's try to deserialize this field into existing variants. + /// If failed, insert this value as String. + if (!checkIfTypeIsComplete(variant_type)) + { + size_t shared_variant_discr = dynamic_column.getSharedVariantDiscriminator(); + for (size_t i = 0; i != variant_types.size(); ++i) + { + field_buf = std::make_unique(field); + if (i != shared_variant_discr + && deserializeVariant( + variant_column, + dynamic_column.getVariantSerialization(variant_types[i], variant_info.variant_names[i]), + i, + *field_buf, + try_deserialize_variant)) + return; + } + + variant_type = std::make_shared(); + /// To be able to deserialize field as String with Quoted escaping rule, it should be quoted. 
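+        /// E.g. an unparsable field abc becomes 'abc' here, so the Quoted deserialization of the
+        /// String fallback below succeeds instead of failing on the missing quotes.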
+ if (escaping_rule == FormatSettings::EscapingRule::Quoted && (field.size() < 2 || field.front() != '\'' || field.back() != '\'')) + field = "'" + field + "'"; + } + else if (dynamic_column.addNewVariant(variant_type, variant_type->getName())) { auto discr = variant_info.variant_name_to_discriminator.at(variant_type->getName()); - deserializeVariant(dynamic_column.getVariantColumn(), variant_type, discr, *field_buf, deserialize_variant); + deserializeVariant(dynamic_column.getVariantColumn(), dynamic_column.getVariantSerialization(variant_type), discr, *field_buf, deserialize_variant); return; } - /// We couldn't infer type or add new variant. Try to insert field into current variants. + /// We couldn't infer type or add new variant. Insert it into shared variant. + auto tmp_variant_column = variant_type->createColumn(); field_buf = std::make_unique(field); - if (try_deserialize_variant(*variant_info.variant_type->getDefaultSerialization(), variant_column, *field_buf)) - return; + auto variant_type_name = variant_type->getName(); + deserialize_variant(*dynamic_column.getVariantSerialization(variant_type, variant_type_name), *tmp_variant_column, *field_buf); + dynamic_column.insertValueIntoSharedVariant(*tmp_variant_column, variant_type, variant_type_name, 0); +} - /// We couldn't insert field into any existing variant, add String variant and read value as String. - dynamic_column.addStringVariant(); +template +static void serializeTextImpl( + const IColumn & column, + size_t row_num, + WriteBuffer & ostr, + const FormatSettings & settings, + NestedSerialize nested_serialize) +{ + const auto & dynamic_column = assert_cast(column); + const auto & variant_column = dynamic_column.getVariantColumn(); + /// Check if this row has value in shared variant. In this case we should first deserialize it from binary format. + if (variant_column.globalDiscriminatorAt(row_num) == dynamic_column.getSharedVariantDiscriminator()) + { + auto value = dynamic_column.getSharedVariant().getDataAt(variant_column.offsetAt(row_num)); + ReadBufferFromMemory buf(value.data, value.size); + auto variant_type = decodeDataType(buf); + auto tmp_variant_column = variant_type->createColumn(); + auto variant_serialization = dynamic_column.getVariantSerialization(variant_type); + variant_serialization->deserializeBinary(*tmp_variant_column, buf, settings); + nested_serialize(*variant_serialization, *tmp_variant_column, 0, ostr); + } + /// Otherwise just use serialization for Variant. 
+ else + { + nested_serialize(*dynamic_column.getVariantInfo().variant_type->getDefaultSerialization(), variant_column, row_num, ostr); + } +} - if (escaping_rule == FormatSettings::EscapingRule::Quoted && (field.size() < 2 || field.front() != '\'' || field.back() != '\'')) - field = "'" + field + "'"; +void SerializationDynamic::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf) + { + serialization.serializeTextCSV(col, row, buf, settings); + }; - field_buf = std::make_unique(field); - auto string_discr = variant_info.variant_name_to_discriminator.at("String"); - deserializeVariant(dynamic_column.getVariantColumn(), std::make_shared(), string_discr, *field_buf, deserialize_variant); + serializeTextImpl(column, row_num, ostr, settings, nested_serialize); } void SerializationDynamic::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -466,8 +669,12 @@ bool SerializationDynamic::tryDeserializeTextCSV(DB::IColumn & column, DB::ReadB void SerializationDynamic::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - const auto & dynamic_column = assert_cast(column); - dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextEscaped(dynamic_column.getVariantColumn(), row_num, ostr, settings); + auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf) + { + serialization.serializeTextEscaped(col, row, buf, settings); + }; + + serializeTextImpl(column, row_num, ostr, settings, nested_serialize); } void SerializationDynamic::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -500,8 +707,12 @@ bool SerializationDynamic::tryDeserializeTextEscaped(DB::IColumn & column, DB::R void SerializationDynamic::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - const auto & dynamic_column = assert_cast(column); - dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextQuoted(dynamic_column.getVariantColumn(), row_num, ostr, settings); + auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf) + { + serialization.serializeTextQuoted(col, row, buf, settings); + }; + + serializeTextImpl(column, row_num, ostr, settings, nested_serialize); } void SerializationDynamic::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -534,8 +745,12 @@ bool SerializationDynamic::tryDeserializeTextQuoted(DB::IColumn & column, DB::Re void SerializationDynamic::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - const auto & dynamic_column = assert_cast(column); - dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextJSON(dynamic_column.getVariantColumn(), row_num, ostr, settings); + auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf) + { + serialization.serializeTextJSON(col, row, buf, settings); + }; + + serializeTextImpl(column, row_num, ostr, settings, nested_serialize); } void 
SerializationDynamic::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -568,8 +783,12 @@ bool SerializationDynamic::tryDeserializeTextJSON(DB::IColumn & column, DB::Read void SerializationDynamic::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - const auto & dynamic_column = assert_cast(column); - dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextRaw(dynamic_column.getVariantColumn(), row_num, ostr, settings); + auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf) + { + serialization.serializeTextRaw(col, row, buf, settings); + }; + + serializeTextImpl(column, row_num, ostr, settings, nested_serialize); } void SerializationDynamic::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -602,8 +821,12 @@ bool SerializationDynamic::tryDeserializeTextRaw(DB::IColumn & column, DB::ReadB void SerializationDynamic::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - const auto & dynamic_column = assert_cast(column); - dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeText(dynamic_column.getVariantColumn(), row_num, ostr, settings); + auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf) + { + serialization.serializeText(col, row, buf, settings); + }; + + serializeTextImpl(column, row_num, ostr, settings, nested_serialize); } void SerializationDynamic::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const @@ -636,8 +859,12 @@ bool SerializationDynamic::tryDeserializeWholeText(DB::IColumn & column, DB::Rea void SerializationDynamic::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { - const auto & dynamic_column = assert_cast(column); - dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextXML(dynamic_column.getVariantColumn(), row_num, ostr, settings); + auto nested_serialize = [&settings](const ISerialization & serialization, const IColumn & col, size_t row, WriteBuffer & buf) + { + serialization.serializeTextXML(col, row, buf, settings); + }; + + serializeTextImpl(column, row_num, ostr, settings, nested_serialize); } } diff --git a/src/DataTypes/Serializations/SerializationDynamic.h b/src/DataTypes/Serializations/SerializationDynamic.h index 001a3cf87ce..3dbf311fb6c 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.h +++ b/src/DataTypes/Serializations/SerializationDynamic.h @@ -105,9 +105,13 @@ private: { DynamicStructureSerializationVersion structure_version; DataTypePtr variant_type; - ColumnDynamic::Statistics statistics = {.source = ColumnDynamic::Statistics::Source::READ, .data = {}}; + size_t max_dynamic_types; + ColumnDynamic::StatisticsPtr statistics; - explicit DeserializeBinaryBulkStateDynamicStructure(UInt64 structure_version_) : structure_version(structure_version_) {} + explicit DeserializeBinaryBulkStateDynamicStructure(UInt64 structure_version_) + : structure_version(structure_version_) + { + } }; size_t max_dynamic_types; diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp index 211f0ac9377..cffca14bca5 100644 
--- a/src/DataTypes/Serializations/SerializationDynamicElement.cpp +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -4,7 +4,10 @@ #include #include #include +#include #include +#include +#include #include namespace DB @@ -21,6 +24,8 @@ struct DeserializeBinaryBulkStateDynamicElement : public ISerialization::Deseria ISerialization::DeserializeBinaryBulkStatePtr structure_state; SerializationPtr variant_serialization; ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; + bool read_from_shared_variant; + ColumnPtr shared_variant; }; void SerializationDynamicElement::enumerateStreams( @@ -73,9 +78,10 @@ void SerializationDynamicElement::deserializeBinaryBulkStatePrefix( auto dynamic_element_state = std::make_shared(); dynamic_element_state->structure_state = std::move(structure_state); - const auto & variant_type = checkAndGetState(dynamic_element_state->structure_state)->variant_type; + const auto & variant_type = assert_cast( + *checkAndGetState(dynamic_element_state->structure_state)->variant_type); /// Check if we actually have required element in the Variant. - if (auto global_discr = assert_cast(*variant_type).tryGetVariantDiscriminator(dynamic_element_name)) + if (auto global_discr = variant_type.tryGetVariantDiscriminator(dynamic_element_name)) { settings.path.push_back(Substream::DynamicData); if (is_null_map_subcolumn) @@ -83,6 +89,21 @@ void SerializationDynamicElement::deserializeBinaryBulkStatePrefix( else dynamic_element_state->variant_serialization = std::make_shared(nested_serialization, dynamic_element_name, *global_discr); dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache); + dynamic_element_state->read_from_shared_variant = false; + settings.path.pop_back(); + } + /// If we don't have this element in the Variant, we will read shared variant and try to find it there. + else + { + auto shared_variant_global_discr = variant_type.tryGetVariantDiscriminator(ColumnDynamic::getSharedVariantTypeName()); + chassert(shared_variant_global_discr.has_value()); + settings.path.push_back(Substream::DynamicData); + dynamic_element_state->variant_serialization = std::make_shared( + ColumnDynamic::getSharedVariantDataType()->getDefaultSerialization(), + ColumnDynamic::getSharedVariantTypeName(), + *shared_variant_global_discr); + dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache); + dynamic_element_state->read_from_shared_variant = true; settings.path.pop_back(); } @@ -115,23 +136,103 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams( auto * dynamic_element_state = checkAndGetState(state); - if (dynamic_element_state->variant_serialization) + /// Check if this subcolumn should not be read from shared variant. + /// In this case just read data from the corresponding variant. 
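+    /// Roughly, for a subcolumn request like d.Int64 (an illustrative example, not exact code):
+    ///
+    ///     Int64 is one of the variants        -> read the Int64 variant substream directly;
+    ///     Int64 lives only in shared variant  -> read the shared variant String column, decode the
+    ///                                            type header of each row, keep the Int64 rows and
+    ///                                            turn everything else into NULL.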
+    if (!dynamic_element_state->read_from_shared_variant)
     {
         settings.path.push_back(Substream::DynamicData);
-        dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(result_column, limit, settings, dynamic_element_state->variant_element_state, cache);
+        dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(
+            result_column, limit, settings, dynamic_element_state->variant_element_state, cache);
         settings.path.pop_back();
     }
-    else if (is_null_map_subcolumn)
-    {
-        auto mutable_column = result_column->assumeMutable();
-        auto & data = assert_cast<ColumnUInt8 &>(*mutable_column).getData();
-        data.resize_fill(data.size() + limit, 1);
-    }
+    /// Otherwise, read the shared variant column and extract requested type from it.
     else
     {
-        auto mutable_column = result_column->assumeMutable();
-        mutable_column->insertManyDefaults(limit);
-        result_column = std::move(mutable_column);
+        settings.path.push_back(Substream::DynamicData);
+        /// Initialize shared_variant column if needed.
+        if (result_column->empty())
+            dynamic_element_state->shared_variant = makeNullable(ColumnDynamic::getSharedVariantDataType()->createColumn());
+        size_t prev_size = result_column->size();
+        dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(
+            dynamic_element_state->shared_variant, limit, settings, dynamic_element_state->variant_element_state, cache);
+        settings.path.pop_back();
+
+        /// If we need to read a subcolumn from variant column, create an empty variant column, fill it and extract subcolumn.
+        auto variant_type = DataTypeFactory::instance().get(dynamic_element_name);
+        auto result_type = makeNullableOrLowCardinalityNullableSafe(variant_type);
+        MutableColumnPtr variant_column = nested_subcolumn.empty() || is_null_map_subcolumn ? result_column->assumeMutable() : result_type->createColumn();
+        variant_column->reserve(variant_column->size() + limit);
+        MutableColumnPtr non_nullable_variant_column = variant_column->assumeMutable();
+        NullMap * null_map = nullptr;
+        bool is_low_cardinality_nullable = isColumnLowCardinalityNullable(*variant_column);
+        /// Resulting subcolumn can be Nullable, but value is serialized in shared variant as non-Nullable.
+        /// Extract non-nullable column and remember the null map to fill it during deserialization.
+        if (isColumnNullable(*variant_column))
+        {
+            auto & nullable_variant_column = assert_cast<ColumnNullable &>(*variant_column);
+            non_nullable_variant_column = nullable_variant_column.getNestedColumnPtr()->assumeMutable();
+            null_map = &nullable_variant_column.getNullMapData();
+        }
+        else if (is_null_map_subcolumn)
+        {
+            null_map = &assert_cast<ColumnUInt8 &>(*variant_column).getData();
+        }
+
+        auto variant_serialization = variant_type->getDefaultSerialization();
+
+        const auto & nullable_shared_variant = assert_cast<const ColumnNullable &>(*dynamic_element_state->shared_variant);
+        const auto & shared_null_map = nullable_shared_variant.getNullMapData();
+        const auto & shared_variant = assert_cast<const ColumnString &>(nullable_shared_variant.getNestedColumn());
+        const FormatSettings format_settings;
+        for (size_t i = prev_size; i != shared_variant.size(); ++i)
+        {
+            if (!shared_null_map[i])
+            {
+                auto value = shared_variant.getDataAt(i);
+                ReadBufferFromMemory buf(value.data, value.size);
+                auto type = decodeDataType(buf);
+                if (type->getName() == dynamic_element_name)
+                {
+                    /// When requested type is LowCardinality the subcolumn type name will be LowCardinality(Nullable).
+                    /// Value in shared variant is serialized as LowCardinality and we cannot simply deserialize it
+                    /// inside LowCardinality(Nullable) column (it will try to deserialize null bit). In this case we
+                    /// have to create temporary LowCardinality column, deserialize value into it and insert it into
+                    /// resulting LowCardinality(Nullable) (insertion from LowCardinality column to LowCardinality(Nullable)
+                    /// column is allowed).
+                    if (is_low_cardinality_nullable)
+                    {
+                        auto tmp_column = variant_type->createColumn();
+                        variant_serialization->deserializeBinary(*tmp_column, buf, format_settings);
+                        non_nullable_variant_column->insertFrom(*tmp_column, 0);
+                    }
+                    else if (is_null_map_subcolumn)
+                    {
+                        null_map->push_back(0);
+                    }
+                    else
+                    {
+                        variant_serialization->deserializeBinary(*non_nullable_variant_column, buf, format_settings);
+                        if (null_map)
+                            null_map->push_back(0);
+                    }
+                }
+                else
+                {
+                    variant_column->insertDefault();
+                }
+            }
+            else
+            {
+                variant_column->insertDefault();
+            }
+        }
+
+        /// Extract nested subcolumn if needed.
+        if (!nested_subcolumn.empty() && !is_null_map_subcolumn)
+        {
+            auto subcolumn = result_type->getSubcolumn(nested_subcolumn, variant_column->getPtr());
+            result_column->assumeMutable()->insertRangeFrom(*subcolumn, 0, subcolumn->size());
+        }
     }
 }
 
diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.h b/src/DataTypes/Serializations/SerializationDynamicElement.h
index 127d14a55e0..c674cf479ae 100644
--- a/src/DataTypes/Serializations/SerializationDynamicElement.h
+++ b/src/DataTypes/Serializations/SerializationDynamicElement.h
@@ -13,11 +13,15 @@ private:
     /// To be able to deserialize Dynamic element as a subcolumn
     /// we need its type name and global discriminator.
     String dynamic_element_name;
+    /// Nested subcolumn of the requested dynamic type. For example, for the `Tuple(a UInt32)`.a
+    /// subcolumn, dynamic_element_name = 'Tuple(a UInt32)' and nested_subcolumn = 'a'.
+    /// Needed to extract nested subcolumn from values in shared variant.
+ String nested_subcolumn; bool is_null_map_subcolumn; public: - SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_, bool is_null_map_subcolumn_ = false) - : SerializationWrapper(nested_), dynamic_element_name(dynamic_element_name_), is_null_map_subcolumn(is_null_map_subcolumn_) + SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_, const String & nested_subcolumn_, bool is_null_map_subcolumn_ = false) + : SerializationWrapper(nested_), dynamic_element_name(dynamic_element_name_), nested_subcolumn(nested_subcolumn_), is_null_map_subcolumn(is_null_map_subcolumn_) { } diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 03b5d9584e0..36dc85f60ee 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -305,8 +305,10 @@ SerializationVariantElement::VariantSubcolumnCreator::VariantSubcolumnCreator( const String & variant_element_name_, ColumnVariant::Discriminator global_variant_discriminator_, ColumnVariant::Discriminator local_variant_discriminator_, - bool make_nullable_) + bool make_nullable_, + const ColumnPtr & null_map_) : local_discriminators(local_discriminators_) + , null_map(null_map_) , variant_element_name(variant_element_name_) , global_variant_discriminator(global_variant_discriminator_) , local_variant_discriminator(local_variant_discriminator_) @@ -314,12 +316,13 @@ SerializationVariantElement::VariantSubcolumnCreator::VariantSubcolumnCreator( { } -DataTypePtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::DataTypePtr & prev) const + +DataTypePtr SerializationVariantElement::VariantSubcolumnCreator::create(const DataTypePtr & prev) const { return make_nullable ? makeNullableOrLowCardinalityNullableSafe(prev) : prev; } -SerializationPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB::SerializationPtr & prev) const +SerializationPtr SerializationVariantElement::VariantSubcolumnCreator::create(const SerializationPtr & prev) const { return std::make_shared(prev, variant_element_name, global_variant_discriminator); } @@ -339,12 +342,16 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB: return res; } - /// In general case we should iterate through discriminators and create null-map for our variant. - NullMap null_map; - null_map.reserve(local_discriminators->size()); - const auto & local_discriminators_data = assert_cast(*local_discriminators).getData(); - for (auto local_discr : local_discriminators_data) - null_map.push_back(local_discr != local_variant_discriminator); + /// In general case we should iterate through discriminators and create null-map for our variant if we don't already have it. + std::optional null_map_from_discriminators; + if (!null_map) + { + null_map_from_discriminators = NullMap(); + null_map_from_discriminators->reserve(local_discriminators->size()); + const auto & local_discriminators_data = assert_cast(*local_discriminators).getData(); + for (auto local_discr : local_discriminators_data) + null_map_from_discriminators->push_back(local_discr != local_variant_discriminator); + } /// Now we can create new column from null-map and variant column using IColumn::expand. 
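     /// A small illustration with made-up values: for local discriminators [v, x, v] and variant
     /// data [10, 20], the inverted null map is [0, 1, 0], and expand() turns [10, 20] into
     /// [10, <default>, 20], which can then be wrapped into Nullable with the same null map.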
auto res_column = IColumn::mutate(prev); @@ -356,13 +363,21 @@ ColumnPtr SerializationVariantElement::VariantSubcolumnCreator::create(const DB: if (make_nullable && prev->lowCardinality()) res_column = assert_cast(*res_column).cloneNullable(); - res_column->expand(null_map, /*inverted = */ true); + if (null_map_from_discriminators) + res_column->expand(*null_map_from_discriminators, /*inverted = */ true); + else + res_column->expand(assert_cast(*null_map).getData(), /*inverted = */ true); if (make_nullable && prev->canBeInsideNullable()) { - auto null_map_col = ColumnUInt8::create(); - null_map_col->getData() = std::move(null_map); - return ColumnNullable::create(std::move(res_column), std::move(null_map_col)); + if (null_map_from_discriminators) + { + auto null_map_col = ColumnUInt8::create(); + null_map_col->getData() = std::move(*null_map_from_discriminators); + return ColumnNullable::create(std::move(res_column), std::move(null_map_col)); + } + + return ColumnNullable::create(std::move(res_column), null_map->assumeMutable()); } return res_column; diff --git a/src/DataTypes/Serializations/SerializationVariantElement.h b/src/DataTypes/Serializations/SerializationVariantElement.h index 69101aea0f5..64f86eb2190 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.h +++ b/src/DataTypes/Serializations/SerializationVariantElement.h @@ -63,18 +63,22 @@ public: struct VariantSubcolumnCreator : public ISubcolumnCreator { + private: const ColumnPtr local_discriminators; + const ColumnPtr null_map; /// optional const String variant_element_name; const ColumnVariant::Discriminator global_variant_discriminator; const ColumnVariant::Discriminator local_variant_discriminator; bool make_nullable; + public: VariantSubcolumnCreator( const ColumnPtr & local_discriminators_, const String & variant_element_name_, ColumnVariant::Discriminator global_variant_discriminator_, ColumnVariant::Discriminator local_variant_discriminator_, - bool make_nullable_); + bool make_nullable_, + const ColumnPtr & null_map_ = nullptr); DataTypePtr create(const DataTypePtr & prev) const override; ColumnPtr create(const ColumnPtr & prev) const override; diff --git a/src/Formats/JSONExtractTree.cpp b/src/Formats/JSONExtractTree.cpp index 242d2dc9f80..86fde3852b8 100644 --- a/src/Formats/JSONExtractTree.cpp +++ b/src/Formats/JSONExtractTree.cpp @@ -1362,13 +1362,14 @@ public: } auto & variant_column = column_dynamic.getVariantColumn(); - auto variant_info = column_dynamic.getVariantInfo(); + const auto & variant_info = column_dynamic.getVariantInfo(); /// Second, infer ClickHouse type for this element and add it as a new variant. auto element_type = elementToDataType(element, format_settings); - if (column_dynamic.addNewVariant(element_type)) + auto element_type_name = element_type->getName(); + if (column_dynamic.addNewVariant(element_type, element_type_name)) { auto node = buildJSONExtractTree(element_type, "Dynamic inference"); - auto global_discriminator = variant_info.variant_name_to_discriminator[element_type->getName()]; + auto global_discriminator = variant_info.variant_name_to_discriminator.at(element_type_name); auto & variant = variant_column.getVariantByGlobalDiscriminator(global_discriminator); if (!node->insertResultToColumn(variant, element, insert_settings, format_settings, error)) return false; @@ -1377,29 +1378,15 @@ public: return true; } - /// We couldn't add new variant. Try to insert element into current variants. 
- auto variant_node = buildJSONExtractTree(variant_info.variant_type, "Dynamic inference"); - if (variant_node->insertResultToColumn(variant_column, element, insert_settings, format_settings, error)) - return true; - - /// We couldn't insert element into any existing variant, add String variant and read value as String. - column_dynamic.addStringVariant(); - auto string_global_discriminator = variant_info.variant_name_to_discriminator["String"]; - auto & string_column = variant_column.getVariantByGlobalDiscriminator(string_global_discriminator); - if (!getStringNode()->insertResultToColumn(string_column, element, insert_settings, format_settings, error)) + /// We couldn't add this variant, insert it into shared variant. + auto tmp_variant_column = element_type->createColumn(); + auto node = buildJSONExtractTree(element_type, "Dynamic inference"); + if (!node->insertResultToColumn(*tmp_variant_column, element, insert_settings, format_settings, error)) return false; - variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(string_global_discriminator)); - variant_column.getOffsets().push_back(string_column.size() - 1); + column_dynamic.insertValueIntoSharedVariant(*tmp_variant_column, element_type, element_type_name, 0); return true; } - static const std::unique_ptr> & getStringNode() - { - static const std::unique_ptr> string_node - = buildJSONExtractTree(std::make_shared(), "Dynamic inference"); - return string_node; - } - static DataTypePtr elementToDataType(const typename JSONParser::Element & element, const FormatSettings & format_settings) { JSONInferenceInfo json_inference_info; diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 675283d011e..21b98cf505c 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -4287,13 +4288,98 @@ private: WrapperType createDynamicToColumnWrapper(const DataTypePtr &) const { return [this] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr { + /// When casting Dynamic to regular column we should cast all variants from current Dynamic column + /// and construct the result based on discriminators. const auto & column_dynamic = assert_cast(*arguments.front().column.get()); + const auto & variant_column = column_dynamic.getVariantColumn(); const auto & variant_info = column_dynamic.getVariantInfo(); - auto variant_wrapper = createVariantToColumnWrapper(assert_cast(*variant_info.variant_type), result_type); - ColumnsWithTypeAndName args = {ColumnWithTypeAndName(column_dynamic.getVariantColumnPtr(), variant_info.variant_type, "")}; - return variant_wrapper(args, result_type, col_nullable, input_rows_count); + + /// First, cast usual variants to result type. 
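+            /// The overall shape of this cast (a sketch): cast every variant column to the result
+            /// type once, decode shared-variant rows into temporary typed columns and cast those the
+            /// same way, then assemble the result row by row according to the discriminators.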
+ const auto & variant_types = assert_cast(*variant_info.variant_type).getVariants(); + std::vector casted_variant_columns; + casted_variant_columns.reserve(variant_types.size()); + for (size_t i = 0; i != variant_types.size(); ++i) + { + const auto & variant_col = variant_column.getVariantPtrByGlobalDiscriminator(i); + ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], ""}}; + auto variant_wrapper = prepareUnpackDictionaries(variant_types[i], result_type); + casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size())); + } + + /// Second, collect all variants stored in shared variant and cast them to result type. + std::vector variant_columns_from_shared_variant; + DataTypes variant_types_from_shared_variant; + /// We will need to know what variant to use when we see discriminator of a shared variant. + /// To do it, we remember what variant was extracted from each row and what was it's offset. + PaddedPODArray shared_variant_indexes; + PaddedPODArray shared_variant_offsets; + std::unordered_map shared_variant_to_index; + const auto & shared_variant = column_dynamic.getSharedVariant(); + const auto shared_variant_discr = column_dynamic.getSharedVariantDiscriminator(); + const auto & local_discriminators = variant_column.getLocalDiscriminators(); + const auto & offsets = variant_column.getOffsets(); + if (!shared_variant.empty()) + { + shared_variant_indexes.reserve(input_rows_count); + shared_variant_offsets.reserve(input_rows_count); + FormatSettings format_settings; + const auto shared_variant_local_discr = variant_column.localDiscriminatorByGlobal(shared_variant_discr); + for (size_t i = 0; i != input_rows_count; ++i) + { + if (local_discriminators[i] == shared_variant_local_discr) + { + auto value = shared_variant.getDataAt(offsets[i]); + ReadBufferFromMemory buf(value.data, value.size); + auto type = decodeDataType(buf); + auto type_name = type->getName(); + auto it = shared_variant_to_index.find(type_name); + /// Check if didn't created column for this variant yet. + if (it == shared_variant_to_index.end()) + { + it = shared_variant_to_index.emplace(type_name, variant_columns_from_shared_variant.size()).first; + variant_columns_from_shared_variant.push_back(type->createColumn()); + variant_types_from_shared_variant.push_back(type); + } + + shared_variant_indexes.push_back(it->second); + shared_variant_offsets.push_back(variant_columns_from_shared_variant[it->second]->size()); + type->getDefaultSerialization()->deserializeBinary(*variant_columns_from_shared_variant[it->second], buf, format_settings); + } + else + { + shared_variant_indexes.emplace_back(); + shared_variant_offsets.emplace_back(); + } + } + } + + /// Cast all extracted variants into result type. + std::vector casted_shared_variant_columns; + casted_shared_variant_columns.reserve(variant_types_from_shared_variant.size()); + for (size_t i = 0; i != variant_types_from_shared_variant.size(); ++i) + { + ColumnsWithTypeAndName variant = {{variant_columns_from_shared_variant[i]->getPtr(), variant_types_from_shared_variant[i], ""}}; + auto variant_wrapper = prepareUnpackDictionaries(variant_types_from_shared_variant[i], result_type); + casted_shared_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_columns_from_shared_variant[i]->size())); + } + + /// Construct result column from all casted variants. 
+ auto res = result_type->createColumn(); + res->reserve(input_rows_count); + for (size_t i = 0; i != input_rows_count; ++i) + { + auto global_discr = variant_column.globalDiscriminatorByLocal(local_discriminators[i]); + if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) + res->insertDefault(); + else if (global_discr == shared_variant_discr) + res->insertFrom(*casted_shared_variant_columns[shared_variant_indexes[i]], shared_variant_offsets[i]); + else + res->insertFrom(*casted_variant_columns[global_discr], offsets[i]); + } + + return res; }; } @@ -4320,200 +4406,51 @@ private: }; } - std::pair getReducedVariant( - const ColumnVariant & variant_column, - const DataTypePtr & variant_type, - const std::unordered_map & variant_name_to_discriminator, - size_t max_result_num_variants, - const ColumnDynamic::Statistics & statistics = {}) const + WrapperType createVariantToDynamicWrapper(const DataTypeVariant & from_variant_type, const DataTypeDynamic & dynamic_type) const { - const auto & variant_types = assert_cast(*variant_type).getVariants(); - /// First check if we don't exceed the limit in current Variant column. - if (variant_types.size() < max_result_num_variants || (variant_types.size() == max_result_num_variants && variant_name_to_discriminator.contains("String"))) - return {variant_column.getPtr(), variant_type}; - - /// We want to keep the most frequent variants and convert to string the rarest. - std::vector> variant_sizes; - variant_sizes.reserve(variant_types.size()); - std::optional old_string_discriminator; - /// List of variants that should be converted to a single String variant. - std::vector variants_to_convert_to_string; - for (size_t i = 0; i != variant_types.size(); ++i) + /// First create extended Variant with shared variant type and cast this Variant to it. + auto variants_for_dynamic = from_variant_type.getVariants(); + size_t number_of_variants = variants_for_dynamic.size(); + variants_for_dynamic.push_back(ColumnDynamic::getSharedVariantDataType()); + const auto & variant_type_for_dynamic = std::make_shared(variants_for_dynamic); + auto old_to_new_variant_wrapper = createVariantToVariantWrapper(from_variant_type, *variant_type_for_dynamic); + auto max_dynamic_types = dynamic_type.getMaxDynamicTypes(); + return [old_to_new_variant_wrapper, variant_type_for_dynamic, number_of_variants, max_dynamic_types] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr { - /// String variant won't be removed. - String variant_name = variant_types[i]->getName(); + auto variant_column_for_dynamic = old_to_new_variant_wrapper(arguments, result_type, col_nullable, input_rows_count); + /// If resulting Dynamic column can contain all variants from this Variant column, just create Dynamic column from it. + if (max_dynamic_types >= number_of_variants) + return ColumnDynamic::create(variant_column_for_dynamic, variant_type_for_dynamic, max_dynamic_types, max_dynamic_types); - if (variant_name == "String") - { - old_string_discriminator = i; - /// For simplicity, add this variant to the list that will be converted to string, - /// so we will process it with other variants when constructing the new String variant. 
- variants_to_convert_to_string.push_back(i); - } - else - { - size_t size = 0; - if (statistics.data.empty()) - size = variant_column.getVariantByGlobalDiscriminator(i).size(); - else - size = statistics.data.at(variant_name); - variant_sizes.emplace_back(size, i); - } - } - - /// Sort variants by sizes, so we will keep the most frequent. - std::sort(variant_sizes.begin(), variant_sizes.end(), std::greater()); - - DataTypes remaining_variants; - remaining_variants.reserve(max_result_num_variants); - /// Add String variant in advance. - remaining_variants.push_back(std::make_shared()); - for (auto [_, discr] : variant_sizes) - { - if (remaining_variants.size() != max_result_num_variants) - remaining_variants.push_back(variant_types[discr]); - else - variants_to_convert_to_string.push_back(discr); - } - - auto reduced_variant = std::make_shared(remaining_variants); - const auto & new_variants = reduced_variant->getVariants(); - /// To construct reduced variant column we will need mapping from old to new discriminators. - std::vector old_to_new_discriminators_mapping; - old_to_new_discriminators_mapping.resize(variant_types.size()); - ColumnVariant::Discriminator string_variant_discriminator = 0; - for (size_t i = 0; i != new_variants.size(); ++i) - { - String variant_name = new_variants[i]->getName(); - if (variant_name == "String") - { - string_variant_discriminator = i; - for (auto discr : variants_to_convert_to_string) - old_to_new_discriminators_mapping[discr] = i; - } - else - { - auto old_discr = variant_name_to_discriminator.at(variant_name); - old_to_new_discriminators_mapping[old_discr] = i; - } - } - - /// Convert all reduced variants to String. - std::unordered_map variants_converted_to_string; - variants_converted_to_string.reserve(variants_to_convert_to_string.size()); - size_t string_variant_size = 0; - for (auto discr : variants_to_convert_to_string) - { - auto string_type = std::make_shared(); - auto string_wrapper = prepareUnpackDictionaries(variant_types[discr], string_type); - auto column_to_convert = ColumnWithTypeAndName(variant_column.getVariantPtrByGlobalDiscriminator(discr), variant_types[discr], ""); - ColumnsWithTypeAndName args = {column_to_convert}; - auto variant_string_column = string_wrapper(args, string_type, nullptr, column_to_convert.column->size()); - string_variant_size += variant_string_column->size(); - variants_converted_to_string[discr] = variant_string_column; - } - - /// Create new discriminators and offsets and fill new String variant according to old discriminators. 
- auto string_variant = ColumnString::create(); - string_variant->reserve(string_variant_size); - auto new_discriminators_column = variant_column.getLocalDiscriminatorsPtr()->cloneEmpty(); - auto & new_discriminators_data = assert_cast(*new_discriminators_column).getData(); - new_discriminators_data.reserve(variant_column.size()); - auto new_offsets = variant_column.getOffsetsPtr()->cloneEmpty(); - auto & new_offsets_data = assert_cast(*new_offsets).getData(); - new_offsets_data.reserve(variant_column.size()); - const auto & old_local_discriminators = variant_column.getLocalDiscriminators(); - const auto & old_offsets = variant_column.getOffsets(); - for (size_t i = 0; i != old_local_discriminators.size(); ++i) - { - auto old_discr = variant_column.globalDiscriminatorByLocal(old_local_discriminators[i]); - - if (old_discr == ColumnVariant::NULL_DISCRIMINATOR) - { - new_discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR); - new_offsets_data.push_back(0); - continue; - } - - auto new_discr = old_to_new_discriminators_mapping[old_discr]; - new_discriminators_data.push_back(new_discr); - if (new_discr != string_variant_discriminator) - { - new_offsets_data.push_back(old_offsets[i]); - } - else - { - new_offsets_data.push_back(string_variant->size()); - string_variant->insertFrom(*variants_converted_to_string[old_discr], old_offsets[i]); - } - } - - /// Create new list of variant columns. - Columns new_variant_columns; - new_variant_columns.resize(new_variants.size()); - for (size_t i = 0; i != variant_types.size(); ++i) - { - auto new_discr = old_to_new_discriminators_mapping[i]; - if (new_discr != string_variant_discriminator) - new_variant_columns[new_discr] = variant_column.getVariantPtrByGlobalDiscriminator(i); - } - new_variant_columns[string_variant_discriminator] = std::move(string_variant); - return {ColumnVariant::create(std::move(new_discriminators_column), std::move(new_offsets), new_variant_columns), reduced_variant}; - } - - WrapperType createVariantToDynamicWrapper(const DataTypePtr & from_type, const DataTypeDynamic & dynamic_type) const - { - const auto & from_variant_type = assert_cast(*from_type); - size_t max_dynamic_types = dynamic_type.getMaxDynamicTypes(); - const auto & variants = from_variant_type.getVariants(); - std::unordered_map variant_name_to_discriminator; - variant_name_to_discriminator.reserve(variants.size()); - for (size_t i = 0; i != variants.size(); ++i) - variant_name_to_discriminator[variants[i]->getName()] = i; - - return [from_type, max_dynamic_types, variant_name_to_discriminator, this] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr - { - const auto & variant_column = assert_cast(*arguments.front().column); - auto [reduced_variant_column, reduced_variant_type] = getReducedVariant(variant_column, from_type, variant_name_to_discriminator, max_dynamic_types); - return ColumnDynamic::create(reduced_variant_column, reduced_variant_type, max_dynamic_types); + /// Otherwise some variants should go to the shared variant. Create temporary Dynamic column from this Variant and insert + /// all data to the resulting Dynamic column, this insertion will do all the logic with shared variant. 
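+ /// The temporary Dynamic column is created with a limit equal to the number of
+ /// variants, so each variant keeps its own subcolumn; re-inserting its rows into a
+ /// column with the smaller limit max_dynamic_types then moves the variants that do
+ /// not fit into the shared variant in binary form.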
+ auto tmp_dynamic_column = ColumnDynamic::create(variant_column_for_dynamic, variant_type_for_dynamic, number_of_variants, number_of_variants); + auto result_dynamic_column = ColumnDynamic::create(max_dynamic_types); + result_dynamic_column->insertRangeFrom(*tmp_dynamic_column, 0, tmp_dynamic_column->size()); + return result_dynamic_column; }; } WrapperType createColumnToDynamicWrapper(const DataTypePtr & from_type, const DataTypeDynamic & dynamic_type) const { if (const auto * variant_type = typeid_cast(from_type.get())) - return createVariantToDynamicWrapper(from_type, dynamic_type); - - if (dynamic_type.getMaxDynamicTypes() == 1) - { - DataTypePtr string_type = std::make_shared(); - if (from_type->isNullable()) - string_type = makeNullable(string_type); - auto string_wrapper = prepareUnpackDictionaries(from_type, string_type); - auto variant_type = std::make_shared(DataTypes{removeNullable(string_type)}); - auto variant_wrapper = createColumnToVariantWrapper(string_type, *variant_type); - return [string_wrapper, variant_wrapper, string_type, variant_type, max_dynamic_types=dynamic_type.getMaxDynamicTypes()] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr - { - auto string_column = string_wrapper(arguments, string_type, col_nullable, input_rows_count); - auto column = ColumnWithTypeAndName(string_column, string_type, ""); - ColumnsWithTypeAndName args = {column}; - auto variant_column = variant_wrapper(args, variant_type, nullptr, string_column->size()); - return ColumnDynamic::create(variant_column, variant_type, max_dynamic_types); - }; - } + return createVariantToDynamicWrapper(*variant_type, dynamic_type); if (context && context->getSettingsRef().cast_string_to_dynamic_use_inference && isStringOrFixedString(removeNullable(removeLowCardinality(from_type)))) return createStringToDynamicThroughParsingWrapper(); + /// First, cast column to Variant with 2 variants - the type of the column we cast and shared variant type. auto variant_type = std::make_shared(DataTypes{removeNullableOrLowCardinalityNullable(from_type)}); - auto variant_wrapper = createColumnToVariantWrapper(from_type, *variant_type); - return [variant_wrapper, variant_type, max_dynamic_types=dynamic_type.getMaxDynamicTypes()] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr + auto column_to_variant_wrapper = createColumnToVariantWrapper(from_type, *variant_type); + /// Second, cast this Variant to Dynamic. 
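+ /// E.g. CAST(42, 'Dynamic') goes 42 -> Variant(Int64) -> Dynamic; the shared variant
+ /// added in the first step only receives data when the number of variants exceeds
+ /// the max_types limit of the target Dynamic type.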
+ auto variant_to_dynamic_wrapper = createVariantToDynamicWrapper(*variant_type, dynamic_type);
+ return [column_to_variant_wrapper, variant_to_dynamic_wrapper, variant_type]
+ (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr
 {
- auto variant_res = variant_wrapper(arguments, variant_type, col_nullable, input_rows_count);
- return ColumnDynamic::create(variant_res, variant_type, max_dynamic_types);
+ auto variant_res = column_to_variant_wrapper(arguments, variant_type, col_nullable, input_rows_count);
+ ColumnsWithTypeAndName args = {{variant_res, variant_type, ""}};
+ return variant_to_dynamic_wrapper(args, result_type, nullptr, input_rows_count);
 };
 }
@@ -4530,21 +4467,26 @@ private:
 (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr
 {
 const auto & column_dynamic = assert_cast<const ColumnDynamic &>(*arguments[0].column);
- return ColumnDynamic::create(column_dynamic.getVariantColumnPtr(), column_dynamic.getVariantInfo(), to_max_types);
+ /// We should use the same limit as the one already used in the column and change only the global limit.
+ /// This is needed because the shared variant should contain values only when the limit is exceeded,
+ /// so if there is already some data, we cannot increase the limit.
+ return ColumnDynamic::create(column_dynamic.getVariantColumnPtr(), column_dynamic.getVariantInfo(), column_dynamic.getMaxDynamicTypes(), to_max_types);
 };
 }
- return [to_max_types, this]
+ return [to_max_types]
 (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr
 {
 const auto & column_dynamic = assert_cast<const ColumnDynamic &>(*arguments[0].column);
- auto [reduced_variant_column, reduced_variant_type] = getReducedVariant(
- column_dynamic.getVariantColumn(),
- column_dynamic.getVariantInfo().variant_type,
- column_dynamic.getVariantInfo().variant_name_to_discriminator,
- to_max_types,
- column_dynamic.getStatistics());
- return ColumnDynamic::create(reduced_variant_column, reduced_variant_type, to_max_types);
+ /// If the real limit in the column is not greater than the desired one, just reuse the same variant column.
+ if (column_dynamic.getMaxDynamicTypes() <= to_max_types)
+ return ColumnDynamic::create(column_dynamic.getVariantColumnPtr(), column_dynamic.getVariantInfo(), column_dynamic.getMaxDynamicTypes(), to_max_types);
+
+ /// Otherwise some variants should go to the shared variant. In this case we can just insert all
+ /// the data into the resulting column and it will do all the shared variant logic itself.
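+ /// insertRangeFrom below re-inserts every row into an empty column created with the
+ /// smaller limit, so the variants that no longer fit as dedicated subcolumns end up
+ /// in the shared variant.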
+ auto result_dynamic_column = ColumnDynamic::create(to_max_types);
+ result_dynamic_column->insertRangeFrom(column_dynamic, 0, column_dynamic.size());
+ return result_dynamic_column;
 };
 }
diff --git a/src/Functions/dynamicType.cpp b/src/Functions/dynamicType.cpp
index e8ca73597d6..327cdfe1616 100644
--- a/src/Functions/dynamicType.cpp
+++ b/src/Functions/dynamicType.cpp
@@ -2,10 +2,14 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
 #include
+#include
+#include
 #include
@@ -65,11 +69,15 @@ public:
 const auto & variant_column = dynamic_column->getVariantColumn();
 auto res = result_type->createColumn();
 String element_type;
+ auto shared_variant_discr = dynamic_column->getSharedVariantDiscriminator();
+ const auto & shared_variant = dynamic_column->getSharedVariant();
 for (size_t i = 0; i != input_rows_count; ++i)
 {
 auto global_discr = variant_column.globalDiscriminatorAt(i);
 if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
 element_type = name_for_null;
+ else if (global_discr == shared_variant_discr)
+ element_type = getTypeNameFromSharedVariantValue(shared_variant.getDataAt(variant_column.offsetAt(i)));
 else
 element_type = variant_info.variant_names[global_discr];
@@ -78,6 +86,63 @@ public:
 return res;
 }
+
+ String getTypeNameFromSharedVariantValue(StringRef value) const
+ {
+ ReadBufferFromMemory buf(value.data, value.size);
+ return decodeDataType(buf)->getName();
+ }
+};
+
+class FunctionIsDynamicElementInSharedData : public IFunction
+{
+public:
+ static constexpr auto name = "isDynamicElementInSharedData";
+
+ static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionIsDynamicElementInSharedData>(); }
+ String getName() const override { return name; }
+ size_t getNumberOfArguments() const override { return 1; }
+ bool useDefaultImplementationForConstants() const override { return true; }
+ bool useDefaultImplementationForNulls() const override { return false; }
+ bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
+ bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
+
+ DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
+ {
+ if (arguments.empty() || arguments.size() > 1)
+ throw Exception(
+ ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+ "Number of arguments for function {} doesn't match: passed {}, should be 1",
+ getName(), arguments.size());
+
+ if (!isDynamic(arguments[0].type.get()))
+ throw Exception(
+ ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+ "First argument for function {} must be Dynamic, got {} instead",
+ getName(), arguments[0].type->getName());
+
+ return DataTypeFactory::instance().get("Bool");
+ }
+
+ ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+ {
+ const ColumnDynamic * dynamic_column = checkAndGetColumn<ColumnDynamic>(arguments[0].column.get());
+ if (!dynamic_column)
+ throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+ "First argument for function {} must be Dynamic, got {} instead",
+ getName(), arguments[0].type->getName());
+
+ const auto & variant_column = dynamic_column->getVariantColumn();
+ const auto & local_discriminators = variant_column.getLocalDiscriminators();
+ auto res = result_type->createColumn();
+ auto & res_data = assert_cast<ColumnUInt8 &>(*res).getData();
+ res_data.reserve(dynamic_column->size());
+ auto shared_variant_local_discr =
variant_column.localDiscriminatorByGlobal(dynamic_column->getSharedVariantDiscriminator());
+ for (size_t i = 0; i != input_rows_count; ++i)
+ res_data.push_back(local_discriminators[i] == shared_variant_local_discr);
+
+ return res;
+ }
};
}
@@ -88,7 +153,7 @@ REGISTER_FUNCTION(DynamicType)
 .description = R"(
Returns the variant type name for each row of `Dynamic` column. If row contains NULL, it returns 'None' for it.
)",
- .syntax = {"dynamicType(variant)"},
+ .syntax = {"dynamicType(dynamic)"},
 .arguments = {{"dynamic", "Dynamic column"}},
 .examples = {{{
 "Example",
@@ -104,6 +169,30 @@ SELECT d, dynamicType(d) FROM test;
│ Hello, World! │ String         │
│ [1,2,3]       │ Array(Int64)   │
└───────────────┴────────────────┘
)"}}},
 .categories{"Variant"},
 });
+
+ factory.registerFunction<FunctionIsDynamicElementInSharedData>(FunctionDocumentation{
+ .description = R"(
+Returns true for rows in a Dynamic column that are not separated into subcolumns but stored inside the shared variant in binary form.
+)",
+ .syntax = {"isDynamicElementInSharedData(dynamic)"},
+ .arguments = {{"dynamic", "Dynamic column"}},
+ .examples = {{{
+ "Example",
+ R"(
+CREATE TABLE test (d Dynamic(max_types=2)) ENGINE = Memory;
+INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
+SELECT d, isDynamicElementInSharedData(d) FROM test;
+)",
+ R"(
+┌─d─────────────┬─isDynamicElementInSharedData(d)─┐
+│ ᴺᵁᴸᴸ          │ false                           │
+│ 42            │ false                           │
+│ Hello, World! │ true                            │
+│ [1,2,3]       │ true                            │
+└───────────────┴─────────────────────────────────┘
+)"}}},
+ .categories{"Variant"},
+ });
diff --git a/tests/queries/0_stateless/00000_test.sql b/tests/queries/0_stateless/00000_test.sql
new file mode 100644
index 00000000000..db9dd774484
--- /dev/null
+++ b/tests/queries/0_stateless/00000_test.sql
@@ -0,0 +1,43 @@
+set allow_experimental_variant_type = 1;
+set use_variant_as_common_type = 1;
+set allow_experimental_dynamic_type = 1;
+set enable_named_columns_in_function_tuple = 0;
+drop table if exists test;
+create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;
+
+
+system stop merges test;
+insert into test select number, number from numbers(10);
+insert into test select number, tuple(if(number % 3 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(10);
+insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(5);
+insert into test select number, multiIf(number % 5 == 0, tuple(if(number % 3 == 0, toDateTime(number), toIPv4(number)))::Tuple(a Dynamic(max_types=3)), number % 5 == 1 or number % 5 == 2, number, 'str_' || number) from numbers(10);
+
+select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=3))`.a) as flag from test group by type, flag order by count(), type;
+system start merges test;
+optimize table test final;
+select '---------------------';
+select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=3))`.a) as flag from test group by type, flag order by count(), type;
+
+system stop merges test;
+insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(5);
+insert into test select
number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(20);
+
+select '---------------------';
+select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=3))`.a) as flag from test group by type, flag order by count(), type;
+system start merges test;
+optimize table test final;
+select '---------------------';
+select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=3))`.a) as flag from test group by type, flag order by count(), type;
+
+system stop merges test;
+insert into test select number, tuple(toDateTime(number))::Tuple(a Dynamic(max_types=3)) from numbers(4);
+
+select '---------------------';
+select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=3))`.a) as flag from test group by type, flag order by count(), type;
+system start merges test;
+optimize table test final;
+select '---------------------';
+select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=3))`.a) as flag from test group by type, flag order by count(), type;
+
+drop table test;
+
diff --git a/tests/queries/0_stateless/03033_dynamic_text_serialization.reference b/tests/queries/0_stateless/03033_dynamic_text_serialization.reference
index d965245266c..9fc356cc5e6 100644
--- a/tests/queries/0_stateless/03033_dynamic_text_serialization.reference
+++ b/tests/queries/0_stateless/03033_dynamic_text_serialization.reference
@@ -11,11 +11,11 @@ JSON
 {"d":["1","str",["1","2","3"]],"dynamicType(d)":"Tuple(Int64, String, Array(Int64))"}
 {"d":null,"dynamicType(d)":"None"}
 {"d":true,"dynamicType(d)":"Bool"}
-{"d":"42","dynamicType(d)":"Int64"}
-{"d":"42.42","dynamicType(d)":"String"}
-{"d":"str","dynamicType(d)":"String"}
-{"d":null,"dynamicType(d)":"None"}
-{"d":"1","dynamicType(d)":"Int64"}
+{"d":"42","dynamicType(d)":"Int64","isDynamicElementInSharedData(d)":false}
+{"d":42.42,"dynamicType(d)":"Float64","isDynamicElementInSharedData(d)":false}
+{"d":"str","dynamicType(d)":"String","isDynamicElementInSharedData(d)":true}
+{"d":null,"dynamicType(d)":"None","isDynamicElementInSharedData(d)":false}
+{"d":true,"dynamicType(d)":"Bool","isDynamicElementInSharedData(d)":true}
 CSV
 42,"Int64"
 42.42,"Float64"
@@ -44,12 +44,12 @@ Cast using parsing
 [1,2,3] Array(Int64)
 2020-01-01 Date
 2020-01-01 10:00:00.000000000 DateTime64(9)
-\N None
+NULL String
 true Bool
-42 Int64
-42.42 Float64
-[1, 2, 3] String
-2020-01-01 String
-2020-01-01 10:00:00 String
-\N None
-true String
+42 Int64 false
+42.42 Float64 false
+[1,2,3] Array(Int64) false
+2020-01-01 Date true
+2020-01-01 10:00:00.000000000 DateTime64(9) true
+NULL String true
+true Bool true
diff --git a/tests/queries/0_stateless/03033_dynamic_text_serialization.sql b/tests/queries/0_stateless/03033_dynamic_text_serialization.sql
index d12d110fe28..45539cb13eb 100644
--- a/tests/queries/0_stateless/03033_dynamic_text_serialization.sql
+++ b/tests/queries/0_stateless/03033_dynamic_text_serialization.sql
@@ -16,7 +16,7 @@ select d, dynamicType(d) from format(JSONEachRow, 'd Dynamic', $$
 {"d" : true}
 $$) format JSONEachRow;
 
-select d, dynamicType(d) from format(JSONEachRow, 'd Dynamic(max_types=2)', $$
+select d, dynamicType(d),
isDynamicElementInSharedData(d) from format(JSONEachRow, 'd Dynamic(max_types=2)', $$ {"d" : 42} {"d" : 42.42} {"d" : "str"} @@ -69,6 +69,6 @@ create table test (s String) engine=Memory; insert into test values ('42'), ('42.42'), ('[1, 2, 3]'), ('2020-01-01'), ('2020-01-01 10:00:00'), ('NULL'), ('true'); set cast_string_to_dynamic_use_inference=1; select s::Dynamic as d, dynamicType(d) from test; -select s::Dynamic(max_types=3) as d, dynamicType(d) from test; +select s::Dynamic(max_types=3) as d, dynamicType(d), isDynamicElementInSharedData(d) from test; drop table test; diff --git a/tests/queries/0_stateless/03034_dynamic_conversions.reference b/tests/queries/0_stateless/03034_dynamic_conversions.reference index 45f94f7ecc4..e22b64701a3 100644 --- a/tests/queries/0_stateless/03034_dynamic_conversions.reference +++ b/tests/queries/0_stateless/03034_dynamic_conversions.reference @@ -1,9 +1,9 @@ 0 UInt64 1 UInt64 2 UInt64 -0 String -1 String -2 String +0 UInt64 +1 UInt64 +2 UInt64 0 1 2 @@ -25,15 +25,15 @@ str_1 String \N None 4 UInt64 str_5 String -0 String +0 UInt64 str_1 String -[0,1] String +[0,1] Array(UInt64) \N None -4 String +4 UInt64 str_5 String 0 UInt64 str_1 String -[0,1] String +[0,1] Array(UInt64) \N None 4 UInt64 str_5 String @@ -51,13 +51,13 @@ str_5 String 2 0 UInt64 str_1 String -[0,1] String +[0,1] Array(UInt64) \N None 4 UInt64 str_5 String 0 UInt64 1970-01-02 Date -[0,1] String +[0,1] Array(UInt64) \N None 4 UInt64 1970-01-06 Date diff --git a/tests/queries/0_stateless/03034_dynamic_conversions.sql b/tests/queries/0_stateless/03034_dynamic_conversions.sql index ed75fbf2377..c0b470f29c5 100644 --- a/tests/queries/0_stateless/03034_dynamic_conversions.sql +++ b/tests/queries/0_stateless/03034_dynamic_conversions.sql @@ -3,7 +3,7 @@ set allow_experimental_variant_type=1; set use_variant_as_common_type=1; select number::Dynamic as d, dynamicType(d) from numbers(3); -select number::Dynamic(max_types=1) as d, dynamicType(d) from numbers(3); +select number::Dynamic(max_types=0) as d, dynamicType(d) from numbers(3); select number::Dynamic::UInt64 as v from numbers(3); select number::Dynamic::String as v from numbers(3); select number::Dynamic::Date as v from numbers(3); @@ -12,13 +12,13 @@ select number::Dynamic::Variant(UInt64, String) as v, variantType(v) from number select (number % 2 ? 
NULL : number)::Dynamic as d, dynamicType(d) from numbers(3); select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=0) as d, dynamicType(d) from numbers(6); select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=1) as d, dynamicType(d) from numbers(6); select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=2) as d, dynamicType(d) from numbers(6); -select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=3) as d, dynamicType(d) from numbers(6); select number::Dynamic(max_types=2)::Dynamic(max_types=3) as d from numbers(3); select number::Dynamic(max_types=2)::Dynamic(max_types=1) as d from numbers(3); -select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=3)::Dynamic(max_types=2) as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=2)::Dynamic(max_types=1) as d, dynamicType(d) from numbers(6); select multiIf(number % 4 == 0, number, number % 4 == 1, toDate(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=4)::Dynamic(max_types=3) as d, dynamicType(d) from numbers(6); diff --git a/tests/queries/0_stateless/03035_dynamic_sorting.reference b/tests/queries/0_stateless/03035_dynamic_sorting.reference index 9b8df11c7a9..f253c34ce8a 100644 --- a/tests/queries/0_stateless/03035_dynamic_sorting.reference +++ b/tests/queries/0_stateless/03035_dynamic_sorting.reference @@ -1,299 +1,442 @@ order by d1 nulls first -\N None -\N None -\N None -\N None -[1,2,3] Array(Int64) -[1,2,3] Array(Int64) -[1,2,3] Array(Int64) -[1,2,3] Array(Int64) -[1,2,3] Array(Int64) -[1,2,4] Array(Int64) -42 Int64 -42 Int64 -42 Int64 -42 Int64 -42 Int64 -43 Int64 -abc String -abc String -abc String -abc String -abc String -abd String +\N None false +\N None false +\N None false +\N None false +\N None false +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,4] Array(Int64) true +2020-01-01 Date true +2020-01-01 Date true +2020-01-01 Date true +2020-01-01 Date true +2020-01-01 Date true +2020-01-01 Date true +2020-01-02 Date true +42 Int64 false +42 Int64 false +42 Int64 false +42 Int64 false +42 Int64 false +42 Int64 false +43 Int64 false +abc String false +abc String false +abc String false +abc String false +abc String false +abc String false +abd String false order by d1 nulls last -[1,2,3] Array(Int64) -[1,2,3] Array(Int64) -[1,2,3] Array(Int64) -[1,2,3] Array(Int64) -[1,2,3] Array(Int64) -[1,2,4] Array(Int64) -42 Int64 -42 Int64 -42 Int64 -42 Int64 -42 Int64 -43 Int64 -abc String -abc String -abc String -abc String -abc String -abd String -\N None -\N None -\N None -\N None +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,4] Array(Int64) true +2020-01-01 
Date true +2020-01-01 Date true +2020-01-01 Date true +2020-01-01 Date true +2020-01-01 Date true +2020-01-01 Date true +2020-01-02 Date true +42 Int64 false +42 Int64 false +42 Int64 false +42 Int64 false +42 Int64 false +42 Int64 false +43 Int64 false +abc String false +abc String false +abc String false +abc String false +abc String false +abc String false +abd String false +\N None false +\N None false +\N None false +\N None false +\N None false order by d2 nulls first -\N None -\N None -\N None -\N None -[1,2,3] Array(Int64) -[1,2,3] Array(Int64) -[1,2,3] Array(Int64) -[1,2,3] Array(Int64) -[1,2,3] Array(Int64) -[1,2,4] Array(Int64) -42 Int64 -42 Int64 -42 Int64 -42 Int64 -42 Int64 -43 Int64 -abc String -abc String -abc String -abc String -abc String -abd String +\N None false +\N None false +\N None false +\N None false +\N None false +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,4] Array(Int64) true +2020-01-01 Date true +2020-01-01 Date true +2020-01-01 Date true +2020-01-01 Date true +2020-01-01 Date true +2020-01-01 Date true +2020-01-02 Date true +42 Int64 false +42 Int64 false +42 Int64 false +42 Int64 false +42 Int64 false +42 Int64 false +43 Int64 false +abc String false +abc String false +abc String false +abc String false +abc String false +abc String false +abd String false order by d2 nulls last -[1,2,3] Array(Int64) -[1,2,3] Array(Int64) -[1,2,3] Array(Int64) -[1,2,3] Array(Int64) -[1,2,3] Array(Int64) -[1,2,4] Array(Int64) -42 Int64 -42 Int64 -42 Int64 -42 Int64 -42 Int64 -43 Int64 -abc String -abc String -abc String -abc String -abc String -abd String -\N None -\N None -\N None -\N None +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,3] Array(Int64) true +[1,2,4] Array(Int64) true +2020-01-01 Date true +2020-01-01 Date true +2020-01-01 Date true +2020-01-01 Date true +2020-01-01 Date true +2020-01-01 Date true +2020-01-02 Date true +42 Int64 false +42 Int64 false +42 Int64 false +42 Int64 false +42 Int64 false +42 Int64 false +43 Int64 false +abc String false +abc String false +abc String false +abc String false +abc String false +abc String false +abd String false +\N None false +\N None false +\N None false +\N None false +\N None false order by d1, d2 nulls first -[1,2,3] \N Array(Int64) None -[1,2,3] [1,2,3] Array(Int64) Array(Int64) -[1,2,3] [1,2,4] Array(Int64) Array(Int64) -[1,2,3] 42 Array(Int64) Int64 -[1,2,3] abc Array(Int64) String -[1,2,4] [1,2,3] Array(Int64) Array(Int64) -42 \N Int64 None -42 [1,2,3] Int64 Array(Int64) -42 42 Int64 Int64 -42 43 Int64 Int64 -42 abc Int64 String -43 42 Int64 Int64 -abc \N String None -abc [1,2,3] String Array(Int64) -abc 42 String Int64 -abc abc String String -abc abd String String -abd abc String String -\N \N None None -\N [1,2,3] None Array(Int64) -\N 42 None Int64 -\N abc None String +[1,2,3] \N Array(Int64) true None false +[1,2,3] [1,2,3] Array(Int64) true Array(Int64) true +[1,2,3] [1,2,4] Array(Int64) true Array(Int64) true +[1,2,3] 2020-01-01 Array(Int64) true Date true +[1,2,3] 42 Array(Int64) true Int64 false +[1,2,3] abc Array(Int64) true String false +[1,2,4] [1,2,3] Array(Int64) true Array(Int64) true +2020-01-01 \N Date true None false +2020-01-01 [1,2,3] Date true Array(Int64) true +2020-01-01 2020-01-01 Date true Date true +2020-01-01 2020-01-02 Date true Date true +2020-01-01 42 Date true Int64 
false +2020-01-01 abc Date true String false +2020-01-02 2020-01-01 Date true Date true +42 \N Int64 false None false +42 [1,2,3] Int64 false Array(Int64) true +42 2020-01-01 Int64 false Date true +42 42 Int64 false Int64 false +42 43 Int64 false Int64 false +42 abc Int64 false String false +43 42 Int64 false Int64 false +abc \N String false None false +abc [1,2,3] String false Array(Int64) true +abc 2020-01-01 String false Date true +abc 42 String false Int64 false +abc abc String false String false +abc abd String false String false +abd abc String false String false +\N \N None false None false +\N [1,2,3] None false Array(Int64) true +\N 2020-01-01 None false Date true +\N 42 None false Int64 false +\N abc None false String false order by d1, d2 nulls last -[1,2,3] [1,2,3] Array(Int64) Array(Int64) -[1,2,3] [1,2,4] Array(Int64) Array(Int64) -[1,2,3] 42 Array(Int64) Int64 -[1,2,3] abc Array(Int64) String -[1,2,3] \N Array(Int64) None -[1,2,4] [1,2,3] Array(Int64) Array(Int64) -42 [1,2,3] Int64 Array(Int64) -42 42 Int64 Int64 -42 43 Int64 Int64 -42 abc Int64 String -42 \N Int64 None -43 42 Int64 Int64 -abc [1,2,3] String Array(Int64) -abc 42 String Int64 -abc abc String String -abc abd String String -abc \N String None -abd abc String String -\N [1,2,3] None Array(Int64) -\N 42 None Int64 -\N abc None String -\N \N None None +[1,2,3] [1,2,3] Array(Int64) true Array(Int64) true +[1,2,3] [1,2,4] Array(Int64) true Array(Int64) true +[1,2,3] 2020-01-01 Array(Int64) true Date true +[1,2,3] 42 Array(Int64) true Int64 false +[1,2,3] abc Array(Int64) true String false +[1,2,3] \N Array(Int64) true None false +[1,2,4] [1,2,3] Array(Int64) true Array(Int64) true +2020-01-01 [1,2,3] Date true Array(Int64) true +2020-01-01 2020-01-01 Date true Date true +2020-01-01 2020-01-02 Date true Date true +2020-01-01 42 Date true Int64 false +2020-01-01 abc Date true String false +2020-01-01 \N Date true None false +2020-01-02 2020-01-01 Date true Date true +42 [1,2,3] Int64 false Array(Int64) true +42 2020-01-01 Int64 false Date true +42 42 Int64 false Int64 false +42 43 Int64 false Int64 false +42 abc Int64 false String false +42 \N Int64 false None false +43 42 Int64 false Int64 false +abc [1,2,3] String false Array(Int64) true +abc 2020-01-01 String false Date true +abc 42 String false Int64 false +abc abc String false String false +abc abd String false String false +abc \N String false None false +abd abc String false String false +\N [1,2,3] None false Array(Int64) true +\N 2020-01-01 None false Date true +\N 42 None false Int64 false +\N abc None false String false +\N \N None false None false order by d2, d1 nulls first -\N [1,2,3] None Array(Int64) -[1,2,3] [1,2,3] Array(Int64) Array(Int64) -[1,2,4] [1,2,3] Array(Int64) Array(Int64) -42 [1,2,3] Int64 Array(Int64) -abc [1,2,3] String Array(Int64) -[1,2,3] [1,2,4] Array(Int64) Array(Int64) -\N 42 None Int64 -[1,2,3] 42 Array(Int64) Int64 -42 42 Int64 Int64 -43 42 Int64 Int64 -abc 42 String Int64 -42 43 Int64 Int64 -\N abc None String -[1,2,3] abc Array(Int64) String -42 abc Int64 String -abc abc String String -abd abc String String -abc abd String String -\N \N None None -[1,2,3] \N Array(Int64) None -42 \N Int64 None -abc \N String None +\N [1,2,3] None false Array(Int64) true +[1,2,3] [1,2,3] Array(Int64) true Array(Int64) true +[1,2,4] [1,2,3] Array(Int64) true Array(Int64) true +2020-01-01 [1,2,3] Date true Array(Int64) true +42 [1,2,3] Int64 false Array(Int64) true +abc [1,2,3] String false Array(Int64) true +[1,2,3] [1,2,4] Array(Int64) true 
Array(Int64) true +\N 2020-01-01 None false Date true +[1,2,3] 2020-01-01 Array(Int64) true Date true +2020-01-01 2020-01-01 Date true Date true +2020-01-02 2020-01-01 Date true Date true +42 2020-01-01 Int64 false Date true +abc 2020-01-01 String false Date true +2020-01-01 2020-01-02 Date true Date true +\N 42 None false Int64 false +[1,2,3] 42 Array(Int64) true Int64 false +2020-01-01 42 Date true Int64 false +42 42 Int64 false Int64 false +43 42 Int64 false Int64 false +abc 42 String false Int64 false +42 43 Int64 false Int64 false +\N abc None false String false +[1,2,3] abc Array(Int64) true String false +2020-01-01 abc Date true String false +42 abc Int64 false String false +abc abc String false String false +abd abc String false String false +abc abd String false String false +\N \N None false None false +[1,2,3] \N Array(Int64) true None false +2020-01-01 \N Date true None false +42 \N Int64 false None false +abc \N String false None false order by d2, d1 nulls last -[1,2,3] [1,2,3] Array(Int64) Array(Int64) -[1,2,4] [1,2,3] Array(Int64) Array(Int64) -42 [1,2,3] Int64 Array(Int64) -abc [1,2,3] String Array(Int64) -\N [1,2,3] None Array(Int64) -[1,2,3] [1,2,4] Array(Int64) Array(Int64) -[1,2,3] 42 Array(Int64) Int64 -42 42 Int64 Int64 -43 42 Int64 Int64 -abc 42 String Int64 -\N 42 None Int64 -42 43 Int64 Int64 -[1,2,3] abc Array(Int64) String -42 abc Int64 String -abc abc String String -abd abc String String -\N abc None String -abc abd String String -[1,2,3] \N Array(Int64) None -42 \N Int64 None -abc \N String None -\N \N None None +[1,2,3] [1,2,3] Array(Int64) true Array(Int64) true +[1,2,4] [1,2,3] Array(Int64) true Array(Int64) true +2020-01-01 [1,2,3] Date true Array(Int64) true +42 [1,2,3] Int64 false Array(Int64) true +abc [1,2,3] String false Array(Int64) true +\N [1,2,3] None false Array(Int64) true +[1,2,3] [1,2,4] Array(Int64) true Array(Int64) true +[1,2,3] 2020-01-01 Array(Int64) true Date true +2020-01-01 2020-01-01 Date true Date true +2020-01-02 2020-01-01 Date true Date true +42 2020-01-01 Int64 false Date true +abc 2020-01-01 String false Date true +\N 2020-01-01 None false Date true +2020-01-01 2020-01-02 Date true Date true +[1,2,3] 42 Array(Int64) true Int64 false +2020-01-01 42 Date true Int64 false +42 42 Int64 false Int64 false +43 42 Int64 false Int64 false +abc 42 String false Int64 false +\N 42 None false Int64 false +42 43 Int64 false Int64 false +[1,2,3] abc Array(Int64) true String false +2020-01-01 abc Date true String false +42 abc Int64 false String false +abc abc String false String false +abd abc String false String false +\N abc None false String false +abc abd String false String false +[1,2,3] \N Array(Int64) true None false +2020-01-01 \N Date true None false +42 \N Int64 false None false +abc \N String false None false +\N \N None false None false d1 = d2 -[1,2,3] [1,2,3] 1 Array(Int64) Array(Int64) -[1,2,3] [1,2,4] 0 Array(Int64) Array(Int64) -[1,2,3] 42 0 Array(Int64) Int64 -[1,2,3] abc 0 Array(Int64) String -[1,2,3] \N 0 Array(Int64) None -[1,2,4] [1,2,3] 0 Array(Int64) Array(Int64) -42 [1,2,3] 0 Int64 Array(Int64) -42 42 1 Int64 Int64 -42 43 0 Int64 Int64 -42 abc 0 Int64 String -42 \N 0 Int64 None -43 42 0 Int64 Int64 -abc [1,2,3] 0 String Array(Int64) -abc 42 0 String Int64 -abc abc 1 String String -abc abd 0 String String -abc \N 0 String None -abd abc 0 String String -\N [1,2,3] 0 None Array(Int64) -\N 42 0 None Int64 -\N abc 0 None String -\N \N 1 None None +[1,2,3] [1,2,3] 1 Array(Int64) true Array(Int64) true +[1,2,3] [1,2,4] 0 
Array(Int64) true Array(Int64) true +[1,2,3] 2020-01-01 0 Array(Int64) true Date true +[1,2,3] 42 0 Array(Int64) true Int64 false +[1,2,3] abc 0 Array(Int64) true String false +[1,2,3] \N 0 Array(Int64) true None false +[1,2,4] [1,2,3] 0 Array(Int64) true Array(Int64) true +2020-01-01 [1,2,3] 0 Date true Array(Int64) true +2020-01-01 2020-01-01 1 Date true Date true +2020-01-01 2020-01-02 0 Date true Date true +2020-01-01 42 0 Date true Int64 false +2020-01-01 abc 0 Date true String false +2020-01-01 \N 0 Date true None false +2020-01-02 2020-01-01 0 Date true Date true +42 [1,2,3] 0 Int64 false Array(Int64) true +42 2020-01-01 0 Int64 false Date true +42 42 1 Int64 false Int64 false +42 43 0 Int64 false Int64 false +42 abc 0 Int64 false String false +42 \N 0 Int64 false None false +43 42 0 Int64 false Int64 false +abc [1,2,3] 0 String false Array(Int64) true +abc 2020-01-01 0 String false Date true +abc 42 0 String false Int64 false +abc abc 1 String false String false +abc abd 0 String false String false +abc \N 0 String false None false +abd abc 0 String false String false +\N [1,2,3] 0 None false Array(Int64) true +\N 2020-01-01 0 None false Date true +\N 42 0 None false Int64 false +\N abc 0 None false String false +\N \N 1 None false None false d1 < d2 -[1,2,3] [1,2,3] 0 Array(Int64) Array(Int64) -[1,2,3] [1,2,4] 1 Array(Int64) Array(Int64) -[1,2,3] 42 1 Array(Int64) Int64 -[1,2,3] abc 1 Array(Int64) String -[1,2,3] \N 1 Array(Int64) None -[1,2,4] [1,2,3] 0 Array(Int64) Array(Int64) -42 [1,2,3] 0 Int64 Array(Int64) -42 42 0 Int64 Int64 -42 43 1 Int64 Int64 -42 abc 1 Int64 String -42 \N 1 Int64 None -43 42 0 Int64 Int64 -abc [1,2,3] 0 String Array(Int64) -abc 42 0 String Int64 -abc abc 0 String String -abc abd 1 String String -abc \N 1 String None -abd abc 0 String String -\N [1,2,3] 0 None Array(Int64) -\N 42 0 None Int64 -\N abc 0 None String -\N \N 0 None None +[1,2,3] [1,2,3] 0 Array(Int64) true Array(Int64) true +[1,2,3] [1,2,4] 1 Array(Int64) true Array(Int64) true +[1,2,3] 2020-01-01 1 Array(Int64) true Date true +[1,2,3] 42 1 Array(Int64) true Int64 false +[1,2,3] abc 1 Array(Int64) true String false +[1,2,3] \N 1 Array(Int64) true None false +[1,2,4] [1,2,3] 0 Array(Int64) true Array(Int64) true +2020-01-01 [1,2,3] 0 Date true Array(Int64) true +2020-01-01 2020-01-01 0 Date true Date true +2020-01-01 2020-01-02 1 Date true Date true +2020-01-01 42 1 Date true Int64 false +2020-01-01 abc 1 Date true String false +2020-01-01 \N 1 Date true None false +2020-01-02 2020-01-01 0 Date true Date true +42 [1,2,3] 0 Int64 false Array(Int64) true +42 2020-01-01 0 Int64 false Date true +42 42 0 Int64 false Int64 false +42 43 1 Int64 false Int64 false +42 abc 1 Int64 false String false +42 \N 1 Int64 false None false +43 42 0 Int64 false Int64 false +abc [1,2,3] 0 String false Array(Int64) true +abc 2020-01-01 0 String false Date true +abc 42 0 String false Int64 false +abc abc 0 String false String false +abc abd 1 String false String false +abc \N 1 String false None false +abd abc 0 String false String false +\N [1,2,3] 0 None false Array(Int64) true +\N 2020-01-01 0 None false Date true +\N 42 0 None false Int64 false +\N abc 0 None false String false +\N \N 0 None false None false d1 <= d2 -[1,2,3] [1,2,3] 1 Array(Int64) Array(Int64) -[1,2,3] [1,2,4] 1 Array(Int64) Array(Int64) -[1,2,3] 42 1 Array(Int64) Int64 -[1,2,3] abc 1 Array(Int64) String -[1,2,3] \N 1 Array(Int64) None -[1,2,4] [1,2,3] 0 Array(Int64) Array(Int64) -42 [1,2,3] 0 Int64 Array(Int64) -42 42 1 Int64 Int64 -42 43 1 
Int64 Int64 -42 abc 1 Int64 String -42 \N 1 Int64 None -43 42 0 Int64 Int64 -abc [1,2,3] 0 String Array(Int64) -abc 42 0 String Int64 -abc abc 1 String String -abc abd 1 String String -abc \N 1 String None -abd abc 0 String String -\N [1,2,3] 0 None Array(Int64) -\N 42 0 None Int64 -\N abc 0 None String -\N \N 1 None None +[1,2,3] [1,2,3] 1 Array(Int64) true Array(Int64) true +[1,2,3] [1,2,4] 1 Array(Int64) true Array(Int64) true +[1,2,3] 2020-01-01 1 Array(Int64) true Date true +[1,2,3] 42 1 Array(Int64) true Int64 false +[1,2,3] abc 1 Array(Int64) true String false +[1,2,3] \N 1 Array(Int64) true None false +[1,2,4] [1,2,3] 0 Array(Int64) true Array(Int64) true +2020-01-01 [1,2,3] 0 Date true Array(Int64) true +2020-01-01 2020-01-01 1 Date true Date true +2020-01-01 2020-01-02 1 Date true Date true +2020-01-01 42 1 Date true Int64 false +2020-01-01 abc 1 Date true String false +2020-01-01 \N 1 Date true None false +2020-01-02 2020-01-01 0 Date true Date true +42 [1,2,3] 0 Int64 false Array(Int64) true +42 2020-01-01 0 Int64 false Date true +42 42 1 Int64 false Int64 false +42 43 1 Int64 false Int64 false +42 abc 1 Int64 false String false +42 \N 1 Int64 false None false +43 42 0 Int64 false Int64 false +abc [1,2,3] 0 String false Array(Int64) true +abc 2020-01-01 0 String false Date true +abc 42 0 String false Int64 false +abc abc 1 String false String false +abc abd 1 String false String false +abc \N 1 String false None false +abd abc 0 String false String false +\N [1,2,3] 0 None false Array(Int64) true +\N 2020-01-01 0 None false Date true +\N 42 0 None false Int64 false +\N abc 0 None false String false +\N \N 1 None false None false d1 > d2 -[1,2,3] [1,2,3] 0 Array(Int64) Array(Int64) -[1,2,3] [1,2,4] 0 Array(Int64) Array(Int64) -[1,2,3] 42 0 Array(Int64) Int64 -[1,2,3] abc 0 Array(Int64) String -[1,2,3] \N 0 Array(Int64) None -[1,2,4] [1,2,3] 1 Array(Int64) Array(Int64) -42 [1,2,3] 1 Int64 Array(Int64) -42 42 0 Int64 Int64 -42 43 0 Int64 Int64 -42 abc 0 Int64 String -42 \N 0 Int64 None -43 42 1 Int64 Int64 -abc [1,2,3] 1 String Array(Int64) -abc 42 1 String Int64 -abc abc 0 String String -abc abd 0 String String -abc \N 0 String None -abd abc 1 String String -\N [1,2,3] 1 None Array(Int64) -\N 42 1 None Int64 -\N abc 1 None String -\N \N 0 None None +[1,2,3] [1,2,3] 0 Array(Int64) true Array(Int64) true +[1,2,3] [1,2,4] 0 Array(Int64) true Array(Int64) true +[1,2,3] 2020-01-01 0 Array(Int64) true Date true +[1,2,3] 42 0 Array(Int64) true Int64 false +[1,2,3] abc 0 Array(Int64) true String false +[1,2,3] \N 0 Array(Int64) true None false +[1,2,4] [1,2,3] 1 Array(Int64) true Array(Int64) true +2020-01-01 [1,2,3] 1 Date true Array(Int64) true +2020-01-01 2020-01-01 0 Date true Date true +2020-01-01 2020-01-02 0 Date true Date true +2020-01-01 42 0 Date true Int64 false +2020-01-01 abc 0 Date true String false +2020-01-01 \N 0 Date true None false +2020-01-02 2020-01-01 1 Date true Date true +42 [1,2,3] 1 Int64 false Array(Int64) true +42 2020-01-01 1 Int64 false Date true +42 42 0 Int64 false Int64 false +42 43 0 Int64 false Int64 false +42 abc 0 Int64 false String false +42 \N 0 Int64 false None false +43 42 1 Int64 false Int64 false +abc [1,2,3] 1 String false Array(Int64) true +abc 2020-01-01 1 String false Date true +abc 42 1 String false Int64 false +abc abc 0 String false String false +abc abd 0 String false String false +abc \N 0 String false None false +abd abc 1 String false String false +\N [1,2,3] 1 None false Array(Int64) true +\N 2020-01-01 1 None false Date true +\N 42 
1 None false Int64 false +\N abc 1 None false String false +\N \N 0 None false None false d1 >= d2 -[1,2,3] [1,2,3] 1 Array(Int64) Array(Int64) -[1,2,3] [1,2,4] 1 Array(Int64) Array(Int64) -[1,2,3] 42 1 Array(Int64) Int64 -[1,2,3] abc 1 Array(Int64) String -[1,2,3] \N 1 Array(Int64) None -[1,2,4] [1,2,3] 1 Array(Int64) Array(Int64) -42 [1,2,3] 1 Int64 Array(Int64) -42 42 1 Int64 Int64 -42 43 1 Int64 Int64 -42 abc 1 Int64 String -42 \N 1 Int64 None -43 42 1 Int64 Int64 -abc [1,2,3] 1 String Array(Int64) -abc 42 1 String Int64 -abc abc 1 String String -abc abd 1 String String -abc \N 1 String None -abd abc 1 String String -\N [1,2,3] 1 None Array(Int64) -\N 42 1 None Int64 -\N abc 1 None String -\N \N 1 None None +[1,2,3] [1,2,3] 1 Array(Int64) true Array(Int64) true +[1,2,3] [1,2,4] 1 Array(Int64) true Array(Int64) true +[1,2,3] 2020-01-01 1 Array(Int64) true Date true +[1,2,3] 42 1 Array(Int64) true Int64 false +[1,2,3] abc 1 Array(Int64) true String false +[1,2,3] \N 1 Array(Int64) true None false +[1,2,4] [1,2,3] 1 Array(Int64) true Array(Int64) true +2020-01-01 [1,2,3] 1 Date true Array(Int64) true +2020-01-01 2020-01-01 1 Date true Date true +2020-01-01 2020-01-02 1 Date true Date true +2020-01-01 42 1 Date true Int64 false +2020-01-01 abc 1 Date true String false +2020-01-01 \N 1 Date true None false +2020-01-02 2020-01-01 1 Date true Date true +42 [1,2,3] 1 Int64 false Array(Int64) true +42 2020-01-01 1 Int64 false Date true +42 42 1 Int64 false Int64 false +42 43 1 Int64 false Int64 false +42 abc 1 Int64 false String false +42 \N 1 Int64 false None false +43 42 1 Int64 false Int64 false +abc [1,2,3] 1 String false Array(Int64) true +abc 2020-01-01 1 String false Date true +abc 42 1 String false Int64 false +abc abc 1 String false String false +abc abd 1 String false String false +abc \N 1 String false None false +abd abc 1 String false String false +\N [1,2,3] 1 None false Array(Int64) true +\N 2020-01-01 1 None false Date true +\N 42 1 None false Int64 false +\N abc 1 None false String false +\N \N 1 None false None false diff --git a/tests/queries/0_stateless/03035_dynamic_sorting.sql b/tests/queries/0_stateless/03035_dynamic_sorting.sql index 0487fafc955..e0039a348c6 100644 --- a/tests/queries/0_stateless/03035_dynamic_sorting.sql +++ b/tests/queries/0_stateless/03035_dynamic_sorting.sql @@ -1,80 +1,55 @@ set allow_experimental_dynamic_type = 1; drop table if exists test; -create table test (d1 Dynamic, d2 Dynamic) engine=Memory; - -insert into test values (42, 42); -insert into test values (42, 43); -insert into test values (43, 42); - -insert into test values ('abc', 'abc'); -insert into test values ('abc', 'abd'); -insert into test values ('abd', 'abc'); - -insert into test values ([1,2,3], [1,2,3]); -insert into test values ([1,2,3], [1,2,4]); -insert into test values ([1,2,4], [1,2,3]); - -insert into test values (NULL, NULL); - -insert into test values (42, 'abc'); -insert into test values ('abc', 42); - -insert into test values (42, [1,2,3]); -insert into test values ([1,2,3], 42); - -insert into test values (42, NULL); -insert into test values (NULL, 42); - -insert into test values ('abc', [1,2,3]); -insert into test values ([1,2,3], 'abc'); - -insert into test values ('abc', NULL); -insert into test values (NULL, 'abc'); - -insert into test values ([1,2,3], NULL); -insert into test values (NULL, [1,2,3]); +create table test (d1 Dynamic(max_types=2), d2 Dynamic(max_types=2)) engine=Memory; +insert into test values (42, 42), (42, 43), (43, 42), ('abc', 'abc'), ('abc', 
'abd'), ('abd', 'abc'), +([1,2,3], [1,2,3]), ([1,2,3], [1,2,4]), ([1,2,4], [1,2,3]), +('2020-01-01', '2020-01-01'), ('2020-01-01', '2020-01-02'), ('2020-01-02', '2020-01-01'), +(NULL, NULL), (42, 'abc'), ('abc', 42), (42, [1,2,3]), ([1,2,3], 42), (42, NULL), (NULL, 42), +('abc', [1,2,3]), ([1,2,3], 'abc'), ('abc', NULL), (NULL, 'abc'), ([1,2,3], NULL), (NULL, [1,2,3]), +(42, '2020-01-01'), ('2020-01-01', 42), ('2020-01-01', 'abc'), ('abc', '2020-01-01'), +('2020-01-01', [1,2,3]), ([1,2,3], '2020-01-01'), ('2020-01-01', NULL), (NULL, '2020-01-01'); select 'order by d1 nulls first'; -select d1, dynamicType(d1) from test order by d1 nulls first; +select d1, dynamicType(d1), isDynamicElementInSharedData(d1) from test order by d1 nulls first; select 'order by d1 nulls last'; -select d1, dynamicType(d1) from test order by d1 nulls last; +select d1, dynamicType(d1), isDynamicElementInSharedData(d1) from test order by d1 nulls last; select 'order by d2 nulls first'; -select d2, dynamicType(d2) from test order by d2 nulls first; +select d2, dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d2 nulls first; select 'order by d2 nulls last'; -select d2, dynamicType(d2) from test order by d2 nulls last; +select d2, dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d2 nulls last; select 'order by d1, d2 nulls first'; -select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2 nulls first; +select d1, d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d1, d2 nulls first; select 'order by d1, d2 nulls last'; -select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2 nulls last; +select d1, d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d1, d2 nulls last; select 'order by d2, d1 nulls first'; -select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d2, d1 nulls first; +select d1, d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d2, d1 nulls first; select 'order by d2, d1 nulls last'; -select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d2, d1 nulls last; +select d1, d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d2, d1 nulls last; select 'd1 = d2'; -select d1, d2, d1 = d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; +select d1, d2, d1 = d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d1, d2; select 'd1 < d2'; -select d1, d2, d1 < d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; +select d1, d2, d1 < d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d1, d2; select 'd1 <= d2'; -select d1, d2, d1 <= d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; +select d1, d2, d1 <= d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d1, d2; select 'd1 > d2'; -select d1, d2, d1 > d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; +select d1, d2, d1 > d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d1, d2; select 'd1 >= d2'; -select d1, d2, d2 >= d2, dynamicType(d1), dynamicType(d2) 
from test order by d1, d2; +select d1, d2, d2 >= d2, dynamicType(d1), isDynamicElementInSharedData(d1), dynamicType(d2), isDynamicElementInSharedData(d2) from test order by d1, d2; drop table test; diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_compact_merge_tree.reference b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_compact_merge_tree.reference new file mode 100644 index 00000000000..ca6c5dbba82 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_compact_merge_tree.reference @@ -0,0 +1,20 @@ +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +LowCardinality(String) +None +String +UInt64 +360000 +360000 +200000 +200000 +0 +0 +20000 +20000 +200000 +200000 +20000 +20000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_compact_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_compact_merge_tree.sql new file mode 100644 index 00000000000..bff28fb5c90 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_compact_merge_tree.sql @@ -0,0 +1,43 @@ +-- Tags: long, no-tsan, no-msan, no-ubsan, no-asan + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000; + +insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000; +insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000; +insert into test select number, if (number % 5 == 1, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)), number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, if (number % 5 == 1, ('str_' || number)::LowCardinality(String)::Dynamic, number::Dynamic) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; + +select distinct dynamicType(d) as type from test order by type; +select count() from test where dynamicType(d) == 'UInt64'; +select count() from test where d.UInt64 is not NULL; +select count() from test where dynamicType(d) == 'String'; +select count() from test where d.String is not NULL; +select count() from test where dynamicType(d) == 'Date'; +select count() from test where d.Date is not NULL; +select count() from test where dynamicType(d) == 'LowCardinality(String)'; +select count() from test where d.`LowCardinality(String)` is not NULL; +select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'; +select count() from test where not empty(d.`Array(Variant(String, UInt64))`); +select count() from test where 
dynamicType(d) == 'Array(Array(Dynamic))'; +select count() from test where not empty(d.`Array(Array(Dynamic))`); +select count() from test where d is NULL; +select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); + +select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.Int8, d.Date, d.`LowCardinality(String)`, d.`Array(String)` from test format Null; +select d, d.UInt64, d.Date, d.`LowCardinality(String)`, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.UInt64, d.Date, d.`LowCardinality(String)`, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null; +select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null; + +drop table test; diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_memory.reference b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_memory.reference new file mode 100644 index 00000000000..ca6c5dbba82 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_memory.reference @@ -0,0 +1,20 @@ +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +LowCardinality(String) +None +String +UInt64 +360000 +360000 +200000 +200000 +0 +0 +20000 +20000 +200000 +200000 +20000 +20000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_memory.sql b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_memory.sql new file mode 100644 index 00000000000..4eed3d15529 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_memory.sql @@ -0,0 +1,43 @@ +-- Tags: long, no-tsan, no-msan, no-ubsan, no-asan + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic(max_types=2)) engine=Memory; + +insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000; +insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000; +insert into test select number, if (number % 5 == 1, 
[range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)), number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, if (number % 5 == 1, ('str_' || number)::LowCardinality(String)::Dynamic, number::Dynamic) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; + +select distinct dynamicType(d) as type from test order by type; +select count() from test where dynamicType(d) == 'UInt64'; +select count() from test where d.UInt64 is not NULL; +select count() from test where dynamicType(d) == 'String'; +select count() from test where d.String is not NULL; +select count() from test where dynamicType(d) == 'Date'; +select count() from test where d.Date is not NULL; +select count() from test where dynamicType(d) == 'LowCardinality(String)'; +select count() from test where d.`LowCardinality(String)` is not NULL; +select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'; +select count() from test where not empty(d.`Array(Variant(String, UInt64))`); +select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'; +select count() from test where not empty(d.`Array(Array(Dynamic))`); +select count() from test where d is NULL; +select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); + +select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.Int8, d.Date, d.`Array(String)` from test format Null; +select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null; +select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null; + +drop table test; diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.reference.j2 b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.reference.j2 new file mode 100644 index 00000000000..9c1f8fa45e8 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.reference.j2 @@ -0,0 +1,2460 @@ +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +LowCardinality(String) +None +String +UInt64 +36 +36 +20 +20 +0 +0 +2 +2 +20 +20 +2 +2 +20 +0 +0 0 \N [] +1 1 \N [] +2 2 \N [] +3 3 \N [] +4 4 \N [] +5 5 \N [] +6 6 \N [] +7 7 \N [] +8 8 \N [] +9 9 \N [] +str_10 \N str_10 [] +10 10 \N [] +10 10 \N [] +[[0,1]] \N \N [] +str_11 \N \N [] +str_11 \N str_11 [] +str_12 \N str_12 [] +12 12 \N [] +12 12 \N [] +str_13 \N str_13 [] +13 13 \N [] +13 13 \N [] +str_14 \N str_14 [] +14 14 \N [] +14 14 \N [] +str_15 \N str_15 [] +15 15 \N [] +15 15 \N [] +[[0,1,2,3,4,5,6]] \N \N [] +str_16 \N \N [] +str_16 \N str_16 [] +str_17 \N str_17 [] +17 17 \N [] +17 17 \N [] +str_18 \N str_18 [] +18 18 \N [] +18 18 \N [] +str_19 \N str_19 [] +19 19 \N [] +19 19 \N [] 
+[20] \N \N [20] +['str_21','str_21'] \N \N ['str_21','str_21'] +[22,22,22] \N \N [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] +[24,24,24,24,24] \N \N [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +[40] \N \N [40] +41 41 \N [] +\N \N \N [] +str_43 \N str_43 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] +45 45 \N [] +\N \N \N [] +str_47 \N str_47 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 49 \N [] +\N \N \N [] +str_51 \N str_51 [] +[52,52,52] \N \N [52,52,52] +53 53 \N [] +\N \N \N [] +str_55 \N str_55 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] +57 57 \N [] +\N \N \N [] +str_59 \N str_59 [] +[60] \N \N [60] +61 61 \N [] +\N \N \N [] +str_63 \N str_63 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] +65 65 \N [] +\N \N \N [] +str_67 \N str_67 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] +69 69 \N [] +\N \N \N [] +str_71 \N str_71 [] +[NULL,NULL,NULL] \N \N [NULL,NULL,NULL] +73 73 \N [] +\N \N \N [] +str_75 \N str_75 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] +77 77 \N [] +\N \N \N [] +str_79 \N str_79 [] +0 \N [] +1 \N [] +2 \N [] +3 \N [] +4 \N [] +5 \N [] +6 \N [] +7 \N [] +8 \N [] +9 \N [] +\N str_10 [] +10 \N [] +10 \N [] +\N \N [] +\N \N [] +\N str_11 [] +\N str_12 [] +12 \N [] +12 \N [] +\N str_13 [] +13 \N [] +13 \N [] +\N str_14 [] +14 \N [] +14 \N [] +\N str_15 [] +15 \N [] +15 \N [] +\N \N [] +\N \N [] +\N str_16 [] +\N str_17 [] +17 \N [] +17 \N [] +\N str_18 [] +18 \N [] +18 \N [] +\N str_19 [] +19 \N [] +19 \N [] +\N \N [20] +\N \N ['str_21','str_21'] +\N \N [22,22,22] +\N \N [23,23,23,23] +\N \N [24,24,24,24,24] +\N \N [25,25,25,25,25,25] +\N \N [26,26,26,26,26,26,26] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] +\N \N [29,29,29,29,29,29,29,29,29,29] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [40] +41 \N [] +\N \N [] +\N str_43 [] +\N \N [44,44,44,44,44] +45 \N [] +\N \N [] +\N str_47 [] +\N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] +\N \N [] +\N str_51 [] +\N \N [52,52,52] +53 \N [] +\N \N [] +\N str_55 [] +\N \N [56,56,56,56,56,56,56] +57 \N [] +\N \N [] +\N str_59 [] +\N \N [60] +61 \N [] +\N \N [] +\N str_63 [] +\N \N [64,64,64,64,64] +65 \N [] +\N \N [] +\N str_67 [] +\N \N [68,68,68,68,68,68,68,68,68] +69 \N [] +\N \N [] +\N str_71 [] +\N \N [NULL,NULL,NULL] +73 \N [] +\N \N [] +\N str_75 [] +\N \N [76,76,76,76,76,76,76] +77 \N [] +\N \N [] +\N str_79 [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N 
\N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +0 0 \N [] 0 [] +1 1 \N [] 0 [] +2 2 \N [] 0 [] +3 3 \N [] 0 [] +4 4 \N [] 0 [] +5 5 \N [] 0 [] +6 6 \N [] 0 [] +7 7 \N [] 0 [] +8 8 \N [] 0 [] +9 9 \N [] 0 [] +str_10 \N \N [] 0 [] +10 10 \N [] 0 [] +10 10 \N [] 0 [] +[[0,1]] \N \N [] 0 [] +str_11 \N \N [] 0 [] +str_11 \N \N [] 0 [] +str_12 \N \N [] 0 [] +12 12 \N [] 0 [] +12 12 \N [] 0 [] +str_13 \N \N [] 0 [] +13 13 \N [] 0 [] +13 13 \N [] 0 [] +str_14 \N \N [] 0 [] +14 14 \N [] 0 [] +14 14 \N [] 0 [] +str_15 \N \N [] 0 [] +15 15 \N [] 0 [] +15 15 \N [] 0 [] +[[0,1,2,3,4,5,6]] \N \N [] 0 [] +str_16 \N \N [] 0 [] +str_16 \N \N [] 0 [] +str_17 \N \N [] 0 [] +17 17 \N [] 0 [] +17 17 \N [] 0 [] +str_18 \N \N [] 0 [] +18 18 \N [] 0 [] +18 18 \N [] 0 [] +str_19 \N \N [] 0 [] +19 19 \N [] 0 [] +19 19 \N [] 0 [] +[20] \N \N [20] 1 [20] +['str_21','str_21'] \N \N ['str_21','str_21'] 2 [NULL,NULL] +[22,22,22] \N \N [22,22,22] 3 [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] 4 [23,23,23,23] +[24,24,24,24,24] \N \N [24,24,24,24,24] 5 [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +[40] \N \N [40] 1 [40] +41 41 \N [] 0 [] +\N \N \N [] 0 [] +str_43 \N \N [] 0 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] 5 [44,44,44,44,44] +45 45 \N [] 0 [] +\N \N \N [] 0 [] +str_47 \N \N [] 0 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 49 \N [] 0 [] +\N \N \N [] 0 [] +str_51 \N \N [] 0 [] +[52,52,52] \N \N [52,52,52] 3 [52,52,52] +53 53 \N [] 0 [] +\N \N \N [] 0 [] +str_55 \N \N [] 0 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] +57 57 \N [] 0 [] +\N \N \N [] 0 [] +str_59 \N \N [] 0 [] +[60] \N \N [60] 1 [60] +61 61 \N [] 0 [] +\N \N \N [] 0 [] +str_63 \N \N [] 0 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] 5 [64,64,64,64,64] +65 65 \N [] 0 [] +\N \N \N [] 0 [] +str_67 \N \N [] 0 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] +69 69 \N [] 0 [] +\N \N \N [] 0 [] +str_71 \N \N [] 0 [] +[NULL,NULL,NULL] \N \N [NULL,NULL,NULL] 3 [NULL,NULL,NULL] +73 73 \N [] 0 [] +\N \N \N [] 0 [] +str_75 \N \N [] 0 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] +77 77 \N [] 0 [] +\N \N \N [] 0 [] +str_79 \N \N [] 0 [] +0 \N [] 0 [] [] +1 \N [] 0 [] [] 
+2 \N [] 0 [] [] +3 \N [] 0 [] [] +4 \N [] 0 [] [] +5 \N [] 0 [] [] +6 \N [] 0 [] [] +7 \N [] 0 [] [] +8 \N [] 0 [] [] +9 \N [] 0 [] [] +\N \N [] 0 [] [] +10 \N [] 0 [] [] +10 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +12 \N [] 0 [] [] +12 \N [] 0 [] [] +\N \N [] 0 [] [] +13 \N [] 0 [] [] +13 \N [] 0 [] [] +\N \N [] 0 [] [] +14 \N [] 0 [] [] +14 \N [] 0 [] [] +\N \N [] 0 [] [] +15 \N [] 0 [] [] +15 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +17 \N [] 0 [] [] +17 \N [] 0 [] [] +\N \N [] 0 [] [] +18 \N [] 0 [] [] +18 \N [] 0 [] [] +\N \N [] 0 [] [] +19 \N [] 0 [] [] +19 \N [] 0 [] [] +\N \N [20] 1 [20] [NULL] +\N \N ['str_21','str_21'] 2 [NULL,NULL] ['str_21','str_21'] +\N \N [22,22,22] 3 [22,22,22] [NULL,NULL,NULL] +\N \N [23,23,23,23] 4 [23,23,23,23] [NULL,NULL,NULL,NULL] +\N \N [24,24,24,24,24] 5 [24,24,24,24,24] [NULL,NULL,NULL,NULL,NULL] +\N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] [NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [40] 1 [40] [NULL] +41 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [44,44,44,44,44] 5 [44,44,44,44,44] [NULL,NULL,NULL,NULL,NULL] +45 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [52,52,52] 3 [52,52,52] [NULL,NULL,NULL] +53 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +57 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [60] 1 [60] [NULL] +61 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [64,64,64,64,64] 5 [64,64,64,64,64] [NULL,NULL,NULL,NULL,NULL] +65 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +69 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [NULL,NULL,NULL] 3 [NULL,NULL,NULL] [NULL,NULL,NULL] +73 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +77 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +0 0 [] [] +1 0 [] [] +2 0 [] [] +3 0 [] [] +4 0 [] [] +5 0 [] [] +6 0 [] [] +7 0 [] [] +8 0 [] [] +9 0 [] [] +str_10 0 [] [] +10 0 [] [] +10 0 [] [] +[[0,1]] 0 [] [] +str_11 0 [] [] +str_11 0 [] [] +str_12 0 [] [] +12 0 [] [] +12 0 [] [] +str_13 0 [] [] +13 0 [] [] +13 0 [] [] +str_14 0 [] [] +14 0 [] [] +14 0 [] [] +str_15 0 [] [] +15 0 [] [] +15 0 [] [] +[[0,1,2,3,4,5,6]] 0 [] [] +str_16 0 [] [] +str_16 0 [] [] +str_17 0 [] [] +17 0 [] [] +17 0 [] [] +str_18 0 [] [] +18 0 [] [] +18 0 [] [] +str_19 0 [] [] +19 0 [] [] 
+19 0 [] [] +[20] 0 [] [20] +['str_21','str_21'] 0 [] [NULL,NULL] +[22,22,22] 0 [] [22,22,22] +[23,23,23,23] 0 [] [23,23,23,23] +[24,24,24,24,24] 0 [] [24,24,24,24,24] +[25,25,25,25,25,25] 0 [] [25,25,25,25,25,25] +[26,26,26,26,26,26,26] 0 [] [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 0 [] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] 0 [] [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] 0 [] [29,29,29,29,29,29,29,29,29,29] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +[40] 0 [] [40] +41 0 [] [] +\N 0 [] [] +str_43 0 [] [] +[44,44,44,44,44] 0 [] [44,44,44,44,44] +45 0 [] [] +\N 0 [] [] +str_47 0 [] [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 0 [] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 0 [] [] +\N 0 [] [] +str_51 0 [] [] +[52,52,52] 0 [] [52,52,52] +53 0 [] [] +\N 0 [] [] +str_55 0 [] [] +[56,56,56,56,56,56,56] 0 [] [56,56,56,56,56,56,56] +57 0 [] [] +\N 0 [] [] +str_59 0 [] [] +[60] 0 [] [60] +61 0 [] [] +\N 0 [] [] +str_63 0 [] [] +[64,64,64,64,64] 0 [] [64,64,64,64,64] +65 0 [] [] +\N 0 [] [] +str_67 0 [] [] +[68,68,68,68,68,68,68,68,68] 0 [] [68,68,68,68,68,68,68,68,68] +69 0 [] [] +\N 0 [] [] +str_71 0 [] [] +[NULL,NULL,NULL] 0 [] [NULL,NULL,NULL] +73 0 [] [] +\N 0 [] [] +str_75 0 [] [] +[76,76,76,76,76,76,76] 0 [] [76,76,76,76,76,76,76] +77 0 [] [] +\N 0 [] [] +str_79 0 [] [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [20] +[] 0 [NULL,NULL] +[] 0 [22,22,22] +[] 0 [23,23,23,23] +[] 0 [24,24,24,24,24] +[] 0 [25,25,25,25,25,25] +[] 0 [26,26,26,26,26,26,26] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [28,28,28,28,28,28,28,28,28] +[] 0 [29,29,29,29,29,29,29,29,29,29] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [40] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [44,44,44,44,44] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [52,52,52] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [56,56,56,56,56,56,56] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [60] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [64,64,64,64,64] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [68,68,68,68,68,68,68,68,68] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [76,76,76,76,76,76,76] +[] 0 [] +[] 0 [] +[] 0 [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[2] [[0,1]] [[[],[]]] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[7] [[0,1,2,3,4,5,6]] [[[],[],[],[],[],[],[]]] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] 
+[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +LowCardinality(String) +None +String +UInt64 +36 +36 +20 +20 +0 +0 +2 +2 +20 +20 +2 +2 +20 +0 +0 0 \N [] +1 1 \N [] +2 2 \N [] +3 3 \N [] +4 4 \N [] +5 5 \N [] +6 6 \N [] +7 7 \N [] +8 8 \N [] +9 9 \N [] +str_10 \N str_10 [] +10 10 \N [] +10 10 \N [] +[[0,1]] \N \N [] +str_11 \N \N [] +str_11 \N str_11 [] +str_12 \N str_12 [] +12 12 \N [] +12 12 \N [] +str_13 \N str_13 [] +13 13 \N [] +13 13 \N [] +str_14 \N str_14 [] +14 14 \N [] +14 14 \N [] +str_15 \N str_15 [] +15 15 \N [] +15 15 \N [] +[[0,1,2,3,4,5,6]] \N \N [] +str_16 \N \N [] +str_16 \N str_16 [] +str_17 \N str_17 [] +17 17 \N [] +17 17 \N [] +str_18 \N str_18 [] +18 18 \N [] +18 18 \N [] +str_19 \N str_19 [] +19 19 \N [] +19 19 \N [] +[20] \N \N [20] +['str_21','str_21'] \N \N ['str_21','str_21'] +[22,22,22] \N \N [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] +[24,24,24,24,24] \N \N [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +[40] \N \N [40] +41 41 \N [] +\N \N \N [] +str_43 \N str_43 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] +45 45 \N [] +\N \N \N [] +str_47 \N str_47 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 49 \N [] +\N \N \N [] +str_51 \N str_51 [] +[52,52,52] \N \N [52,52,52] +53 53 \N [] +\N \N \N [] +str_55 \N str_55 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] +57 57 \N [] +\N \N \N [] +str_59 \N str_59 [] +[60] \N \N [60] +61 61 \N [] +\N \N \N [] +str_63 \N str_63 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] +65 65 \N [] +\N \N \N [] +str_67 \N str_67 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] +69 69 \N [] +\N \N \N [] +str_71 \N str_71 [] +[NULL,NULL,NULL] \N \N [NULL,NULL,NULL] +73 73 \N [] +\N \N \N [] +str_75 \N str_75 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] +77 77 \N [] +\N \N \N [] +str_79 \N str_79 [] +0 \N [] +1 \N [] +2 \N [] +3 \N [] +4 \N [] +5 \N [] +6 \N [] +7 \N [] +8 \N [] +9 \N [] +\N str_10 [] +10 \N [] +10 \N [] +\N \N [] +\N \N [] +\N str_11 [] +\N str_12 [] +12 \N [] +12 \N [] +\N str_13 [] +13 \N [] +13 \N [] +\N str_14 [] +14 \N [] +14 \N [] +\N str_15 [] +15 \N [] +15 \N [] +\N \N [] +\N \N [] +\N str_16 [] +\N str_17 [] +17 \N [] +17 \N [] +\N str_18 [] +18 \N [] +18 \N [] +\N str_19 [] +19 \N [] +19 \N [] +\N \N [20] +\N \N ['str_21','str_21'] +\N \N [22,22,22] +\N \N [23,23,23,23] +\N \N [24,24,24,24,24] +\N \N [25,25,25,25,25,25] +\N \N [26,26,26,26,26,26,26] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] +\N \N [29,29,29,29,29,29,29,29,29,29] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [40] +41 \N [] +\N \N [] +\N str_43 [] +\N \N [44,44,44,44,44] +45 \N [] +\N \N [] +\N str_47 [] +\N \N 
['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] +\N \N [] +\N str_51 [] +\N \N [52,52,52] +53 \N [] +\N \N [] +\N str_55 [] +\N \N [56,56,56,56,56,56,56] +57 \N [] +\N \N [] +\N str_59 [] +\N \N [60] +61 \N [] +\N \N [] +\N str_63 [] +\N \N [64,64,64,64,64] +65 \N [] +\N \N [] +\N str_67 [] +\N \N [68,68,68,68,68,68,68,68,68] +69 \N [] +\N \N [] +\N str_71 [] +\N \N [NULL,NULL,NULL] +73 \N [] +\N \N [] +\N str_75 [] +\N \N [76,76,76,76,76,76,76] +77 \N [] +\N \N [] +\N str_79 [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +0 0 \N [] 0 [] +1 1 \N [] 0 [] +2 2 \N [] 0 [] +3 3 \N [] 0 [] +4 4 \N [] 0 [] +5 5 \N [] 0 [] +6 6 \N [] 0 [] +7 7 \N [] 0 [] +8 8 \N [] 0 [] +9 9 \N [] 0 [] +str_10 \N \N [] 0 [] +10 10 \N [] 0 [] +10 10 \N [] 0 [] +[[0,1]] \N \N [] 0 [] +str_11 \N \N [] 0 [] +str_11 \N \N [] 0 [] +str_12 \N \N [] 0 [] +12 12 \N [] 0 [] +12 12 \N [] 0 [] +str_13 \N \N [] 0 [] +13 13 \N [] 0 [] +13 13 \N [] 0 [] +str_14 \N \N [] 0 [] +14 14 \N [] 0 [] +14 14 \N [] 0 [] +str_15 \N \N [] 0 [] +15 15 \N [] 0 [] +15 15 \N [] 0 [] +[[0,1,2,3,4,5,6]] \N \N [] 0 [] +str_16 \N \N [] 0 [] +str_16 \N \N [] 0 [] +str_17 \N \N [] 0 [] +17 17 \N [] 0 [] +17 17 \N [] 0 [] +str_18 \N \N [] 0 [] +18 18 \N [] 0 [] +18 18 \N [] 0 [] +str_19 \N \N [] 0 [] +19 19 \N [] 0 [] +19 19 \N [] 0 [] +[20] \N \N [20] 1 [20] +['str_21','str_21'] \N \N ['str_21','str_21'] 2 [NULL,NULL] +[22,22,22] \N \N [22,22,22] 3 [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] 4 [23,23,23,23] +[24,24,24,24,24] \N \N [24,24,24,24,24] 5 [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +[40] \N \N [40] 1 [40] +41 41 \N [] 0 [] +\N \N \N [] 0 [] +str_43 \N \N [] 0 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] 5 [44,44,44,44,44] +45 45 \N [] 0 [] +\N \N \N [] 0 [] +str_47 \N \N [] 0 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N 
['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 49 \N [] 0 [] +\N \N \N [] 0 [] +str_51 \N \N [] 0 [] +[52,52,52] \N \N [52,52,52] 3 [52,52,52] +53 53 \N [] 0 [] +\N \N \N [] 0 [] +str_55 \N \N [] 0 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] +57 57 \N [] 0 [] +\N \N \N [] 0 [] +str_59 \N \N [] 0 [] +[60] \N \N [60] 1 [60] +61 61 \N [] 0 [] +\N \N \N [] 0 [] +str_63 \N \N [] 0 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] 5 [64,64,64,64,64] +65 65 \N [] 0 [] +\N \N \N [] 0 [] +str_67 \N \N [] 0 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] +69 69 \N [] 0 [] +\N \N \N [] 0 [] +str_71 \N \N [] 0 [] +[NULL,NULL,NULL] \N \N [NULL,NULL,NULL] 3 [NULL,NULL,NULL] +73 73 \N [] 0 [] +\N \N \N [] 0 [] +str_75 \N \N [] 0 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] +77 77 \N [] 0 [] +\N \N \N [] 0 [] +str_79 \N \N [] 0 [] +0 \N [] 0 [] [] +1 \N [] 0 [] [] +2 \N [] 0 [] [] +3 \N [] 0 [] [] +4 \N [] 0 [] [] +5 \N [] 0 [] [] +6 \N [] 0 [] [] +7 \N [] 0 [] [] +8 \N [] 0 [] [] +9 \N [] 0 [] [] +\N \N [] 0 [] [] +10 \N [] 0 [] [] +10 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +12 \N [] 0 [] [] +12 \N [] 0 [] [] +\N \N [] 0 [] [] +13 \N [] 0 [] [] +13 \N [] 0 [] [] +\N \N [] 0 [] [] +14 \N [] 0 [] [] +14 \N [] 0 [] [] +\N \N [] 0 [] [] +15 \N [] 0 [] [] +15 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +17 \N [] 0 [] [] +17 \N [] 0 [] [] +\N \N [] 0 [] [] +18 \N [] 0 [] [] +18 \N [] 0 [] [] +\N \N [] 0 [] [] +19 \N [] 0 [] [] +19 \N [] 0 [] [] +\N \N [20] 1 [20] [NULL] +\N \N ['str_21','str_21'] 2 [NULL,NULL] ['str_21','str_21'] +\N \N [22,22,22] 3 [22,22,22] [NULL,NULL,NULL] +\N \N [23,23,23,23] 4 [23,23,23,23] [NULL,NULL,NULL,NULL] +\N \N [24,24,24,24,24] 5 [24,24,24,24,24] [NULL,NULL,NULL,NULL,NULL] +\N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] [NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [40] 1 [40] [NULL] +41 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [44,44,44,44,44] 5 [44,44,44,44,44] [NULL,NULL,NULL,NULL,NULL] +45 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [52,52,52] 3 [52,52,52] [NULL,NULL,NULL] +53 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +57 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [60] 1 [60] [NULL] +61 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N 
[64,64,64,64,64] 5 [64,64,64,64,64] [NULL,NULL,NULL,NULL,NULL] +65 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +69 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [NULL,NULL,NULL] 3 [NULL,NULL,NULL] [NULL,NULL,NULL] +73 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +77 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +0 0 [] [] +1 0 [] [] +2 0 [] [] +3 0 [] [] +4 0 [] [] +5 0 [] [] +6 0 [] [] +7 0 [] [] +8 0 [] [] +9 0 [] [] +str_10 0 [] [] +10 0 [] [] +10 0 [] [] +[[0,1]] 0 [] [] +str_11 0 [] [] +str_11 0 [] [] +str_12 0 [] [] +12 0 [] [] +12 0 [] [] +str_13 0 [] [] +13 0 [] [] +13 0 [] [] +str_14 0 [] [] +14 0 [] [] +14 0 [] [] +str_15 0 [] [] +15 0 [] [] +15 0 [] [] +[[0,1,2,3,4,5,6]] 0 [] [] +str_16 0 [] [] +str_16 0 [] [] +str_17 0 [] [] +17 0 [] [] +17 0 [] [] +str_18 0 [] [] +18 0 [] [] +18 0 [] [] +str_19 0 [] [] +19 0 [] [] +19 0 [] [] +[20] 0 [] [20] +['str_21','str_21'] 0 [] [NULL,NULL] +[22,22,22] 0 [] [22,22,22] +[23,23,23,23] 0 [] [23,23,23,23] +[24,24,24,24,24] 0 [] [24,24,24,24,24] +[25,25,25,25,25,25] 0 [] [25,25,25,25,25,25] +[26,26,26,26,26,26,26] 0 [] [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 0 [] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] 0 [] [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] 0 [] [29,29,29,29,29,29,29,29,29,29] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +[40] 0 [] [40] +41 0 [] [] +\N 0 [] [] +str_43 0 [] [] +[44,44,44,44,44] 0 [] [44,44,44,44,44] +45 0 [] [] +\N 0 [] [] +str_47 0 [] [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 0 [] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 0 [] [] +\N 0 [] [] +str_51 0 [] [] +[52,52,52] 0 [] [52,52,52] +53 0 [] [] +\N 0 [] [] +str_55 0 [] [] +[56,56,56,56,56,56,56] 0 [] [56,56,56,56,56,56,56] +57 0 [] [] +\N 0 [] [] +str_59 0 [] [] +[60] 0 [] [60] +61 0 [] [] +\N 0 [] [] +str_63 0 [] [] +[64,64,64,64,64] 0 [] [64,64,64,64,64] +65 0 [] [] +\N 0 [] [] +str_67 0 [] [] +[68,68,68,68,68,68,68,68,68] 0 [] [68,68,68,68,68,68,68,68,68] +69 0 [] [] +\N 0 [] [] +str_71 0 [] [] +[NULL,NULL,NULL] 0 [] [NULL,NULL,NULL] +73 0 [] [] +\N 0 [] [] +str_75 0 [] [] +[76,76,76,76,76,76,76] 0 [] [76,76,76,76,76,76,76] +77 0 [] [] +\N 0 [] [] +str_79 0 [] [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [20] +[] 0 [NULL,NULL] +[] 0 [22,22,22] +[] 0 [23,23,23,23] +[] 0 [24,24,24,24,24] +[] 0 [25,25,25,25,25,25] +[] 0 [26,26,26,26,26,26,26] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [28,28,28,28,28,28,28,28,28] +[] 0 [29,29,29,29,29,29,29,29,29,29] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [40] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [44,44,44,44,44] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [52,52,52] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [56,56,56,56,56,56,56] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 
[60] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [64,64,64,64,64] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [68,68,68,68,68,68,68,68,68] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [76,76,76,76,76,76,76] +[] 0 [] +[] 0 [] +[] 0 [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[2] [[0,1]] [[[],[]]] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[7] [[0,1,2,3,4,5,6]] [[[],[],[],[],[],[],[]]] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +LowCardinality(String) +None +String +UInt64 +36 +36 +20 +20 +0 +0 +2 +2 +20 +20 +2 +2 +20 +0 +0 0 \N [] +1 1 \N [] +2 2 \N [] +3 3 \N [] +4 4 \N [] +5 5 \N [] +6 6 \N [] +7 7 \N [] +8 8 \N [] +9 9 \N [] +str_10 \N str_10 [] +10 10 \N [] +10 10 \N [] +[[0,1]] \N \N [] +str_11 \N \N [] +str_11 \N str_11 [] +str_12 \N str_12 [] +12 12 \N [] +12 12 \N [] +str_13 \N str_13 [] +13 13 \N [] +13 13 \N [] +str_14 \N str_14 [] +14 14 \N [] +14 14 \N [] +str_15 \N str_15 [] +15 15 \N [] +15 15 \N [] +[[0,1,2,3,4,5,6]] \N \N [] +str_16 \N \N [] +str_16 \N str_16 [] +str_17 \N str_17 [] +17 17 \N [] +17 17 \N [] +str_18 \N str_18 [] +18 18 \N [] +18 18 \N [] +str_19 \N str_19 [] +19 19 \N [] +19 19 \N [] +[20] \N \N [20] +['str_21','str_21'] \N \N ['str_21','str_21'] +[22,22,22] \N \N [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] +[24,24,24,24,24] \N \N [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +[40] \N \N [40] +41 41 \N [] +\N \N \N [] +str_43 \N str_43 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] +45 45 \N [] +\N \N \N [] +str_47 \N str_47 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 49 \N [] +\N \N \N [] +str_51 \N str_51 [] +[52,52,52] \N \N [52,52,52] +53 53 \N [] +\N \N \N [] +str_55 \N str_55 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] +57 57 \N [] +\N \N \N [] +str_59 \N str_59 [] +[60] \N \N [60] +61 61 \N [] +\N \N \N [] +str_63 \N str_63 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] +65 65 \N [] +\N \N \N [] +str_67 \N str_67 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] +69 69 \N [] +\N \N \N [] +str_71 \N str_71 [] +[NULL,NULL,NULL] \N \N [NULL,NULL,NULL] +73 73 \N [] +\N 
\N \N [] +str_75 \N str_75 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] +77 77 \N [] +\N \N \N [] +str_79 \N str_79 [] +0 \N [] +1 \N [] +2 \N [] +3 \N [] +4 \N [] +5 \N [] +6 \N [] +7 \N [] +8 \N [] +9 \N [] +\N str_10 [] +10 \N [] +10 \N [] +\N \N [] +\N \N [] +\N str_11 [] +\N str_12 [] +12 \N [] +12 \N [] +\N str_13 [] +13 \N [] +13 \N [] +\N str_14 [] +14 \N [] +14 \N [] +\N str_15 [] +15 \N [] +15 \N [] +\N \N [] +\N \N [] +\N str_16 [] +\N str_17 [] +17 \N [] +17 \N [] +\N str_18 [] +18 \N [] +18 \N [] +\N str_19 [] +19 \N [] +19 \N [] +\N \N [20] +\N \N ['str_21','str_21'] +\N \N [22,22,22] +\N \N [23,23,23,23] +\N \N [24,24,24,24,24] +\N \N [25,25,25,25,25,25] +\N \N [26,26,26,26,26,26,26] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] +\N \N [29,29,29,29,29,29,29,29,29,29] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [40] +41 \N [] +\N \N [] +\N str_43 [] +\N \N [44,44,44,44,44] +45 \N [] +\N \N [] +\N str_47 [] +\N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] +\N \N [] +\N str_51 [] +\N \N [52,52,52] +53 \N [] +\N \N [] +\N str_55 [] +\N \N [56,56,56,56,56,56,56] +57 \N [] +\N \N [] +\N str_59 [] +\N \N [60] +61 \N [] +\N \N [] +\N str_63 [] +\N \N [64,64,64,64,64] +65 \N [] +\N \N [] +\N str_67 [] +\N \N [68,68,68,68,68,68,68,68,68] +69 \N [] +\N \N [] +\N str_71 [] +\N \N [NULL,NULL,NULL] +73 \N [] +\N \N [] +\N str_75 [] +\N \N [76,76,76,76,76,76,76] +77 \N [] +\N \N [] +\N str_79 [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +0 0 \N [] 0 [] +1 1 \N [] 0 [] +2 2 \N [] 0 [] +3 3 \N [] 0 [] +4 4 \N [] 0 [] +5 5 \N [] 0 [] +6 6 \N [] 0 [] +7 7 \N [] 0 [] +8 8 \N [] 0 [] +9 9 \N [] 0 [] +str_10 \N \N [] 0 [] +10 10 \N [] 0 [] +10 10 \N [] 0 [] +[[0,1]] \N \N [] 0 [] +str_11 \N \N [] 0 [] +str_11 \N \N [] 0 [] +str_12 \N \N [] 0 [] +12 12 \N [] 0 [] +12 12 \N [] 0 [] +str_13 \N \N [] 0 [] +13 13 \N [] 0 [] +13 13 \N [] 0 [] +str_14 \N \N [] 0 [] +14 14 \N [] 0 [] +14 14 \N [] 0 [] +str_15 \N \N [] 0 [] +15 15 \N [] 0 [] +15 15 \N [] 0 [] +[[0,1,2,3,4,5,6]] \N \N [] 0 [] +str_16 \N \N [] 0 [] +str_16 \N \N [] 0 [] +str_17 \N \N [] 0 [] +17 17 \N [] 0 [] +17 17 \N [] 0 [] +str_18 \N \N [] 0 [] +18 18 \N [] 0 [] +18 18 \N [] 0 [] +str_19 \N \N [] 0 [] +19 19 \N [] 0 [] +19 19 \N [] 0 [] +[20] \N \N [20] 1 [20] +['str_21','str_21'] \N \N ['str_21','str_21'] 2 [NULL,NULL] +[22,22,22] \N \N [22,22,22] 3 [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] 4 [23,23,23,23] +[24,24,24,24,24] \N \N 
[24,24,24,24,24] 5 [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +[40] \N \N [40] 1 [40] +41 41 \N [] 0 [] +\N \N \N [] 0 [] +str_43 \N \N [] 0 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] 5 [44,44,44,44,44] +45 45 \N [] 0 [] +\N \N \N [] 0 [] +str_47 \N \N [] 0 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 49 \N [] 0 [] +\N \N \N [] 0 [] +str_51 \N \N [] 0 [] +[52,52,52] \N \N [52,52,52] 3 [52,52,52] +53 53 \N [] 0 [] +\N \N \N [] 0 [] +str_55 \N \N [] 0 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] +57 57 \N [] 0 [] +\N \N \N [] 0 [] +str_59 \N \N [] 0 [] +[60] \N \N [60] 1 [60] +61 61 \N [] 0 [] +\N \N \N [] 0 [] +str_63 \N \N [] 0 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] 5 [64,64,64,64,64] +65 65 \N [] 0 [] +\N \N \N [] 0 [] +str_67 \N \N [] 0 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] +69 69 \N [] 0 [] +\N \N \N [] 0 [] +str_71 \N \N [] 0 [] +[NULL,NULL,NULL] \N \N [NULL,NULL,NULL] 3 [NULL,NULL,NULL] +73 73 \N [] 0 [] +\N \N \N [] 0 [] +str_75 \N \N [] 0 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] +77 77 \N [] 0 [] +\N \N \N [] 0 [] +str_79 \N \N [] 0 [] +0 \N [] 0 [] [] +1 \N [] 0 [] [] +2 \N [] 0 [] [] +3 \N [] 0 [] [] +4 \N [] 0 [] [] +5 \N [] 0 [] [] +6 \N [] 0 [] [] +7 \N [] 0 [] [] +8 \N [] 0 [] [] +9 \N [] 0 [] [] +\N \N [] 0 [] [] +10 \N [] 0 [] [] +10 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +12 \N [] 0 [] [] +12 \N [] 0 [] [] +\N \N [] 0 [] [] +13 \N [] 0 [] [] +13 \N [] 0 [] [] +\N \N [] 0 [] [] +14 \N [] 0 [] [] +14 \N [] 0 [] [] +\N \N [] 0 [] [] +15 \N [] 0 [] [] +15 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +17 \N [] 0 [] [] +17 \N [] 0 [] [] +\N \N [] 0 [] [] +18 \N [] 0 [] [] +18 \N [] 0 [] [] +\N \N [] 0 [] [] +19 \N [] 0 [] [] +19 \N [] 0 [] [] +\N \N [20] 1 [20] [NULL] +\N \N ['str_21','str_21'] 2 [NULL,NULL] ['str_21','str_21'] +\N \N [22,22,22] 3 [22,22,22] [NULL,NULL,NULL] +\N \N [23,23,23,23] 4 [23,23,23,23] [NULL,NULL,NULL,NULL] +\N \N [24,24,24,24,24] 5 [24,24,24,24,24] [NULL,NULL,NULL,NULL,NULL] +\N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] [NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N 
[] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [40] 1 [40] [NULL] +41 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [44,44,44,44,44] 5 [44,44,44,44,44] [NULL,NULL,NULL,NULL,NULL] +45 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [52,52,52] 3 [52,52,52] [NULL,NULL,NULL] +53 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +57 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [60] 1 [60] [NULL] +61 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [64,64,64,64,64] 5 [64,64,64,64,64] [NULL,NULL,NULL,NULL,NULL] +65 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +69 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [NULL,NULL,NULL] 3 [NULL,NULL,NULL] [NULL,NULL,NULL] +73 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +77 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +0 0 [] [] +1 0 [] [] +2 0 [] [] +3 0 [] [] +4 0 [] [] +5 0 [] [] +6 0 [] [] +7 0 [] [] +8 0 [] [] +9 0 [] [] +str_10 0 [] [] +10 0 [] [] +10 0 [] [] +[[0,1]] 0 [] [] +str_11 0 [] [] +str_11 0 [] [] +str_12 0 [] [] +12 0 [] [] +12 0 [] [] +str_13 0 [] [] +13 0 [] [] +13 0 [] [] +str_14 0 [] [] +14 0 [] [] +14 0 [] [] +str_15 0 [] [] +15 0 [] [] +15 0 [] [] +[[0,1,2,3,4,5,6]] 0 [] [] +str_16 0 [] [] +str_16 0 [] [] +str_17 0 [] [] +17 0 [] [] +17 0 [] [] +str_18 0 [] [] +18 0 [] [] +18 0 [] [] +str_19 0 [] [] +19 0 [] [] +19 0 [] [] +[20] 0 [] [20] +['str_21','str_21'] 0 [] [NULL,NULL] +[22,22,22] 0 [] [22,22,22] +[23,23,23,23] 0 [] [23,23,23,23] +[24,24,24,24,24] 0 [] [24,24,24,24,24] +[25,25,25,25,25,25] 0 [] [25,25,25,25,25,25] +[26,26,26,26,26,26,26] 0 [] [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 0 [] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] 0 [] [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] 0 [] [29,29,29,29,29,29,29,29,29,29] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +[40] 0 [] [40] +41 0 [] [] +\N 0 [] [] +str_43 0 [] [] +[44,44,44,44,44] 0 [] [44,44,44,44,44] +45 0 [] [] +\N 0 [] [] +str_47 0 [] [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 0 [] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 0 [] [] +\N 0 [] [] +str_51 0 [] [] +[52,52,52] 0 [] [52,52,52] +53 0 [] [] +\N 0 [] [] +str_55 0 [] [] +[56,56,56,56,56,56,56] 0 [] [56,56,56,56,56,56,56] +57 0 [] [] +\N 0 [] [] +str_59 0 [] [] +[60] 0 [] [60] +61 0 [] [] +\N 0 [] [] +str_63 0 [] [] +[64,64,64,64,64] 0 [] [64,64,64,64,64] +65 0 [] [] +\N 0 [] [] +str_67 0 [] [] +[68,68,68,68,68,68,68,68,68] 0 [] [68,68,68,68,68,68,68,68,68] +69 0 [] [] +\N 0 [] [] +str_71 0 [] [] +[NULL,NULL,NULL] 0 [] [NULL,NULL,NULL] +73 0 [] [] +\N 0 [] [] +str_75 0 [] [] +[76,76,76,76,76,76,76] 0 [] [76,76,76,76,76,76,76] +77 0 [] [] +\N 0 [] [] +str_79 0 [] [] 
+[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [20] +[] 0 [NULL,NULL] +[] 0 [22,22,22] +[] 0 [23,23,23,23] +[] 0 [24,24,24,24,24] +[] 0 [25,25,25,25,25,25] +[] 0 [26,26,26,26,26,26,26] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [28,28,28,28,28,28,28,28,28] +[] 0 [29,29,29,29,29,29,29,29,29,29] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [40] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [44,44,44,44,44] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [52,52,52] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [56,56,56,56,56,56,56] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [60] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [64,64,64,64,64] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [68,68,68,68,68,68,68,68,68] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [76,76,76,76,76,76,76] +[] 0 [] +[] 0 [] +[] 0 [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[2] [[0,1]] [[[],[]]] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[7] [[0,1,2,3,4,5,6]] [[[],[],[],[],[],[],[]]] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.sql.j2 b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.sql.j2 new file mode 100644 index 00000000000..0c123d5f6fe --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.sql.j2 @@ -0,0 +1,46 @@ +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; + +drop table if exists test; + +{% for engine in ['Memory', 'MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000', 'MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1'] -%} + +create table test (id UInt64, d Dynamic(max_types=2)) engine={{ engine }}; + +insert into test select number, number from numbers(10); +insert into test select number, 'str_' || toString(number) from numbers(10, 10); +insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(20, 10); +insert into test select number, NULL from numbers(30, 10); +insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 
== 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(40, 40); +insert into test select number, if(number % 5 == 1, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)), number) from numbers(10, 10); +insert into test select number, if(number % 5 == 1, ('str_' || number)::LowCardinality(String)::Dynamic, number::Dynamic) from numbers(10, 10); + +select distinct dynamicType(d) as type from test order by type; +select count() from test where dynamicType(d) == 'UInt64'; +select count() from test where d.UInt64 is not NULL; +select count() from test where dynamicType(d) == 'String'; +select count() from test where d.String is not NULL; +select count() from test where dynamicType(d) == 'Date'; +select count() from test where d.Date is not NULL; +select count() from test where dynamicType(d) == 'LowCardinality(String)'; +select count() from test where d.`LowCardinality(String)` is not NULL; +select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'; +select count() from test where not empty(d.`Array(Variant(String, UInt64))`); +select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'; +select count() from test where not empty(d.`Array(Array(Dynamic))`); +select count() from test where d is NULL; +select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); + +select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test order by id, d; +select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test order by id, d; +select d.Int8, d.Date, d.`Array(String)` from test order by id, d; +select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test order by id, d; +select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test order by id, d; +select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test order by id, d; +select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test order by id, d; +select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test order by id, d; + +drop table test; + +{% endfor -%} diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.reference b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.reference new file mode 100644 index 00000000000..ca6c5dbba82 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.reference @@ -0,0 +1,20 @@ +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +LowCardinality(String) +None +String +UInt64 +360000 +360000 +200000 +200000 +0 +0 +20000 +20000 +200000 +200000 +20000 +20000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.sql new file mode 100644 index 00000000000..61dc8fca01a --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_wide_merge_tree.sql @@ -0,0 +1,43 @@ +-- Tags: long, 
no-tsan, no-msan, no-ubsan, no-asan + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; + +insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000; +insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000; +insert into test select number, if (number % 5 == 1, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)), number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, if (number % 5 == 1, ('str_' || number)::LowCardinality(String)::Dynamic, number::Dynamic) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; + +select distinct dynamicType(d) as type from test order by type; +select count() from test where dynamicType(d) == 'UInt64'; +select count() from test where d.UInt64 is not NULL; +select count() from test where dynamicType(d) == 'String'; +select count() from test where d.String is not NULL; +select count() from test where dynamicType(d) == 'Date'; +select count() from test where d.Date is not NULL; +select count() from test where dynamicType(d) == 'LowCardinality(String)'; +select count() from test where d.`LowCardinality(String)` is not NULL; +select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'; +select count() from test where not empty(d.`Array(Variant(String, UInt64))`); +select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'; +select count() from test where not empty(d.`Array(Array(Dynamic))`); +select count() from test where d is NULL; +select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); + +select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.Int8, d.Date, d.`Array(String)` from test format Null; +select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null; +select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; 
+select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null; + +drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.reference index d0d777a5a38..b0be05f07a2 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.reference +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.reference @@ -1,28 +1,66 @@ -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String +50000 DateTime false +60000 Date false +70000 Array(UInt16) false +80000 String false +100000 None false +100000 UInt64 false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String false +100000 None false +100000 UInt64 false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String false +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) false +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +30000 String false +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +100000 None false +100000 UInt64 true +110000 String false +200000 Map(UInt64, UInt64) false diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql index d2c787040e5..fb23e15738e 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql @@ -2,7 +2,7 @@ set allow_experimental_dynamic_type=1; drop table if exists test; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; +create table test (id UInt64, d Dynamic(max_types=2)) 
engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); @@ -12,22 +12,37 @@ insert into test select number, toDate(number) from numbers(60000); insert into test select number, toDateTime(number) from numbers(50000); insert into test select number, NULL from numbers(100000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final;; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, map(number, number) from numbers(200000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, tuple(number, number) from numbers(10000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, 'str_' || number from numbers(30000); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); + drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.reference index d0d777a5a38..b0be05f07a2 
100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.reference +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.reference @@ -1,28 +1,66 @@ -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String +50000 DateTime false +60000 Date false +70000 Array(UInt16) false +80000 String false +100000 None false +100000 UInt64 false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String false +100000 None false +100000 UInt64 false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String false +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) false +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +30000 String false +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +100000 None false +100000 UInt64 true +110000 String false +200000 Map(UInt64, UInt64) false diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql index f99bf771608..c098a3191e0 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql @@ -2,7 +2,7 @@ set allow_experimental_dynamic_type=1; drop table if exists test; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); @@ -12,22 +12,36 @@ insert into 
test select number, toDate(number) from numbers(60000); insert into test select number, toDateTime(number) from numbers(50000); insert into test select number, NULL from numbers(100000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final;; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, map(number, number) from numbers(200000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, tuple(number, number) from numbers(10000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, 'str_' || number from numbers(30000); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.reference index d0d777a5a38..b0be05f07a2 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.reference +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.reference @@ -1,28 +1,66 @@ -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) 
-100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String +50000 DateTime false +60000 Date false +70000 Array(UInt16) false +80000 String false +100000 None false +100000 UInt64 false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String false +100000 None false +100000 UInt64 false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String false +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) false +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +30000 String false +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +100000 None false +100000 UInt64 true +110000 String false +200000 Map(UInt64, UInt64) false diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql index be81596d043..17b1e451143 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql @@ -2,7 +2,7 @@ set allow_experimental_dynamic_type=1; drop table if exists test; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); @@ -12,23 +12,36 @@ insert into test select number, toDate(number) from numbers(60000); insert into test select number, toDateTime(number) from numbers(50000); insert into test select number, NULL from numbers(100000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); -system start merges test; -optimize table 
test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); +system start merges test; optimize table test final;; +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, map(number, number) from numbers(200000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, tuple(number, number) from numbers(10000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, 'str_' || number from numbers(30000); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.reference index d0d777a5a38..b0be05f07a2 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.reference +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.reference @@ -1,28 +1,66 @@ -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String +50000 DateTime false +60000 Date false 
+70000 Array(UInt16) false +80000 String false +100000 None false +100000 UInt64 false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String false +100000 None false +100000 UInt64 false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String false +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) false +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +30000 String false +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +80000 String true +100000 None false +100000 UInt64 false +200000 Map(UInt64, UInt64) false +--------------------- +10000 Tuple(UInt64, UInt64) true +50000 DateTime true +60000 Date true +70000 Array(UInt16) true +100000 None false +100000 UInt64 true +110000 String false +200000 Map(UInt64, UInt64) false diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql index f6396af42a8..fd6c0109263 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql @@ -2,7 +2,7 @@ set allow_experimental_dynamic_type=1; drop table if exists test; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); @@ -12,22 +12,36 @@ insert into test select number, toDate(number) from numbers(60000); insert into test select number, toDateTime(number) from numbers(50000); insert into test select number, NULL from numbers(100000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final;; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), 
dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, map(number, number) from numbers(200000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, tuple(number, number) from numbers(10000); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, 'str_' || number from numbers(30000); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_small.reference.j2 b/tests/queries/0_stateless/03037_dynamic_merges_small.reference.j2 index 96a854630ed..7d3bc371e36 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_small.reference.j2 +++ b/tests/queries/0_stateless/03037_dynamic_merges_small.reference.j2 @@ -1,112 +1,264 @@ -5 DateTime -6 Date -7 Array(UInt16) -8 String -10 None -10 UInt64 -7 Array(UInt16) -10 None -10 UInt64 -19 String -7 Array(UInt16) -10 None -10 UInt64 -19 String -20 Map(UInt64, UInt64) -10 None -10 UInt64 -20 Map(UInt64, UInt64) -26 String -1 Tuple(UInt64, UInt64) -10 None -10 UInt64 -20 Map(UInt64, UInt64) -26 String -10 None -10 UInt64 -20 Map(UInt64, UInt64) -27 String -5 DateTime -6 Date -7 Array(UInt16) -8 String -10 None -10 UInt64 -7 Array(UInt16) -10 None -10 UInt64 -19 String -7 Array(UInt16) -10 None -10 UInt64 -19 String -20 Map(UInt64, UInt64) -10 None -10 UInt64 -20 Map(UInt64, UInt64) -26 String -1 Tuple(UInt64, UInt64) -10 None -10 UInt64 -20 Map(UInt64, UInt64) -26 String -10 None -10 UInt64 -20 Map(UInt64, UInt64) -27 String -5 DateTime -6 Date -7 Array(UInt16) -8 String -10 None -10 UInt64 -7 Array(UInt16) -10 None -10 UInt64 -19 String -7 Array(UInt16) -10 None -10 UInt64 -19 String -20 Map(UInt64, UInt64) -10 None -10 UInt64 -20 Map(UInt64, UInt64) 
-26 String -1 Tuple(UInt64, UInt64) -10 None -10 UInt64 -20 Map(UInt64, UInt64) -26 String -10 None -10 UInt64 -20 Map(UInt64, UInt64) -27 String -5 DateTime -6 Date -7 Array(UInt16) -8 String -10 None -10 UInt64 -7 Array(UInt16) -10 None -10 UInt64 -19 String -7 Array(UInt16) -10 None -10 UInt64 -19 String -20 Map(UInt64, UInt64) -10 None -10 UInt64 -20 Map(UInt64, UInt64) -26 String -1 Tuple(UInt64, UInt64) -10 None -10 UInt64 -20 Map(UInt64, UInt64) -26 String -10 None -10 UInt64 -20 Map(UInt64, UInt64) -27 String +5 DateTime false +6 Date false +7 Array(UInt16) false +8 String false +10 None false +10 UInt64 false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String false +10 None false +10 UInt64 false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String false +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) false +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +3 String false +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +5 DateTime true +6 Date true +7 Array(UInt16) true +10 None false +10 UInt64 true +11 String false +20 Map(UInt64, UInt64) false +5 DateTime false +6 Date false +7 Array(UInt16) false +8 String false +10 None false +10 UInt64 false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String false +10 None false +10 UInt64 false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String false +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) false +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +3 String false +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +5 DateTime true +6 Date true +7 Array(UInt16) true +10 None false +10 UInt64 true +11 String false +20 Map(UInt64, UInt64) false +5 DateTime false +6 Date false +7 Array(UInt16) false +8 String false +10 None false +10 UInt64 false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String false +10 None false +10 UInt64 false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String false +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None 
false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) false +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +3 String false +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +5 DateTime true +6 Date true +7 Array(UInt16) true +10 None false +10 UInt64 true +11 String false +20 Map(UInt64, UInt64) false +5 DateTime false +6 Date false +7 Array(UInt16) false +8 String false +10 None false +10 UInt64 false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String false +10 None false +10 UInt64 false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String false +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) false +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +3 String false +5 DateTime true +6 Date true +7 Array(UInt16) true +8 String true +10 None false +10 UInt64 false +20 Map(UInt64, UInt64) false +--------------------- +1 Tuple(UInt64, UInt64) true +5 DateTime true +6 Date true +7 Array(UInt16) true +10 None false +10 UInt64 true +11 String false +20 Map(UInt64, UInt64) false diff --git a/tests/queries/0_stateless/03037_dynamic_merges_small.sql.j2 b/tests/queries/0_stateless/03037_dynamic_merges_small.sql.j2 index 263e92be403..3778399d0a4 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_small.sql.j2 +++ b/tests/queries/0_stateless/03037_dynamic_merges_small.sql.j2 @@ -9,7 +9,7 @@ drop table if exists test; 'MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1', 'MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1'] -%} -create table test (id UInt64, d Dynamic(max_types=3)) engine={{ engine }}; +create table test (id UInt64, d Dynamic(max_types=2)) engine={{ engine }}; system stop merges test; insert into test select number, number from numbers(10); @@ -19,23 +19,37 @@ insert into test select number, toDate(number) from numbers(6); insert into test select number, toDateTime(number) from numbers(5); insert into test select number, NULL from numbers(10); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final;; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), 
dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, map(number, number) from numbers(20); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system stop merges test; insert into test select number, tuple(number, number) from numbers(1); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); system start merges test; optimize table test final; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, 'str_' || number from numbers(3); +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.reference index 4be740f6050..a4c2df74a74 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.reference +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.reference @@ -1,21 +1,63 @@ -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None +6667 
Tuple(a Dynamic(max_types=2)):DateTime false +13333 Tuple(a Dynamic(max_types=2)):IPv4 false +16667 Tuple(a Dynamic(max_types=2)):Date false +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) false +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +140000 UInt64:None false +--------------------- +6667 Tuple(a Dynamic(max_types=2)):DateTime true +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +140000 UInt64:None false +--------------------- +6667 Tuple(a Dynamic(max_types=2)):DateTime true +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +16667 Tuple(a Dynamic(max_types=2)):DateTime false +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +23334 Tuple(a Dynamic(max_types=2)):DateTime true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +23334 Tuple(a Dynamic(max_types=2)):DateTime true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +40000 Tuple(a Dynamic(max_types=2)):DateTime false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +63334 Tuple(a Dynamic(max_types=2)):DateTime true +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql index 81888946681..8ba192cb5db 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql @@ -6,24 +6,39 @@ set allow_experimental_dynamic_type = 1; set enable_named_columns_in_function_tuple = 0; drop table if exists test;; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, lock_acquire_timeout_for_background_operations=600; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id 
settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); -insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000); -insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000); +insert into test select number, tuple(if(number % 3 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=2)) from numbers(100000); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=2)) from numbers(50000); +insert into test select number, multiIf(number % 5 == 0, tuple(if(number % 3 == 0, toDateTime(number), toIPv4(number)))::Tuple(a Dynamic(max_types=2)), number % 5 == 1 or number % 5 == 2, number, 'str_' || number) from numbers(100000); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; -insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000); -insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000); +system stop merges test; +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(50000); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(200000); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; + +system stop merges test; +insert into test select number, tuple(toDateTime(number))::Tuple(a Dynamic(max_types=2)) from numbers(40000); + +select '---------------------'; +select count(), dynamicType(d) || ':' || 
dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.reference index 4be740f6050..a4c2df74a74 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.reference +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.reference @@ -1,21 +1,63 @@ -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None +6667 Tuple(a Dynamic(max_types=2)):DateTime false +13333 Tuple(a Dynamic(max_types=2)):IPv4 false +16667 Tuple(a Dynamic(max_types=2)):Date false +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) false +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +140000 UInt64:None false +--------------------- +6667 Tuple(a Dynamic(max_types=2)):DateTime true +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +140000 UInt64:None false +--------------------- +6667 Tuple(a Dynamic(max_types=2)):DateTime true +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +16667 Tuple(a Dynamic(max_types=2)):DateTime false +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +23334 Tuple(a Dynamic(max_types=2)):DateTime true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- 
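The 03038 tests above drill into a Dynamic value nested inside a tuple: d.`Tuple(a Dynamic(max_types=2))`.a first reads the tuple-typed subcolumn and then its element a, which is itself Dynamic, so dynamicType() and isDynamicElementInSharedData() can be applied to it again. A small sketch of that nesting, under the same experimental settings the tests use (table t and the inserted value are illustrative only):

set allow_experimental_dynamic_type = 1;
set enable_named_columns_in_function_tuple = 0;
create table t (d Dynamic(max_types=2)) engine=Memory;
insert into t select tuple(42)::Tuple(a Dynamic(max_types=2));
-- the inner element is Dynamic again; ask for its concrete type
select dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) from t;
drop table t;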
+13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +23334 Tuple(a Dynamic(max_types=2)):DateTime true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +40000 Tuple(a Dynamic(max_types=2)):DateTime false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +63334 Tuple(a Dynamic(max_types=2)):DateTime true +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql index ba58ca471a2..1ea7eefdd53 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql @@ -6,24 +6,39 @@ set allow_experimental_dynamic_type = 1; set enable_named_columns_in_function_tuple = 0; drop table if exists test;; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); -insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000); -insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000); +insert into test select number, tuple(if(number % 3 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=2)) from numbers(100000); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=2)) from numbers(50000); +insert into test select number, multiIf(number % 5 == 0, tuple(if(number % 3 == 0, toDateTime(number), toIPv4(number)))::Tuple(a Dynamic(max_types=2)), number % 5 == 1 or number % 5 == 2, number, 'str_' || number) from numbers(100000); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a 
Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; -insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000); -insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000); +system stop merges test; +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(50000); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(200000); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; + +system stop merges test; +insert into test select number, tuple(toDateTime(number))::Tuple(a Dynamic(max_types=2)) from numbers(40000); + +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_small.reference.j2 b/tests/queries/0_stateless/03038_nested_dynamic_merges_small.reference.j2 index ae07c164074..3d7e8b60f73 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_small.reference.j2 +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_small.reference.j2 @@ -1,84 +1,256 @@ -2 Tuple(a Dynamic(max_types=3)):Date -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):String -5 Tuple(a Dynamic(max_types=3)):UInt64 -10 UInt64:None -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):UInt64 -7 Tuple(a Dynamic(max_types=3)):String -10 UInt64:None -2 Tuple(a Dynamic(max_types=3)):DateTime -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):UInt64 -7 Tuple(a Dynamic(max_types=3)):String -10 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -10 UInt64:None -13 Tuple(a Dynamic(max_types=3)):None -5 Tuple(a Dynamic(max_types=3)):UInt64 -10 Tuple(a 
Dynamic(max_types=3)):Tuple(UInt64) -10 UInt64:None -12 Tuple(a Dynamic(max_types=3)):String -13 Tuple(a Dynamic(max_types=3)):None -2 Tuple(a Dynamic(max_types=3)):Date -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):String -5 Tuple(a Dynamic(max_types=3)):UInt64 -10 UInt64:None -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):UInt64 -7 Tuple(a Dynamic(max_types=3)):String -10 UInt64:None -2 Tuple(a Dynamic(max_types=3)):DateTime -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):UInt64 -7 Tuple(a Dynamic(max_types=3)):String -10 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -10 UInt64:None -13 Tuple(a Dynamic(max_types=3)):None -5 Tuple(a Dynamic(max_types=3)):UInt64 -10 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -10 UInt64:None -12 Tuple(a Dynamic(max_types=3)):String -13 Tuple(a Dynamic(max_types=3)):None -2 Tuple(a Dynamic(max_types=3)):Date -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):String -5 Tuple(a Dynamic(max_types=3)):UInt64 -10 UInt64:None -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):UInt64 -7 Tuple(a Dynamic(max_types=3)):String -10 UInt64:None -2 Tuple(a Dynamic(max_types=3)):DateTime -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):UInt64 -7 Tuple(a Dynamic(max_types=3)):String -10 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -10 UInt64:None -13 Tuple(a Dynamic(max_types=3)):None -5 Tuple(a Dynamic(max_types=3)):UInt64 -10 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -10 UInt64:None -12 Tuple(a Dynamic(max_types=3)):String -13 Tuple(a Dynamic(max_types=3)):None -2 Tuple(a Dynamic(max_types=3)):Date -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):String -5 Tuple(a Dynamic(max_types=3)):UInt64 -10 UInt64:None -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):UInt64 -7 Tuple(a Dynamic(max_types=3)):String -10 UInt64:None -2 Tuple(a Dynamic(max_types=3)):DateTime -3 Tuple(a Dynamic(max_types=3)):Array(UInt8) -5 Tuple(a Dynamic(max_types=3)):UInt64 -7 Tuple(a Dynamic(max_types=3)):String -10 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -10 UInt64:None -13 Tuple(a Dynamic(max_types=3)):None -5 Tuple(a Dynamic(max_types=3)):UInt64 -10 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -10 UInt64:None -12 Tuple(a Dynamic(max_types=3)):String -13 Tuple(a Dynamic(max_types=3)):None +test +2 Tuple(a Dynamic(max_types=2)):DateTime false +2 Tuple(a Dynamic(max_types=2)):IPv4 false +4 Tuple(a Dynamic(max_types=2)):Date false +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) false +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):DateTime true +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):DateTime true +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +4 Tuple(a Dynamic(max_types=2)):DateTime false +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) 
false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +6 Tuple(a Dynamic(max_types=2)):DateTime true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +6 Tuple(a Dynamic(max_types=2)):DateTime true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +8 Tuple(a Dynamic(max_types=2)):DateTime false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +14 Tuple(a Dynamic(max_types=2)):DateTime true +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +test +2 Tuple(a Dynamic(max_types=2)):DateTime false +2 Tuple(a Dynamic(max_types=2)):IPv4 false +4 Tuple(a Dynamic(max_types=2)):Date false +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) false +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):DateTime true +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):DateTime true +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +4 Tuple(a Dynamic(max_types=2)):DateTime false +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +6 Tuple(a Dynamic(max_types=2)):DateTime true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +6 Tuple(a Dynamic(max_types=2)):DateTime true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +8 Tuple(a Dynamic(max_types=2)):DateTime false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false 
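A reading aid for the true/false column that runs through these reference blocks: isDynamicElementInSharedData(d) reports whether a row's value lives in the Dynamic column's shared-data fallback rather than in a dedicated typed subcolumn. After a merge, roughly the most frequent types (up to max_types) keep dedicated subcolumns and the remaining types are demoted to shared data, which is why flags flip from false to true across the '---------------------' checkpoints. The query the tests repeat is, schematically:

select count(), dynamicType(d), isDynamicElementInSharedData(d)
from test
group by dynamicType(d), isDynamicElementInSharedData(d)
order by count(), dynamicType(d);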
+--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +14 Tuple(a Dynamic(max_types=2)):DateTime true +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +test +2 Tuple(a Dynamic(max_types=2)):DateTime false +2 Tuple(a Dynamic(max_types=2)):IPv4 false +4 Tuple(a Dynamic(max_types=2)):Date false +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) false +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):DateTime true +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):DateTime true +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +4 Tuple(a Dynamic(max_types=2)):DateTime false +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +6 Tuple(a Dynamic(max_types=2)):DateTime true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +6 Tuple(a Dynamic(max_types=2)):DateTime true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +8 Tuple(a Dynamic(max_types=2)):DateTime false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String true +14 Tuple(a Dynamic(max_types=2)):DateTime false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +test +2 Tuple(a Dynamic(max_types=2)):DateTime false +2 Tuple(a Dynamic(max_types=2)):IPv4 false +4 Tuple(a Dynamic(max_types=2)):Date false +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) false +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):DateTime true +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 
false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):DateTime true +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +4 Tuple(a Dynamic(max_types=2)):DateTime false +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 false +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +6 Tuple(a Dynamic(max_types=2)):DateTime true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +6 Tuple(a Dynamic(max_types=2)):DateTime true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +8 Tuple(a Dynamic(max_types=2)):DateTime false +13 Tuple(a Dynamic(max_types=2)):String false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false +--------------------- +2 Tuple(a Dynamic(max_types=2)):IPv4 true +4 Tuple(a Dynamic(max_types=2)):Date true +6 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +7 Tuple(a Dynamic(max_types=2)):UInt64 true +8 String:None false +13 Tuple(a Dynamic(max_types=2)):String true +14 Tuple(a Dynamic(max_types=2)):DateTime false +20 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +26 Tuple(a Dynamic(max_types=2)):None false +28 UInt64:None false diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_small.sql.j2 b/tests/queries/0_stateless/03038_nested_dynamic_merges_small.sql.j2 index 7828c2af49c..8682b6cef81 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_small.sql.j2 +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_small.sql.j2 @@ -10,25 +10,41 @@ drop table if exists test; 'MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1', 'MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1'] -%} -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; +select 'test'; +create table test (id UInt64, d Dynamic(max_types=2)) engine={{ engine }}; system stop merges test; -insert into test select number, number from numbers(10); -insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(10); -insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(5); +insert into test select number, number from numbers(20); +insert into test select number, tuple(if(number % 3 == 0, number, 'str_' || toString(number)))::Tuple(a 
Dynamic(max_types=2)) from numbers(20); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=2)) from numbers(10); +insert into test select number, multiIf(number % 5 == 0, tuple(if(number % 3 == 0, toDateTime(number), toIPv4(number)))::Tuple(a Dynamic(max_types=2)), number % 5 == 1 or number % 5 == 2, number, 'str_' || number) from numbers(20); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; -insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(5); -insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(20); +system stop merges test; +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(10); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(40); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; + +system stop merges test; +insert into test select number, tuple(toDateTime(number))::Tuple(a Dynamic(max_types=2)) from numbers(8); + +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.reference 
b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.reference index 4be740f6050..a4c2df74a74 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.reference +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.reference @@ -1,21 +1,63 @@ -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None +6667 Tuple(a Dynamic(max_types=2)):DateTime false +13333 Tuple(a Dynamic(max_types=2)):IPv4 false +16667 Tuple(a Dynamic(max_types=2)):Date false +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) false +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +140000 UInt64:None false +--------------------- +6667 Tuple(a Dynamic(max_types=2)):DateTime true +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +140000 UInt64:None false +--------------------- +6667 Tuple(a Dynamic(max_types=2)):DateTime true +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +16667 Tuple(a Dynamic(max_types=2)):DateTime false +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +23334 Tuple(a Dynamic(max_types=2)):DateTime true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +23334 Tuple(a Dynamic(max_types=2)):DateTime true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +40000 Tuple(a Dynamic(max_types=2)):DateTime false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date 
true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +63334 Tuple(a Dynamic(max_types=2)):DateTime true +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql index a53c5b0b2a5..c6a09036c30 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql @@ -6,24 +6,39 @@ set allow_experimental_dynamic_type = 1; set enable_named_columns_in_function_tuple = 0; drop table if exists test;; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, lock_acquire_timeout_for_background_operations=600; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); -insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000); -insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000); +insert into test select number, tuple(if(number % 3 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=2)) from numbers(100000); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=2)) from numbers(50000); +insert into test select number, multiIf(number % 5 == 0, tuple(if(number % 3 == 0, toDateTime(number), toIPv4(number)))::Tuple(a Dynamic(max_types=2)), number % 5 == 1 or number % 5 == 2, number, 'str_' || number) from numbers(100000); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; -insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000); -insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000); +system stop merges test; +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(50000); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a 
Dynamic(max_types=2)) from numbers(200000); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; + +system stop merges test; +insert into test select number, tuple(toDateTime(number))::Tuple(a Dynamic(max_types=2)) from numbers(40000); + +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.reference index 4be740f6050..a4c2df74a74 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.reference +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.reference @@ -1,21 +1,63 @@ -16667 Tuple(a Dynamic(max_types=3)):Date -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):String -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None -16667 Tuple(a Dynamic(max_types=3)):DateTime -33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 -66667 Tuple(a Dynamic(max_types=3)):String -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -133333 Tuple(a Dynamic(max_types=3)):None -50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) -100000 UInt64:None -116667 Tuple(a Dynamic(max_types=3)):String -133333 Tuple(a Dynamic(max_types=3)):None +6667 Tuple(a Dynamic(max_types=2)):DateTime false +13333 Tuple(a Dynamic(max_types=2)):IPv4 false +16667 Tuple(a Dynamic(max_types=2)):Date false +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) false +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +140000 UInt64:None false +--------------------- +6667 Tuple(a Dynamic(max_types=2)):DateTime true +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false 
+66666 Tuple(a Dynamic(max_types=2)):String false +140000 UInt64:None false +--------------------- +6667 Tuple(a Dynamic(max_types=2)):DateTime true +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +16667 Tuple(a Dynamic(max_types=2)):DateTime false +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 false +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +23334 Tuple(a Dynamic(max_types=2)):DateTime true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +23334 Tuple(a Dynamic(max_types=2)):DateTime true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +40000 Tuple(a Dynamic(max_types=2)):DateTime false +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false +--------------------- +13333 Tuple(a Dynamic(max_types=2)):IPv4 true +16667 Tuple(a Dynamic(max_types=2)):Date true +33333 Tuple(a Dynamic(max_types=2)):Array(UInt8) true +33334 Tuple(a Dynamic(max_types=2)):UInt64 true +40000 String:None false +63334 Tuple(a Dynamic(max_types=2)):DateTime true +66666 Tuple(a Dynamic(max_types=2)):String false +100000 Tuple(a Dynamic(max_types=2)):Tuple(UInt64) false +133333 Tuple(a Dynamic(max_types=2)):None false +140000 UInt64:None false diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql index 4256b010ec0..c1964c45d98 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql @@ -6,24 +6,39 @@ set allow_experimental_dynamic_type = 1; set enable_named_columns_in_function_tuple = 0; drop table if exists test;; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; +create table test (id UInt64, d Dynamic(max_types=2)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, lock_acquire_timeout_for_background_operations=600; system stop merges test; insert into test select number, number from numbers(100000); -insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000); -insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from 
numbers(50000); +insert into test select number, tuple(if(number % 3 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=2)) from numbers(100000); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=2)) from numbers(50000); +insert into test select number, multiIf(number % 5 == 0, tuple(if(number % 3 == 0, toDateTime(number), toIPv4(number)))::Tuple(a Dynamic(max_types=2)), number % 5 == 1 or number % 5 == 2, number, 'str_' || number) from numbers(100000); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; -insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000); -insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000); +system stop merges test; +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(50000); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=2)) from numbers(200000); -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; system start merges test; optimize table test final; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; + +system stop merges test; +insert into test select number, tuple(toDateTime(number))::Tuple(a Dynamic(max_types=2)) from numbers(40000); + +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; +system start merges test; +optimize table test final; +select '---------------------'; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=2))`.a) as type, isDynamicElementInSharedData(d.`Tuple(a Dynamic(max_types=2))`.a) as flag from test group by type, flag order by count(), type; drop table 
test; diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.reference index 2ec301b747b..9386548c74d 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.reference @@ -1,13 +1,13 @@ initial insert alter add column 1 -3 None +3 None false 0 0 \N \N \N 0 1 1 \N \N \N 0 2 2 \N \N \N 0 insert after alter add column 1 -4 String -4 UInt64 -7 None +4 String false +4 UInt64 false +7 None false 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 2 2 \N \N \N \N 0 @@ -24,147 +24,158 @@ insert after alter add column 1 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 alter modify column 1 -7 None -8 String +4 String true +4 UInt64 true +7 None false 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 6 6 str_6 str_6 \N \N 0 7 7 str_7 str_7 \N \N 0 8 8 str_8 str_8 \N \N 0 9 9 \N \N \N \N 0 10 10 \N \N \N \N 0 11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 +12 12 12 \N 12 \N 0 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 insert after alter modify column 1 -8 None -11 String +1 Date true +5 String true +5 UInt64 true +8 None false 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 6 6 str_6 str_6 \N \N 0 7 7 str_7 str_7 \N \N 0 8 8 str_8 str_8 \N \N 0 9 9 \N \N \N \N 0 10 10 \N \N \N \N 0 11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 +12 12 12 \N 12 \N 0 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 +16 16 16 \N 16 \N 0 17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 +18 18 1970-01-19 \N \N 1970-01-19 0 alter modify column 2 -8 None -11 String +1 Date true +5 String true +5 UInt64 true +8 None false 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 6 6 str_6 str_6 \N \N 0 7 7 str_7 str_7 \N \N 0 8 8 str_8 str_8 \N \N 0 9 9 \N \N \N \N 0 10 10 \N \N \N \N 0 11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 +12 12 12 \N 12 \N 0 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 +16 16 16 \N 16 \N 0 17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 +18 18 1970-01-19 \N \N 1970-01-19 0 insert after alter modify column 2 -1 Date -1 UInt64 -9 None -12 String +1 String false +1 UInt64 false +2 Date true +5 String true +5 UInt64 true +9 None false 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 6 6 str_6 str_6 \N \N 0 7 7 str_7 str_7 \N \N 0 8 8 str_8 str_8 \N \N 0 9 9 \N \N \N \N 0 10 10 \N \N \N \N 0 11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 +12 12 12 \N 12 \N 0 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 +16 16 16 \N 16 \N 0 17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 +18 18 1970-01-19 \N \N 1970-01-19 0 19 19 \N \N \N \N 0 20 20 20 \N 20 \N 0 21 21 str_21 str_21 \N \N 0 22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 -1 Date -1 UInt64 -9 None -12 String +1 String false +1 UInt64 false +2 Date true +5 String true +5 UInt64 true +9 None false 0 0 0 \N 0 \N \N \N 0 1 1 1 \N 0 \N \N \N 0 2 2 2 \N 0 \N \N \N 0 -3 
3 3 \N 0 3 \N \N 0 -4 4 4 \N 0 4 \N \N 0 -5 5 5 \N 0 5 \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 6 6 6 \N 0 str_6 \N \N 0 7 7 7 \N 0 str_7 \N \N 0 8 8 8 \N 0 str_8 \N \N 0 9 9 9 \N 0 \N \N \N 0 10 10 10 \N 0 \N \N \N 0 11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 12 \N \N 0 +12 12 12 \N 0 \N 12 \N 0 13 13 13 \N 0 str_13 \N \N 0 14 14 14 \N 0 \N \N \N 0 15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 +16 16 16 \N 0 \N 16 \N 0 17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 +18 18 18 \N 0 \N \N 1970-01-19 0 19 19 19 \N 0 \N \N \N 0 20 20 20 \N 0 \N 20 \N 0 21 21 21 \N 0 str_21 \N \N 0 22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 -1 Date -1 UInt64 -12 None -12 String +1 String false +1 UInt64 false +2 Date true +5 String true +5 UInt64 true +12 None false 0 0 0 \N 0 \N \N \N 0 1 1 1 \N 0 \N \N \N 0 2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 3 \N \N 0 -4 4 4 \N 0 4 \N \N 0 -5 5 5 \N 0 5 \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 6 6 6 \N 0 str_6 \N \N 0 7 7 7 \N 0 str_7 \N \N 0 8 8 8 \N 0 str_8 \N \N 0 9 9 9 \N 0 \N \N \N 0 10 10 10 \N 0 \N \N \N 0 11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 12 \N \N 0 +12 12 12 \N 0 \N 12 \N 0 13 13 13 \N 0 str_13 \N \N 0 14 14 14 \N 0 \N \N \N 0 15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 +16 16 16 \N 0 \N 16 \N 0 17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 +18 18 18 \N 0 \N \N 1970-01-19 0 19 19 19 \N 0 \N \N \N 0 20 20 20 \N 0 \N 20 \N 0 21 21 21 \N 0 str_21 \N \N 0 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.sql b/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.sql index 4ab700306d4..de05ba36177 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.sql @@ -9,7 +9,7 @@ insert into test select number, number from numbers(3); select 'alter add column 1'; alter table test add column d Dynamic(max_types=3) settings mutations_sync=1; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); select x, y, d, d.String, d.UInt64, d.`Tuple(a UInt64)`.a from test order by x; select 'insert after alter add column 1'; @@ -17,37 +17,37 @@ insert into test select number, number, number from numbers(3, 3); insert into test select number, number, 'str_' || toString(number) from numbers(6, 3); insert into test select number, number, NULL from numbers(9, 3); insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; select 'alter modify column 1'; -alter table test modify column d Dynamic(max_types=1) settings mutations_sync=1; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +alter table test modify column d Dynamic(max_types=0) settings mutations_sync=1; +select count(), dynamicType(d), 
isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; select 'insert after alter modify column 1'; insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(15, 4); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; select 'alter modify column 2'; -alter table test modify column d Dynamic(max_types=3) settings mutations_sync=1; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +alter table test modify column d Dynamic(max_types=2) settings mutations_sync=1; +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; select 'insert after alter modify column 2'; insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(19, 4); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; select 'alter modify column 3'; alter table test modify column y Dynamic settings mutations_sync=1; -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); select x, y, y.UInt64, y.String, y.`Tuple(a UInt64)`.a, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; select 'insert after alter modify column 3'; insert into test select number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL), NULL from numbers(23, 3); -select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d) order by count(), dynamicType(d); select x, y, y.UInt64, y.String, y.`Tuple(a UInt64)`.a, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.reference index c592528c3cd..d7123288280 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.reference +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.reference @@ -24,48 +24,28 @@ insert after alter add column 1 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 alter modify column 1 +4 String +4 UInt64 7 None -8 String 
0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 6 6 str_6 str_6 \N \N 0 7 7 str_7 str_7 \N \N 0 8 8 str_8 str_8 \N \N 0 9 9 \N \N \N \N 0 10 10 \N \N \N \N 0 11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 +12 12 12 \N 12 \N 0 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 insert after alter modify column 1 -8 None -11 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -alter modify column 2 -4 UInt64 -7 String +1 Date +5 String +5 UInt64 8 None 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 @@ -83,13 +63,37 @@ alter modify column 2 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 +16 16 16 \N 16 \N 0 17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -insert after alter modify column 2 +18 18 1970-01-19 \N \N 1970-01-19 0 +alter modify column 2 1 Date +5 String 5 UInt64 -8 String +8 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 \N 16 \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 \N \N 1970-01-19 0 +insert after alter modify column 2 +2 Date +6 String +6 UInt64 9 None 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 @@ -107,17 +111,17 @@ insert after alter modify column 2 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 +16 16 16 \N 16 \N 0 17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 +18 18 1970-01-19 \N \N 1970-01-19 0 19 19 \N \N \N \N 0 20 20 20 \N 20 \N 0 21 21 str_21 str_21 \N \N 0 22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 -1 Date -5 UInt64 -8 String +2 Date +6 String +6 UInt64 9 None 0 0 0 \N 0 \N \N \N 0 1 1 1 \N 0 \N \N \N 0 @@ -135,17 +139,17 @@ alter modify column 3 13 13 13 \N 0 str_13 \N \N 0 14 14 14 \N 0 \N \N \N 0 15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 +16 16 16 \N 0 \N 16 \N 0 17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 +18 18 18 \N 0 \N \N 1970-01-19 0 19 19 19 \N 0 \N \N \N 0 20 20 20 \N 0 \N 20 \N 0 21 21 21 \N 0 str_21 \N \N 0 22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 -1 Date -5 UInt64 -8 String +2 Date +6 String +6 UInt64 12 None 0 0 0 \N 0 \N \N \N 0 1 1 1 \N 0 \N \N \N 0 @@ -163,9 +167,9 @@ insert after alter modify column 3 13 13 13 \N 0 str_13 \N \N 0 14 14 14 \N 0 \N \N \N 0 15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 +16 16 16 \N 0 \N 16 \N 0 17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 +18 18 18 \N 0 \N \N 1970-01-19 0 19 19 19 \N 0 \N \N \N 0 20 20 20 \N 0 \N 20 \N 0 21 21 21 \N 0 str_21 \N \N 0 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.reference index 2ec301b747b..d7123288280 100644 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.reference +++ 
b/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.reference @@ -24,147 +24,152 @@ insert after alter add column 1 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 alter modify column 1 +4 String +4 UInt64 7 None -8 String 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 6 6 str_6 str_6 \N \N 0 7 7 str_7 str_7 \N \N 0 8 8 str_8 str_8 \N \N 0 9 9 \N \N \N \N 0 10 10 \N \N \N \N 0 11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 +12 12 12 \N 12 \N 0 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 insert after alter modify column 1 -8 None -11 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -alter modify column 2 -8 None -11 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -insert after alter modify column 2 1 Date -1 UInt64 -9 None -12 String +5 String +5 UInt64 +8 None 0 0 \N \N \N \N 0 1 1 \N \N \N \N 0 2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 6 6 str_6 str_6 \N \N 0 7 7 str_7 str_7 \N \N 0 8 8 str_8 str_8 \N \N 0 9 9 \N \N \N \N 0 10 10 \N \N \N \N 0 11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 +12 12 12 \N 12 \N 0 13 13 str_13 str_13 \N \N 0 14 14 \N \N \N \N 0 15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 +16 16 16 \N 16 \N 0 17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 +18 18 1970-01-19 \N \N 1970-01-19 0 +alter modify column 2 +1 Date +5 String +5 UInt64 +8 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 \N 16 \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 \N \N 1970-01-19 0 +insert after alter modify column 2 +2 Date +6 String +6 UInt64 +9 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 \N 16 \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 \N \N 1970-01-19 0 19 19 \N \N \N \N 0 20 20 20 \N 20 \N 0 21 21 str_21 str_21 \N \N 0 22 22 1970-01-23 \N \N 1970-01-23 0 alter modify column 3 -1 Date -1 UInt64 +2 Date +6 String +6 UInt64 9 None -12 String 0 0 0 \N 0 \N \N \N 0 1 1 1 \N 0 \N \N \N 0 2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 3 \N \N 0 -4 4 4 \N 0 4 \N \N 0 -5 5 5 \N 0 5 \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 
0 \N 5 \N 0 6 6 6 \N 0 str_6 \N \N 0 7 7 7 \N 0 str_7 \N \N 0 8 8 8 \N 0 str_8 \N \N 0 9 9 9 \N 0 \N \N \N 0 10 10 10 \N 0 \N \N \N 0 11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 12 \N \N 0 +12 12 12 \N 0 \N 12 \N 0 13 13 13 \N 0 str_13 \N \N 0 14 14 14 \N 0 \N \N \N 0 15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 +16 16 16 \N 0 \N 16 \N 0 17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 +18 18 18 \N 0 \N \N 1970-01-19 0 19 19 19 \N 0 \N \N \N 0 20 20 20 \N 0 \N 20 \N 0 21 21 21 \N 0 str_21 \N \N 0 22 22 22 \N 0 \N \N 1970-01-23 0 insert after alter modify column 3 -1 Date -1 UInt64 +2 Date +6 String +6 UInt64 12 None -12 String 0 0 0 \N 0 \N \N \N 0 1 1 1 \N 0 \N \N \N 0 2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 3 \N \N 0 -4 4 4 \N 0 4 \N \N 0 -5 5 5 \N 0 5 \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 6 6 6 \N 0 str_6 \N \N 0 7 7 7 \N 0 str_7 \N \N 0 8 8 8 \N 0 str_8 \N \N 0 9 9 9 \N 0 \N \N \N 0 10 10 10 \N 0 \N \N \N 0 11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 12 \N \N 0 +12 12 12 \N 0 \N 12 \N 0 13 13 13 \N 0 str_13 \N \N 0 14 14 14 \N 0 \N \N \N 0 15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 +16 16 16 \N 0 \N 16 \N 0 17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 +18 18 18 \N 0 \N \N 1970-01-19 0 19 19 19 \N 0 \N \N \N 0 20 20 20 \N 0 \N 20 \N 0 21 21 21 \N 0 str_21 \N \N 0 diff --git a/tests/queries/0_stateless/03041_dynamic_type_check_table.sh b/tests/queries/0_stateless/03041_dynamic_type_check_table.sh index c8bd533e253..da24b892cbd 100755 --- a/tests/queries/0_stateless/03041_dynamic_type_check_table.sh +++ b/tests/queries/0_stateless/03041_dynamic_type_check_table.sh @@ -13,7 +13,7 @@ function run() $CH_CLIENT -q "insert into test select number, number from numbers(3)" echo "alter add column" - $CH_CLIENT -q "alter table test add column d Dynamic(max_types=3) settings mutations_sync=1" + $CH_CLIENT -q "alter table test add column d Dynamic(max_types=2) settings mutations_sync=1" $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" diff --git a/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference index 0b76d30953e..370e6352657 100644 --- a/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference +++ b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference @@ -1,35 +1,35 @@ 1 2024-01-01 Date 2 1704056400 Decimal(18, 3) -3 1 String -4 2 String +3 1 Float32 +4 2 Float64 1 2024-01-01 Date 1 2024-01-01 Date 2 1704056400 Decimal(18, 3) 2 1704056400 Decimal(18, 3) -3 1 String -3 1 String -4 2 String -4 2 String - -1 2024-01-01 String -1 2024-01-01 String -2 1704056400 String -2 1704056400 String -3 1 String -3 1 String -4 2 String -4 2 String +3 1 Float32 +3 1 Float32 +4 2 Float64 +4 2 Float64 1 2024-01-01 Date -1 2024-01-01 String -1 2024-01-01 String +1 2024-01-01 Date 2 1704056400 Decimal(18, 3) -2 1704056400 String -2 1704056400 String -3 1 String -3 1 String -3 1 String -4 2 String -4 2 String -4 2 String +2 1704056400 Decimal(18, 3) +3 1 Float32 +3 1 Float32 +4 2 Float64 +4 2 Float64 + +1 2024-01-01 Date +1 2024-01-01 Date +1 2024-01-01 Date +2 1704056400 Decimal(18, 3) +2 1704056400 Decimal(18, 3) +2 1704056400 Decimal(18, 3) +3 1 Float32 +3 1 Float32 +3 1 Float32 +4 2 Float64 +4 2 Float64 +4 2 Float64 diff --git a/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql 
b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql
index ad5ea9512c6..71d5dd4abd1 100644
--- a/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql
+++ b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql
@@ -1,5 +1,6 @@
 SET allow_experimental_dynamic_type=1;
 
+DROP TABLE IF EXISTS null_table;
 CREATE TABLE null_table
 (
     n1 UInt8,
@@ -7,9 +8,11 @@ CREATE TABLE null_table
 )
 ENGINE = Null;
 
+DROP VIEW IF EXISTS dummy_rmv;
 CREATE MATERIALIZED VIEW dummy_rmv TO to_table
 AS SELECT * FROM null_table;
 
+DROP TABLE IF EXISTS to_table;
 CREATE TABLE to_table
 (
     n1 UInt8,
@@ -32,3 +35,7 @@ select '';
 ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=10);
 INSERT INTO null_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2));
 SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL;
+
+DROP TABLE null_table;
+DROP VIEW dummy_rmv;
+DROP TABLE to_table;
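The reference diff just below adds a trailing true/false column produced by the new isDynamicElementInSharedData function. A minimal standalone sketch of what that flag reports, assuming a build with this patch applied; the table name is illustrative, and the exact split between dedicated variants and shared data is an assumption about the implementation:

SET allow_experimental_dynamic_type = 1;
DROP TABLE IF EXISTS shared_flag_demo;
CREATE TABLE shared_flag_demo (d Dynamic(max_types=2)) ENGINE = Memory;
-- Three distinct types but only two dedicated variant slots: one type is
-- expected to end up in shared data, i.e. to report flag = 1 below.
INSERT INTO shared_flag_demo VALUES (1), ('str'), (toDate('2024-01-01'));
SELECT d, dynamicType(d), isDynamicElementInSharedData(d) AS flag FROM shared_flag_demo ORDER BY ALL;
DROP TABLE shared_flag_demo;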
diff --git a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference
index d96fbf658d8..2d3b2f118f6 100644
--- a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference
+++ b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference
@@ -1,26 +1,26 @@
-1 2024-01-01 Date
-2 1704056400 String
-3 1 String
-4 2 String
+1 2024-01-01 Date false
+2 1704056400 Decimal(18, 3) false
+3 1 Float32 true
+4 2 Float64 true
 
-1 2024-01-01 Date
-1 2024-01-01 Date
-2 1704056400 Decimal(18, 3)
-2 1704056400 String
-3 1 Float32
-3 1 String
-4 2 Float64
-4 2 String
+1 2024-01-01 Date false
+1 2024-01-01 Date false
+2 1704056400 Decimal(18, 3) false
+2 1704056400 Decimal(18, 3) false
+3 1 Float32 false
+3 1 Float32 false
+4 2 Float64 false
+4 2 Float64 false
 
-1 2024-01-01 String
-1 2024-01-01 String
-1 2024-01-01 String
-2 1704056400 String
-2 1704056400 String
-2 1704056400 String
-3 1 String
-3 1 String
-3 1 String
-4 2 String
-4 2 String
-4 2 String
+1 2024-01-01 Date true
+1 2024-01-01 Date true
+1 2024-01-01 Date true
+2 1704056400 Decimal(18, 3) true
+2 1704056400 Decimal(18, 3) true
+2 1704056400 Decimal(18, 3) true
+3 1 Float32 true
+3 1 Float32 true
+3 1 Float32 true
+4 2 Float64 true
+4 2 Float64 true
+4 2 Float64 true
diff --git a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql
index 632f3504fdb..e476d34a1db 100644
--- a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql
+++ b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql
@@ -11,16 +11,16 @@ CREATE TABLE to_table
 ENGINE = MergeTree ORDER BY n1;
 
 INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2));
-SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL;
+SELECT *, dynamicType(n2), isDynamicElementInSharedData(n2) FROM to_table ORDER BY ALL;
 
 select '';
 ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=5);
 INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2));
-SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL;
+SELECT *, dynamicType(n2), isDynamicElementInSharedData(n2) FROM to_table ORDER BY ALL;
 
 select '';
-ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=1);
+ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=0);
 INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2));
-SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL;
+SELECT *, dynamicType(n2), isDynamicElementInSharedData(n2) FROM to_table ORDER BY ALL;
 
 ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=500); -- { serverError UNEXPECTED_AST_STRUCTURE }
diff --git a/tests/queries/0_stateless/03152_dynamic_type_simple.reference b/tests/queries/0_stateless/03152_dynamic_type_simple.reference
index 5f243209ff3..e508bdd1990 100644
--- a/tests/queries/0_stateless/03152_dynamic_type_simple.reference
+++ b/tests/queries/0_stateless/03152_dynamic_type_simple.reference
@@ -7,7 +7,7 @@ string2 String
 \N None 42 Int64
 42 Int64 string String
-string String [1, 2] String
+string String [1,2] Array(Int64)
 [1,2] Array(Int64) \N None
 
 ┌─d────────────────────────┬─dynamicType(d)─┬─d.Int64─┬─d.String─┬─────d.Date─┬─d.Float64─┬──────────d.DateTime─┬─d.Array(Int64)─┬─d.Array(String)──────────┐
 1. │ 42 │ Int64 │ 42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │
diff --git a/tests/queries/0_stateless/03152_dynamic_type_simple.sql b/tests/queries/0_stateless/03152_dynamic_type_simple.sql
index fd5328faf15..ed24b213b1c 100644
--- a/tests/queries/0_stateless/03152_dynamic_type_simple.sql
+++ b/tests/queries/0_stateless/03152_dynamic_type_simple.sql
@@ -1,14 +1,17 @@
 SET allow_experimental_dynamic_type=1;
 
+DROP TABLE IF EXISTS test_max_types;
 CREATE TABLE test_max_types (d Dynamic(max_types=5)) ENGINE = Memory;
 INSERT INTO test_max_types VALUES ('string1'), (42), (3.14), ([1, 2]), (toDate('2021-01-01')), ('string2');
 SELECT d, dynamicType(d) FROM test_max_types;
 
 SELECT '';
+DROP TABLE IF EXISTS test_nested_dynamic;
 CREATE TABLE test_nested_dynamic (d1 Dynamic, d2 Dynamic(max_types=2)) ENGINE = Memory;
 INSERT INTO test_nested_dynamic VALUES (NULL, 42), (42, 'string'), ('string', [1, 2]), ([1, 2], NULL);
 SELECT d1, dynamicType(d1), d2, dynamicType(d2) FROM test_nested_dynamic;
 
+DROP TABLE IF EXISTS test_rapid_schema;
 CREATE TABLE test_rapid_schema (d Dynamic) ENGINE = Memory;
 INSERT INTO test_rapid_schema VALUES (42), ('string1'), (toDate('2021-01-01')), ([1, 2, 3]), (3.14), ('string2'), (toDateTime('2021-01-01 12:00:00')), (['array', 'of', 'strings']), (NULL), (toFloat64(42.42));
@@ -27,3 +30,8 @@ FROM
     FROM numbers(10000)
     )
 );
+
+DROP TABLE test_max_types;
+DROP TABLE test_nested_dynamic;
+DROP TABLE test_rapid_schema;
+
diff --git a/tests/queries/0_stateless/03153_dynamic_type_empty.sql b/tests/queries/0_stateless/03153_dynamic_type_empty.sql
index 8e942fe6f6e..3a0c98e63ee 100644
--- a/tests/queries/0_stateless/03153_dynamic_type_empty.sql
+++ b/tests/queries/0_stateless/03153_dynamic_type_empty.sql
@@ -1,5 +1,7 @@
 SET allow_experimental_dynamic_type=1;
 
+DROP TABLE IF EXISTS test_null_empty;
 CREATE TABLE test_null_empty (d Dynamic) ENGINE = Memory;
 INSERT INTO test_null_empty VALUES ([]), ([1]), ([]), (['1']), ([]), (()),((1)), (()), (('1')), (()), ({}), ({1:2}), ({}), ({'1':'2'}), ({});
 SELECT d, dynamicType(d) FROM test_null_empty;
+DROP TABLE test_null_empty;
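The two diffs that follow lower the largest accepted max_types from 255 to 254. The likely reason, stated here as an assumption, is that one of the 255 internal variant slots is now reserved for the shared variant. A small sketch of the new bound; the table name is illustrative:

SET allow_experimental_dynamic_type = 1;
DROP TABLE IF EXISTS bound_demo;
-- 254 should be the largest value that is still accepted after this change.
CREATE TABLE bound_demo (d Dynamic(max_types=254)) ENGINE = Memory;
-- CREATE TABLE bound_demo (d Dynamic(max_types=255)) -- presumably rejected now
DROP TABLE bound_demo;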
diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.sql b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql
index d302205ca23..fffea1bd0f5 100644
--- a/tests/queries/0_stateless/03159_dynamic_type_all_types.sql
+++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql
@@ -6,7 +6,7 @@ SET allow_experimental_variant_type=1;
 SET allow_suspicious_low_cardinality_types=1;
 
-CREATE TABLE t (d Dynamic(max_types=255)) ENGINE = Memory;
+CREATE TABLE t (d Dynamic(max_types=254)) ENGINE = Memory;
 
 -- Integer types: signed and unsigned integers (UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256)
 INSERT INTO t VALUES (-128::Int8), (-127::Int8), (-1::Int8), (0::Int8), (1::Int8), (126::Int8), (127::Int8);
 INSERT INTO t VALUES (-128::Int8), (-127::Int8), (-1::Int8), (0::Int8), (1::Int8), (126::Int8), (127::Int8);
@@ -84,7 +84,7 @@ INSERT INTO t VALUES ([(1, (2, ['aa', 'bb']), [(3, 'cc'), (4, 'dd')]), (5, (6, [
 SELECT dynamicType(d), d FROM t ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d;
 
-CREATE TABLE t2 (d Dynamic(max_types=255)) ENGINE = Memory;
+CREATE TABLE t2 (d Dynamic(max_types=254)) ENGINE = Memory;
 INSERT INTO t2 SELECT * FROM t;
 
 SELECT '';
diff --git a/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh b/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh
index 9b57e5c8718..b9bab2bd70b 100755
--- a/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh
+++ b/tests/queries/0_stateless/03172_dynamic_binary_serialization.sh
@@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CURDIR"/../shell_config.sh
 
 $CLICKHOUSE_CLIENT -q "drop table if exists test"
-$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "create table test (id UInt64, d Dynamic(max_types=255)) engine=Memory"
+$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "create table test (id UInt64, d Dynamic(max_types=254)) engine=Memory"
 
 $CLICKHOUSE_CLIENT -q "insert into test select 0, NULL"
 $CLICKHOUSE_CLIENT -q "insert into test select 1, materialize(42)::UInt8"
@@ -58,6 +58,6 @@ $CLICKHOUSE_CLIENT -q "insert into test select 47, materialize([[(20, 20), (50,
 $CLICKHOUSE_CLIENT -q "insert into test select 48, materialize([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]])::MultiPolygon"
 $CLICKHOUSE_CLIENT -q "insert into test select 49, materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])"
 
-$CLICKHOUSE_CLIENT -q "select * from test format RowBinary" | $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --input-format RowBinary --structure 'id UInt64, d Dynamic(max_types=255)' -q "select d, dynamicType(d) from table order by id"
+$CLICKHOUSE_CLIENT -q "select * from test format RowBinary" | $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --input-format RowBinary --structure 'id UInt64, d Dynamic(max_types=254)' -q "select d, dynamicType(d) from table order by id"
 $CLICKHOUSE_CLIENT -q "drop table test"
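The 03200 test below switches its ALTER to Dynamic(max_types=0) and now expects real values instead of NULLs from the typed subcolumn. A compact sketch of the behaviour being pinned down, assuming a build with this patch applied; the table name is illustrative:

set allow_experimental_dynamic_type = 1;
drop table if exists zero_demo;
create table zero_demo (d Dynamic) engine=Memory;
insert into table zero_demo select * from numbers(5);
alter table zero_demo modify column d Dynamic(max_types=0);
select d.UInt64 from zero_demo; -- expected: 0..4 rather than NULL, per the updated reference
drop table zero_demo;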
table test modify column d Dynamic(max_types=0); select d.UInt64 from test settings enable_analyzer=1; select d.UInt64 from test settings enable_analyzer=0; +drop table test; From c1c32daf01bba08129dd17e2b3a108cd7e837528 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 9 Aug 2024 21:59:40 +0000 Subject: [PATCH 179/265] Fix tests --- .../Serializations/SerializationDynamic.cpp | 2 +- tests/queries/0_stateless/00000_test.sql | 43 ------------------- ...native_with_binary_encoded_types.reference | 4 +- ...ry_and_native_with_binary_encoded_types.sh | 6 +-- 4 files changed, 6 insertions(+), 49 deletions(-) delete mode 100644 tests/queries/0_stateless/00000_test.sql diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp index 67b29750948..5fadb6e4de4 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.cpp +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -130,7 +130,7 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix( for (const auto & variant: variants) { if (variant->getName() != ColumnDynamic::getSharedVariantTypeName()) - encodeDataType(dynamic_state->variant_type, *stream); + encodeDataType(variant, *stream); } } else diff --git a/tests/queries/0_stateless/00000_test.sql b/tests/queries/0_stateless/00000_test.sql deleted file mode 100644 index db9dd774484..00000000000 --- a/tests/queries/0_stateless/00000_test.sql +++ /dev/null @@ -1,43 +0,0 @@ -set allow_experimental_variant_type = 1; -set use_variant_as_common_type = 1; -set allow_experimental_dynamic_type = 1; -set enable_named_columns_in_function_tuple = 0; -drop table if exists test; -create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1; - - -system stop merges test; -insert into test select number, number from numbers(10); -insert into test select number, tuple(if(number % 3 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(10); -insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(5); -insert into test select number, multiIf(number % 5 == 0, tuple(if(number % 3 == 0, toDateTime(number), toIPv4(number)))::Tuple(a Dynamic(max_types=3)), number % 5 == 1 or number % 5 == 2, number, 'str_' || number) from numbers(10); - -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type, isDynamicElementInSharedVariant(d.`Tuple(a Dynamic(max_types=3))`.a) as flag from test group by type, flag order by count(), type; -system start merges test; -optimize table test final; -select '---------------------'; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type, isDynamicElementInSharedVariant(d.`Tuple(a Dynamic(max_types=3))`.a) as flag from test group by type, flag order by count(), type; - -system stop merges test; -insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(5); -insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(20); - -select '---------------------'; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type, 
isDynamicElementInSharedVariant(d.`Tuple(a Dynamic(max_types=3))`.a) as flag from test group by type, flag order by count(), type; -system start merges test; -optimize table test final; -select '---------------------'; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type, isDynamicElementInSharedVariant(d.`Tuple(a Dynamic(max_types=3))`.a) as flag from test group by type, flag order by count(), type; - -system stop merges test; -insert into test select number, tuple(toDateTime(number))::Tuple(a Dynamic(max_types=3)) from numbers(4); - -select '---------------------'; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type, isDynamicElementInSharedVariant(d.`Tuple(a Dynamic(max_types=3))`.a) as flag from test group by type, flag order by count(), type; -system start merges test; -optimize table test final; -select '---------------------'; -select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type, isDynamicElementInSharedVariant(d.`Tuple(a Dynamic(max_types=3))`.a) as flag from test group by type, flag order by count(), type; - -drop table test; - diff --git a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference index 1ba147f9627..7de0804e0f2 100644 --- a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference +++ b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.reference @@ -110,5 +110,5 @@ true Bool [{42:(1,[(2,{1:2})])}] Dynamic [{42:(1,[(2,{1:2})])}] Dynamic(max_types=10) [{42:(1,[(2,{1:2})])}] Dynamic(max_types=10) -[{42:(1,[(2,{1:2})])}] Dynamic(max_types=255) -[{42:(1,[(2,{1:2})])}] Dynamic(max_types=255) +[{42:(1,[(2,{1:2})])}] Dynamic(max_types=254) +[{42:(1,[(2,{1:2})])}] Dynamic(max_types=254) diff --git a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh index 0c585d36348..1e674a29072 100755 --- a/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh +++ b/tests/queries/0_stateless/03173_row_binary_and_native_with_binary_encoded_types.sh @@ -6,8 +6,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function test { - $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_binary_encode_types_in_binary_format=1 -q "select $1 as value format RowBinaryWithNamesAndTypes" | $CLICKHOUSE_LOCAL --input-format RowBinaryWithNamesAndTypes --input_format_binary_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" - $CLICKHOUSE_LOCAL --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_native_encode_types_in_binary_format=1 -q "select $1 as value format Native" | $CLICKHOUSE_LOCAL --input-format Native --input_format_native_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" + $CLICKHOUSE_LOCAL --stacktrace --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_binary_encode_types_in_binary_format=1 -q "select $1 as value format RowBinaryWithNamesAndTypes" | $CLICKHOUSE_LOCAL --input-format RowBinaryWithNamesAndTypes --input_format_binary_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" + $CLICKHOUSE_LOCAL --stacktrace 
--allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --output_format_native_encode_types_in_binary_format=1 -q "select $1 as value format Native" | $CLICKHOUSE_LOCAL --input-format Native --input_format_native_decode_types_in_binary_format=1 -q "select value, toTypeName(value) from table" } test "materialize(42)::UInt8" @@ -66,4 +66,4 @@ test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])" test "materialize(42::UInt32)::Variant(UInt32, String, Tuple(a UInt32, b Array(Map(String, String))))" test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic" test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic(max_types=10)" -test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic(max_types=255)" +test "materialize([map(42, tuple(1, [tuple(2, map(1, 2))]))])::Dynamic(max_types=254)" From 805a2e33bfedbdfc2393217e7b485761b1943e1d Mon Sep 17 00:00:00 2001 From: avogar Date: Sat, 10 Aug 2024 00:47:43 +0000 Subject: [PATCH 180/265] Fix unit tests --- src/Columns/tests/gtest_column_dynamic.cpp | 74 +++++++++++----------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/src/Columns/tests/gtest_column_dynamic.cpp b/src/Columns/tests/gtest_column_dynamic.cpp index 5445bd525d9..de76261229d 100644 --- a/src/Columns/tests/gtest_column_dynamic.cpp +++ b/src/Columns/tests/gtest_column_dynamic.cpp @@ -7,7 +7,7 @@ using namespace DB; TEST(ColumnDynamic, CreateEmpty) { - auto column = ColumnDynamic::create(255); + auto column = ColumnDynamic::create(254); ASSERT_TRUE(column->empty()); ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(SharedVariant)"); ASSERT_EQ(column->getVariantInfo().variant_names.size(), 1); @@ -19,7 +19,7 @@ TEST(ColumnDynamic, CreateEmpty) TEST(ColumnDynamic, InsertDefault) { - auto column = ColumnDynamic::create(255); + auto column = ColumnDynamic::create(254); column->insertDefault(); ASSERT_TRUE(column->size() == 1); ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(SharedVariant)"); @@ -34,7 +34,7 @@ TEST(ColumnDynamic, InsertDefault) TEST(ColumnDynamic, InsertFields) { - auto column = ColumnDynamic::create(255); + auto column = ColumnDynamic::create(254); column->insert(Field(42)); column->insert(Field(-42)); column->insert(Field("str1")); @@ -56,7 +56,7 @@ TEST(ColumnDynamic, InsertFields) ColumnDynamic::MutablePtr getDynamicWithManyVariants(size_t num_variants, Field tuple_element = Field(42)) { - auto column = ColumnDynamic::create(255); + auto column = ColumnDynamic::create(254); for (size_t i = 0; i != num_variants; ++i) { Tuple tuple; @@ -136,7 +136,7 @@ TEST(ColumnDynamic, InsertFieldsOverflow2) ColumnDynamic::MutablePtr getInsertFromColumn(size_t num = 1) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); for (size_t i = 0; i != num; ++i) { column_from->insert(Field(42)); @@ -170,13 +170,13 @@ void checkInsertFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynami TEST(ColumnDynamic, InsertFrom1) { - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}}); } TEST(ColumnDynamic, InsertFrom2) { - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); column_to->insert(Field(42)); column_to->insert(Field(42.42)); 
column_to->insert(Field("str")); @@ -186,7 +186,7 @@ TEST(ColumnDynamic, InsertFrom2) TEST(ColumnDynamic, InsertFrom3) { - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); column_to->insert(Field(42)); column_to->insert(Field(42.42)); column_to->insert(Field("str")); @@ -197,7 +197,7 @@ TEST(ColumnDynamic, InsertFrom3) TEST(ColumnDynamic, InsertFromOverflow1) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(42.42)); column_from->insert(Field("str")); @@ -227,7 +227,7 @@ TEST(ColumnDynamic, InsertFromOverflow1) TEST(ColumnDynamic, InsertFromOverflow2) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(42.42)); @@ -251,7 +251,7 @@ TEST(ColumnDynamic, InsertFromOverflow3) column_from->insert(Field(42)); column_from->insert(Field(42.42)); - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); column_to->insert(Field(41)); column_to->insertFrom(*column_from, 0); @@ -297,13 +297,13 @@ void checkInsertManyFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDy TEST(ColumnDynamic, InsertManyFrom1) { - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}}); } TEST(ColumnDynamic, InsertManyFrom2) { - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); column_to->insert(Field(42)); column_to->insert(Field(42.42)); column_to->insert(Field("str")); @@ -313,7 +313,7 @@ TEST(ColumnDynamic, InsertManyFrom2) TEST(ColumnDynamic, InsertManyFrom3) { - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); column_to->insert(Field(42)); column_to->insert(Field(42.42)); column_to->insert(Field("str")); @@ -324,7 +324,7 @@ TEST(ColumnDynamic, InsertManyFrom3) TEST(ColumnDynamic, InsertManyFromOverflow1) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(42.42)); column_from->insert(Field("str")); @@ -361,7 +361,7 @@ TEST(ColumnDynamic, InsertManyFromOverflow1) TEST(ColumnDynamic, InsertManyFromOverflow2) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(42.42)); @@ -393,7 +393,7 @@ TEST(ColumnDynamic, InsertManyFromOverflow3) column_from->insert(Field(42)); column_from->insert(Field(42.42)); - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); column_to->insert(Field(41)); column_to->insertManyFrom(*column_from, 0, 2); @@ -441,13 +441,13 @@ void checkInsertRangeFrom(const ColumnDynamic::MutablePtr & column_from, ColumnD TEST(ColumnDynamic, InsertRangeFrom1) { - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, SharedVariant, String)", {"Float64", "Int8", "SharedVariant", "String"}, {{"Float64", 0}, {"Int8", 1}, {"SharedVariant", 2}, {"String", 3}}); } TEST(ColumnDynamic, InsertRangeFrom2) { - auto column_to = 
ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); column_to->insert(Field(42)); column_to->insert(Field(42.42)); column_to->insert(Field("str1")); @@ -457,7 +457,7 @@ TEST(ColumnDynamic, InsertRangeFrom2) TEST(ColumnDynamic, InsertRangeFrom3) { - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); column_to->insert(Field(42)); column_to->insert(Field(42.42)); column_to->insert(Field("str1")); @@ -468,7 +468,7 @@ TEST(ColumnDynamic, InsertRangeFrom3) TEST(ColumnDynamic, InsertRangeFromOverflow1) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(43)); column_from->insert(Field(42.42)); @@ -494,7 +494,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow1) TEST(ColumnDynamic, InsertRangeFromOverflow2) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(43)); column_from->insert(Field(42.42)); @@ -516,7 +516,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow2) TEST(ColumnDynamic, InsertRangeFromOverflow3) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(43)); column_from->insert(Field(42.42)); @@ -539,7 +539,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow3) TEST(ColumnDynamic, InsertRangeFromOverflow4) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(42.42)); column_from->insert(Field("str")); @@ -561,7 +561,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow4) TEST(ColumnDynamic, InsertRangeFromOverflow5) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(43)); column_from->insert(Field(42.42)); @@ -587,7 +587,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow5) TEST(ColumnDynamic, InsertRangeFromOverflow6) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(43)); column_from->insert(Field(44)); @@ -619,7 +619,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow6) TEST(ColumnDynamic, InsertRangeFromOverflow7) { - auto column_from = ColumnDynamic::create(3); + auto column_from = ColumnDynamic::create(2); column_from->insert(Field(42.42)); column_from->insert(Field("str1")); column_from->insert(Field(42)); @@ -629,7 +629,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow7) column_from->insert(Field("str2")); column_from->insert(Field(Array({Field(42)}))); - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); column_to->insert(Field(42)); column_to->insertRangeFrom(*column_from, 0, 8); @@ -659,7 +659,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow7) TEST(ColumnDynamic, InsertRangeFromOverflow8) { - auto column_from = ColumnDynamic::create(3); + auto column_from = ColumnDynamic::create(2); column_from->insert(Field(42.42)); column_from->insert(Field("str1")); column_from->insert(Field(42)); @@ -669,7 +669,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow8) column_from->insert(Field("str2")); column_from->insert(Field(Array({Field(42)}))); - auto column_to = ColumnDynamic::create(3); + auto column_to = ColumnDynamic::create(2); column_to->insert(Field(42)); 
column_from->insert(Field("str1")); @@ -711,7 +711,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow9) column_from->insert(Field("str2")); column_from->insert(Field(Array({Field(42)}))); - auto column_to = ColumnDynamic::create(3); + auto column_to = ColumnDynamic::create(2); column_to->insert(Field(42)); column_to->insertRangeFrom(*column_from, 0, 9); @@ -743,7 +743,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow9) TEST(ColumnDynamic, SerializeDeserializeFromArena1) { - auto column = ColumnDynamic::create(255); + auto column = ColumnDynamic::create(254); column->insert(Field(42)); column->insert(Field(42.42)); column->insert(Field("str")); @@ -768,7 +768,7 @@ TEST(ColumnDynamic, SerializeDeserializeFromArena1) TEST(ColumnDynamic, SerializeDeserializeFromArena2) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(42.42)); column_from->insert(Field("str")); @@ -781,7 +781,7 @@ TEST(ColumnDynamic, SerializeDeserializeFromArena2) column_from->serializeValueIntoArena(2, arena, pos); column_from->serializeValueIntoArena(3, arena, pos); - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); pos = column_to->deserializeAndInsertFromArena(ref1.data); pos = column_to->deserializeAndInsertFromArena(pos); pos = column_to->deserializeAndInsertFromArena(pos); @@ -800,7 +800,7 @@ TEST(ColumnDynamic, SerializeDeserializeFromArena2) TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow1) { - auto column_from = ColumnDynamic::create(255); + auto column_from = ColumnDynamic::create(254); column_from->insert(Field(42)); column_from->insert(Field(42.42)); column_from->insert(Field("str")); @@ -846,7 +846,7 @@ TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow2) column_from->serializeValueIntoArena(3, arena, pos); column_from->serializeValueIntoArena(4, arena, pos); - auto column_to = ColumnDynamic::create(3); + auto column_to = ColumnDynamic::create(2); column_to->insert(Field(42.42)); pos = column_to->deserializeAndInsertFromArena(ref1.data); pos = column_to->deserializeAndInsertFromArena(pos); @@ -882,7 +882,7 @@ TEST(ColumnDynamic, skipSerializedInArena) auto ref4 = column_from->serializeValueIntoArena(3, arena, pos); const char * end = ref4.data + ref4.size; - auto column_to = ColumnDynamic::create(255); + auto column_to = ColumnDynamic::create(254); pos = column_to->skipSerializedInArena(ref1.data); pos = column_to->skipSerializedInArena(pos); pos = column_to->skipSerializedInArena(pos); From c26b3cb4452931ee3bb3355b47dafb364744c9ab Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Sat, 10 Aug 2024 02:27:23 +0000 Subject: [PATCH 181/265] handle the case of packed storage --- src/Storages/MergeTree/MutateTask.cpp | 11 ++++++++--- .../03161_lightweight_delete_projection.sql | 4 +++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 3d9f49c9a7a..0f0428287b6 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1554,6 +1554,10 @@ private: removed_projections.insert(command.column_name); } + bool lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name); + bool lightweight_delete_drop = lightweight_delete_mode + && ctx->data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::DROP; + const auto & projections = ctx->metadata_snapshot->getProjections(); for (const auto & 
projection : projections)
     {
@@ -1561,10 +1565,11 @@ private:
             continue;
 
         bool need_recalculate =
-            ctx->materialized_projections.contains(projection.name)
+            (ctx->materialized_projections.contains(projection.name)
             || (!is_full_part_storage
                 && ctx->source_part->hasProjection(projection.name)
-                && !ctx->source_part->hasBrokenProjection(projection.name));
+                && !ctx->source_part->hasBrokenProjection(projection.name)))
+            && !lightweight_delete_drop;
 
         if (need_recalculate)
         {
@@ -1572,7 +1577,7 @@ private:
         }
         else
         {
-            if (!ctx->updated_header.has(RowExistsColumn::name) && ctx->source_part->checksums.has(projection.getDirectoryName()))
+            if (!lightweight_delete_mode && ctx->source_part->checksums.has(projection.getDirectoryName()))
                 entries_to_hardlink.insert(projection.getDirectoryName());
         }
     }
diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql
index 0b05326e2c1..da6427cbf22 100644
--- a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql
+++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql
@@ -1,5 +1,7 @@
+-- For the cloud version, min_bytes_for_full_part_storage should also be considered since packed storage exists,
+-- but to avoid redundancy, just let CI test the parameter.
 
-SET max_threads = 1, lightweight_deletes_sync = 2, alter_sync = 2;
+SET lightweight_deletes_sync = 2, alter_sync = 2;
 
 DROP TABLE IF EXISTS users_compact;

From d243feea2136bbfa5f1e943f64e5ebd851f2b103 Mon Sep 17 00:00:00 2001
From: avogar
Date: Sat, 10 Aug 2024 11:21:28 +0000
Subject: [PATCH 182/265] Fix special builds

---
 src/Columns/ColumnDynamic.cpp | 8 ++++----
 src/Columns/ColumnDynamic.h   | 2 +-
 src/Columns/ColumnVariant.cpp | 2 +-
 src/Columns/ColumnVariant.h   | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp
index 454f7956f48..7246be29592 100644
--- a/src/Columns/ColumnDynamic.cpp
+++ b/src/Columns/ColumnDynamic.cpp
@@ -325,7 +325,7 @@ void ColumnDynamic::doInsertFrom(const IColumn & src_, size_t n)
     /// Check if we insert from shared variant and process it separately.
     if (src_global_discr == dynamic_src.getSharedVariantDiscriminator())
     {
-        auto & src_shared_variant = dynamic_src.getSharedVariant();
+        const auto & src_shared_variant = dynamic_src.getSharedVariant();
         auto value = src_shared_variant.getDataAt(src_offset);
         /// Decode data type of this value.
         ReadBufferFromMemory buf(value.data, value.size);
@@ -469,9 +469,9 @@ void ColumnDynamic::doInsertRangeFrom(const IColumn & src_, size_t start, size_t
         if (variant_info.variant_names.size() - 1 == max_dynamic_types)
         {
             auto shared_variant_discr = getSharedVariantDiscriminator();
-            for (size_t i = 0; i != dynamic_src.variant_info.variant_names.size(); ++i)
+            for (const auto & variant_name : dynamic_src.variant_info.variant_names)
             {
-                auto it = variant_info.variant_name_to_discriminator.find(dynamic_src.variant_info.variant_names[i]);
+                auto it = variant_info.variant_name_to_discriminator.find(variant_name);
                 if (it == variant_info.variant_name_to_discriminator.end())
                     other_to_new_discriminators.push_back(shared_variant_discr);
                 else
@@ -618,7 +618,7 @@ void ColumnDynamic::doInsertManyFrom(const IColumn & src_, size_t position, size
     /// Check if we insert from shared variant and process it separately.
if (src_global_discr == dynamic_src.getSharedVariantDiscriminator()) { - auto & src_shared_variant = dynamic_src.getSharedVariant(); + const auto & src_shared_variant = dynamic_src.getSharedVariant(); auto value = src_shared_variant.getDataAt(src_offset); /// Decode data type of this value. ReadBufferFromMemory buf(value.data, value.size); diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 8b815e2b015..a595a990964 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -37,7 +37,7 @@ public: MERGE, /// Statistics were calculated during merge of several MergeTree parts. }; - Statistics(Source source_) : source(source_) {} + explicit Statistics(Source source_) : source(source_) {} /// Source of the statistics. Source source; diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index 7531e976926..0402e1a0690 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -476,7 +476,7 @@ void ColumnVariant::insertFromImpl(const DB::IColumn & src_, size_t n, const std } } -void ColumnVariant::insertRangeFromImpl(const DB::IColumn & src_, size_t start, size_t length, const std::vector * global_discriminators_mapping, Discriminator * skip_discriminator) +void ColumnVariant::insertRangeFromImpl(const DB::IColumn & src_, size_t start, size_t length, const std::vector * global_discriminators_mapping, const Discriminator * skip_discriminator) { const size_t num_variants = variants.size(); const auto & src = assert_cast(src_); diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index 571a843d113..7c8093e385d 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -329,7 +329,7 @@ public: private: void insertFromImpl(const IColumn & src_, size_t n, const std::vector * global_discriminators_mapping); - void insertRangeFromImpl(const IColumn & src_, size_t start, size_t length, const std::vector * global_discriminators_mapping, Discriminator * skip_discriminator); + void insertRangeFromImpl(const IColumn & src_, size_t start, size_t length, const std::vector * global_discriminators_mapping, const Discriminator * skip_discriminator); void insertManyFromImpl(const IColumn & src_, size_t position, size_t length, const std::vector * global_discriminators_mapping); void initIdentityGlobalToLocalDiscriminatorsMapping(); From 957a0b6ea4c3e262a5c1fa664d81ab31d7e0d757 Mon Sep 17 00:00:00 2001 From: sakulali Date: Sun, 11 Aug 2024 00:12:36 +0800 Subject: [PATCH 183/265] Add a setting query_cache_tag --- docs/en/operations/query-cache.md | 10 ++++++ docs/en/operations/settings/settings.md | 11 +++++++ .../operations/system-tables/query_cache.md | 2 ++ src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.cpp | 1 + src/Interpreters/Cache/QueryCache.cpp | 16 +++++++--- src/Interpreters/Cache/QueryCache.h | 13 ++++++-- src/Interpreters/executeQuery.cpp | 5 +-- .../System/StorageSystemQueryCache.cpp | 5 ++- .../02494_query_cache_tag.reference | 14 ++++++++ .../0_stateless/02494_query_cache_tag.sql | 32 +++++++++++++++++++ 11 files changed, 100 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/02494_query_cache_tag.reference create mode 100644 tests/queries/0_stateless/02494_query_cache_tag.sql diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index 7a920671fc2..a6c4d74f4ac 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -143,6 +143,16 @@ value can be specified at session, profile or query 
level using setting [query_c
 Entries in the query cache are compressed by default. This reduces the overall memory consumption at the cost of slower writes into / reads
 from the query cache. To disable compression, use setting [query_cache_compress_entries](settings/settings.md#query-cache-compress-entries).
 
+Entries in the query cache can be separated by tag, using setting [query_cache_tag](settings/settings.md#query-cache-tag). Queries with different tags are considered different entries. For example, the results of the queries
+
+``` sql
+SELECT 1 SETTINGS use_query_cache = true;
+SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'one';
+SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'one diff';
+```
+
+are stored as different entries in the query cache. The tag of an entry can be found in system table [system.query_cache](system-tables/query_cache.md).
+
 ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#setting-max_block_size) rows. Due to filtering, aggregation,
 etc., result blocks are typically much smaller than 'max_block_size' but there are also cases where they are much bigger. Setting
 [query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results) (enabled by default) controls if result blocks
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index e432f4e038f..7b855665efb 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -1800,6 +1800,17 @@ Possible values:
 
 Default value: `0`.
 
+## query_cache_tag {#query-cache-tag}
+
+An arbitrary string to separate entries in the [query cache](../query-cache.md).
+Queries with different values of this setting are considered different.
+
+Possible values:
+
+- string: name of query cache tag
+
+Default value: `''`.
+
 ## query_cache_max_size_in_bytes {#query-cache-max-size-in-bytes}
 
 The maximum amount of memory (in bytes) the current user may allocate in the [query cache](../query-cache.md). 0 means unlimited.
diff --git a/docs/en/operations/system-tables/query_cache.md b/docs/en/operations/system-tables/query_cache.md
index a9f86f5fc2b..393b37d3616 100644
--- a/docs/en/operations/system-tables/query_cache.md
+++ b/docs/en/operations/system-tables/query_cache.md
@@ -14,6 +14,7 @@ Columns:
 - `compressed` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is compressed.
 - `expires_at` ([DateTime](../../sql-reference/data-types/datetime.md)) — When the query cache entry becomes stale.
 - `key_hash` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — A hash of the query string, used as a key to find query cache entries.
+- `tag` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — An arbitrary string to separate entries in the query cache.
 
 **Example**
 
@@ -31,6 +32,7 @@ shared: 0
 compressed: 1
 expires_at: 2023-10-13 13:35:45
 key_hash: 12188185624808016954
+tag:
 
 1 row in set. Elapsed: 0.004 sec.
 ```
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 4559cc67b35..ed58f8041d0 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -676,6 +676,7 @@ class IColumn;
     M(Bool, query_cache_squash_partial_results, true, "Squash partial result blocks to blocks of size 'max_block_size'. 
Reduces performance of inserts into the query cache but improves the compressability of cache entries.", 0) \ M(Seconds, query_cache_ttl, 60, "After this time in seconds entries in the query cache become stale", 0) \ M(Bool, query_cache_share_between_users, false, "Allow other users to read entry in the query cache", 0) \ + M(String, query_cache_tag, "", "An arbitrary string to separate entries in the query cache. Queries with different values of this setting are considered different.", 0) \ M(Bool, enable_sharing_sets_for_mutations, true, "Allow sharing set objects build for IN subqueries between different tasks of the same mutation. This reduces memory usage and CPU consumption", 0) \ \ M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index c6392044f72..49a325b07b1 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -84,6 +84,7 @@ static std::initializer_list user_id_, const std::vector & current_user_roles_, bool is_shared_, std::chrono::time_point expires_at_, - bool is_compressed_) - : ast_hash(calculateAstHash(ast_, current_database, settings)) + bool is_compressed_, + const String & tag_) + : ast_hash(calculateAstHash(ast_, current_database, settings, tag_)) , header(header_) , user_id(user_id_) , current_user_roles(current_user_roles_) @@ -242,11 +247,12 @@ QueryCache::Key::Key( , expires_at(expires_at_) , is_compressed(is_compressed_) , query_string(queryStringFromAST(ast_)) + , tag(tag_) { } -QueryCache::Key::Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional user_id_, const std::vector & current_user_roles_) - : QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles +QueryCache::Key::Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional user_id_, const std::vector & current_user_roles_, const String & tag_) + : QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false, tag_) /// dummy values for everything != AST, current database, user name/roles { } diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index 461197cac32..54de5edb145 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -88,6 +88,10 @@ public: /// SYSTEM.QUERY_CACHE. const String query_string; + /// An arbitrary string to separate entries in the query cache. + /// Queries with different values of this setting are considered different. + const String tag; + /// Ctor to construct a Key for writing into query cache. Key(ASTPtr ast_, const String & current_database, @@ -96,10 +100,15 @@ public: std::optional user_id_, const std::vector & current_user_roles_, bool is_shared_, std::chrono::time_point expires_at_, - bool is_compressed); + bool is_compressed, + const String & tag_); /// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name). 
- Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional user_id_, const std::vector & current_user_roles_); + Key(ASTPtr ast_, + const String & current_database, + const Settings & settings, + std::optional user_id_, const std::vector & current_user_roles_, + const String & tag_); bool operator==(const Key & other) const; }; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index fe87eed5570..6422d3128fa 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1129,7 +1129,7 @@ static std::tuple executeQueryImpl( { if (can_use_query_cache && settings.enable_reads_from_query_cache) { - QueryCache::Key key(ast, context->getCurrentDatabase(), *settings_copy, context->getUserID(), context->getCurrentRoles()); + QueryCache::Key key(ast, context->getCurrentDatabase(), *settings_copy, context->getUserID(), context->getCurrentRoles(), settings.query_cache_tag); QueryCache::Reader reader = query_cache->createReader(key); if (reader.hasCacheEntryForKey()) { @@ -1258,7 +1258,8 @@ static std::tuple executeQueryImpl( context->getUserID(), context->getCurrentRoles(), settings.query_cache_share_between_users, std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl), - settings.query_cache_compress_entries); + settings.query_cache_compress_entries, + settings.query_cache_tag); const size_t num_query_runs = settings.query_cache_min_query_runs ? query_cache->recordQueryRun(key) : 1; /// try to avoid locking a mutex in recordQueryRun() if (num_query_runs <= settings.query_cache_min_query_runs) diff --git a/src/Storages/System/StorageSystemQueryCache.cpp b/src/Storages/System/StorageSystemQueryCache.cpp index 4c54d4ae16f..f81d50e8806 100644 --- a/src/Storages/System/StorageSystemQueryCache.cpp +++ b/src/Storages/System/StorageSystemQueryCache.cpp @@ -1,6 +1,7 @@ #include "StorageSystemQueryCache.h" #include #include +#include #include #include #include @@ -19,7 +20,8 @@ ColumnsDescription StorageSystemQueryCache::getColumnsDescription() {"shared", std::make_shared(), "If the query cache entry is shared between multiple users."}, {"compressed", std::make_shared(), "If the query cache entry is compressed."}, {"expires_at", std::make_shared(), "When the query cache entry becomes stale."}, - {"key_hash", std::make_shared(), "A hash of the query string, used as a key to find query cache entries."} + {"key_hash", std::make_shared(), "A hash of the query string, used as a key to find query cache entries."}, + {"tag", std::make_shared(std::make_shared()), "An arbitrary string to separate entries in the query cache."} }; } @@ -56,6 +58,7 @@ void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr res_columns[4]->insert(key.is_compressed); res_columns[5]->insert(std::chrono::system_clock::to_time_t(key.expires_at)); res_columns[6]->insert(key.ast_hash.low64); /// query cache considers aliases (issue #56258) + res_columns[7]->insert(key.tag); } } diff --git a/tests/queries/0_stateless/02494_query_cache_tag.reference b/tests/queries/0_stateless/02494_query_cache_tag.reference new file mode 100644 index 00000000000..055d3d4c5bb --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_tag.reference @@ -0,0 +1,14 @@ +1 +1 +--- +1 +1 +1 +2 +--- +1 +1 +1 +2 +1 +3 diff --git a/tests/queries/0_stateless/02494_query_cache_tag.sql b/tests/queries/0_stateless/02494_query_cache_tag.sql new file mode 100644 index 00000000000..054607058e8 --- /dev/null +++ 
b/tests/queries/0_stateless/02494_query_cache_tag.sql
@@ -0,0 +1,32 @@
+-- Tags: no-parallel
+-- Tag no-parallel: Messes with internal cache
+
+SYSTEM DROP QUERY CACHE;
+
+-- Cache the query result after the query invocation
+SELECT 1 SETTINGS use_query_cache = true;
+SELECT COUNT(*) FROM system.query_cache;
+
+SELECT '---';
+
+SYSTEM DROP QUERY CACHE;
+
+-- Queries with and without a tag value for this setting are considered different cache entries.
+SELECT 1 SETTINGS use_query_cache = true;
+SELECT COUNT(*) FROM system.query_cache;
+SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'one';
+SELECT COUNT(*) FROM system.query_cache;
+
+SELECT '---';
+
+SYSTEM DROP QUERY CACHE;
+
+-- Queries with different tag values for this setting are considered different cache entries.
+SELECT 1 SETTINGS use_query_cache = true;
+SELECT COUNT(*) FROM system.query_cache;
+SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'one';
+SELECT COUNT(*) FROM system.query_cache;
+SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'one diff';
+SELECT COUNT(*) FROM system.query_cache;
+
+SYSTEM DROP QUERY CACHE;

From 20b97a45bf3c73960e71e1a158cec35ec522ccff Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 12 Aug 2024 07:09:42 +0200
Subject: [PATCH 184/265] Fix fundamentally broken test

CC @azat
---
 tests/integration/test_throttling/test.py | 62 ++++++++++-------------
 1 file changed, 28 insertions(+), 34 deletions(-)

diff --git a/tests/integration/test_throttling/test.py b/tests/integration/test_throttling/test.py
index c53c2bb1ddf..4bd96e2756d 100644
--- a/tests/integration/test_throttling/test.py
+++ b/tests/integration/test_throttling/test.py
@@ -121,21 +121,15 @@ def node_update_config(mode, setting, value=None):
     node.restart_clickhouse()
 
 
-def assert_took(took, should_took):
+def assert_took(took, should_take):
     # we need to decrease the lower limit because the server limits could
     # be enforced by throttling some server background IO instead of query IO
     # and we have no control over it
-    #
-    # and the same for upper limit, it can be slightly larger, due to for
-    # instance network latencies or CPU starvation
-    if should_took > 0:
-        assert took >= should_took * 0.85 and took <= should_took * 1.8
-    else:
-        assert took >= should_took * 0.85
+    assert took >= should_take * 0.85
 
 
 @pytest.mark.parametrize(
-    "policy,backup_name,mode,setting,value,should_took",
+    "policy,backup_name,mode,setting,value,should_take",
     [
         #
         # Local -> Local
         #
         pytest.param(
             "default",
             next_backup_name("local"),
             None,
             None,
             None,
             0,
             id="no_local_throttling",
         ),
-        # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds
+        # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds
         pytest.param(
             "default",
             next_backup_name("local"),
             "user",
             "max_backup_bandwidth",
             "1M",
             7,
             id="user_local_throttling",
         ),
-        # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds
+        # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds
         pytest.param(
             "default",
             next_backup_name("local"),
             "server",
             "max_backup_bandwidth_for_server",
             "2M",
             3,
             id="server_local_throttling",
         ),
         #
         # Remote -> Local
         #
         pytest.param(
             "s3",
             next_backup_name("local"),
             None,
             None,
             None,
             0,
             id="no_remote_to_local_throttling",
         ),
-        # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds
+        # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds
         pytest.param(
             "s3",
             next_backup_name("local"),
             "user",
             "max_backup_bandwidth",
             "1M",
             7,
             id="user_remote_to_local_throttling",
         ),
-        # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds
+
# reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "s3", next_backup_name("local"), @@ -252,7 +246,7 @@ def assert_took(took, should_took): 0, id="no_local_to_remote_throttling", ), - # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "default", next_backup_name("remote"), @@ -262,7 +256,7 @@ def assert_took(took, should_took): 7, id="user_local_to_remote_throttling", ), - # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "default", next_backup_name("remote"), @@ -274,7 +268,7 @@ def assert_took(took, should_took): ), ], ) -def test_backup_throttling(policy, backup_name, mode, setting, value, should_took): +def test_backup_throttling(policy, backup_name, mode, setting, value, should_take): node_update_config(mode, setting, value) node.query( f""" @@ -284,7 +278,7 @@ def test_backup_throttling(policy, backup_name, mode, setting, value, should_too """ ) _, took = elapsed(node.query, f"backup table data to {backup_name}") - assert_took(took, should_took) + assert_took(took, should_take) def test_backup_throttling_override(): @@ -305,18 +299,18 @@ def test_backup_throttling_override(): "max_backup_bandwidth": "500K", }, ) - # reading 1e6*8 bytes with 500Ki default bandwith should take (8-0.5)/0.5=15 seconds + # reading 1e6*8 bytes with 500Ki default bandwidth should take (8-0.5)/0.5=15 seconds assert_took(took, 15) @pytest.mark.parametrize( - "policy,mode,setting,value,should_took", + "policy,mode,setting,value,should_take", [ # # Local # pytest.param("default", None, None, None, 0, id="no_local_throttling"), - # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "default", "user", @@ -325,7 +319,7 @@ def test_backup_throttling_override(): 7, id="user_local_throttling", ), - # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "default", "server", @@ -338,7 +332,7 @@ def test_backup_throttling_override(): # Remote # pytest.param("s3", None, None, None, 0, id="no_remote_throttling"), - # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "s3", "user", @@ -347,7 +341,7 @@ def test_backup_throttling_override(): 7, id="user_remote_throttling", ), - # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "s3", "server", @@ -358,7 +352,7 @@ def test_backup_throttling_override(): ), ], ) -def test_read_throttling(policy, mode, setting, value, should_took): +def test_read_throttling(policy, mode, setting, value, should_take): node_update_config(mode, setting, value) node.query( f""" @@ -368,17 +362,17 @@ def test_read_throttling(policy, mode, setting, value, should_took): """ ) _, took = elapsed(node.query, f"select * from data") - assert_took(took, should_took) + assert_took(took, should_take) @pytest.mark.parametrize( - "policy,mode,setting,value,should_took", + "policy,mode,setting,value,should_take", [ # # Local # pytest.param("default", None, None, None, 0, 
id="no_local_throttling"), - # reading 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "default", "user", @@ -387,7 +381,7 @@ def test_read_throttling(policy, mode, setting, value, should_took): 7, id="local_user_throttling", ), - # reading 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # reading 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "default", "server", @@ -400,7 +394,7 @@ def test_read_throttling(policy, mode, setting, value, should_took): # Remote # pytest.param("s3", None, None, None, 0, id="no_remote_throttling"), - # writing 1e6*8 bytes with 1M default bandwith should take (8-1)/1=7 seconds + # writing 1e6*8 bytes with 1M default bandwidth should take (8-1)/1=7 seconds pytest.param( "s3", "user", @@ -409,7 +403,7 @@ def test_read_throttling(policy, mode, setting, value, should_took): 7, id="user_remote_throttling", ), - # writing 1e6*8 bytes with 2M default bandwith should take (8-2)/2=3 seconds + # writing 1e6*8 bytes with 2M default bandwidth should take (8-2)/2=3 seconds pytest.param( "s3", "server", @@ -420,7 +414,7 @@ def test_read_throttling(policy, mode, setting, value, should_took): ), ], ) -def test_write_throttling(policy, mode, setting, value, should_took): +def test_write_throttling(policy, mode, setting, value, should_take): node_update_config(mode, setting, value) node.query( f""" @@ -429,7 +423,7 @@ def test_write_throttling(policy, mode, setting, value, should_took): """ ) _, took = elapsed(node.query, f"insert into data select * from numbers(1e6)") - assert_took(took, should_took) + assert_took(took, should_take) def test_max_mutations_bandwidth_for_server(): @@ -444,7 +438,7 @@ def test_max_mutations_bandwidth_for_server(): node.query, "alter table data update key = -key where 1 settings mutations_sync = 1", ) - # reading 1e6*8 bytes with 1M/s bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M/s bandwidth should take (8-1)/1=7 seconds assert_took(took, 7) @@ -457,5 +451,5 @@ def test_max_merges_bandwidth_for_server(): ) node.query("insert into data select * from numbers(1e6)") _, took = elapsed(node.query, "optimize table data final") - # reading 1e6*8 bytes with 1M/s bandwith should take (8-1)/1=7 seconds + # reading 1e6*8 bytes with 1M/s bandwidth should take (8-1)/1=7 seconds assert_took(took, 7) From 06ceaee50218507f49bfc714903240ad4b5d81a0 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Mon, 12 Aug 2024 11:09:45 +0000 Subject: [PATCH 185/265] Fix test 01903_correct_block_size_prediction_with_default - Don't allow random settings that affect the memory usage - Run two queries and compare the memory usage, rather than having an arbitrary hardcoded value --- ...ock_size_prediction_with_default.reference | 6 ++++ ...rect_block_size_prediction_with_default.sh | 36 +++++++++++++++++++ ...ect_block_size_prediction_with_default.sql | 13 ------- 3 files changed, 42 insertions(+), 13 deletions(-) create mode 100755 tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh delete mode 100644 tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql diff --git a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference index b70a1cb7c75..2c66db91737 100644 --- 
a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference +++ b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference @@ -1,3 +1,9 @@ 8 +8 +1 4 4 +1 +4 +4 +1 diff --git a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh new file mode 100755 index 00000000000..922dcb957e5 --- /dev/null +++ b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Tags: no-random-merge-tree-settings, no-random-settings + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +sql="toUInt16OrNull(arrayFirst((v, k) -> (k = '4Id'), arr[2], arr[1]))" + +# Create the table and fill it +$CLICKHOUSE_CLIENT -n --query=" + CREATE TABLE test_extract(str String, arr Array(Array(String)) ALIAS extractAllGroupsHorizontal(str, '\\W(\\w+)=(\"[^\"]*?\"|[^\",}]*)')) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY tuple(); + INSERT INTO test_extract (str) WITH range(8) as range_arr, arrayMap(x-> concat(toString(x),'Id'), range_arr) as key, arrayMap(x -> rand() % 8, range_arr) as val, arrayStringConcat(arrayMap((x,y) -> concat(x,'=',toString(y)), key, val),',') as str SELECT str FROM numbers(500000); + ALTER TABLE test_extract ADD COLUMN 15Id Nullable(UInt16) DEFAULT $sql;" + +function test() +{ + # Execute two queries and compare if they have similar memory usage: + # The first query uses the default column value, while the second explicitly uses the same SQL as the default value. + # Follow https://github.com/ClickHouse/ClickHouse/issues/17317 for more info about the issue + where=$1 + + uuid_1=$($CLICKHOUSE_CLIENT --query="SELECT generateUUIDv4()") + $CLICKHOUSE_CLIENT --query="SELECT uniq(15Id) FROM test_extract $where SETTINGS max_threads=1" --query_id=$uuid_1 + uuid_2=$($CLICKHOUSE_CLIENT --query="SELECT generateUUIDv4()") + $CLICKHOUSE_CLIENT --query="SELECT uniq($sql) FROM test_extract $where SETTINGS max_threads=1" --query_id=$uuid_2 + $CLICKHOUSE_CLIENT -n --query=" + SYSTEM FLUSH LOGS; + WITH memory_1 AS (SELECT memory_usage FROM system.query_log WHERE event_time > now() - INTERVAL 5 MINUTE AND query_id='$uuid_1' AND type = 'QueryFinish' as memory_1), + memory_2 AS (SELECT memory_usage FROM system.query_log WHERE event_time > now() - INTERVAL 5 MINUTE AND query_id='$uuid_2' AND type = 'QueryFinish' as memory_2) + SELECT memory_1.memory_usage <= 1.2 * memory_2.memory_usage FROM memory_1, memory_2;" +} + +test "" +test "PREWHERE 15Id < 4" +test "WHERE 15Id < 4" diff --git a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql deleted file mode 100644 index 2eec08635eb..00000000000 --- a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql +++ /dev/null @@ -1,13 +0,0 @@ --- Tags: no-random-merge-tree-settings - -CREATE TABLE test_extract(str String, arr Array(Array(String)) ALIAS extractAllGroupsHorizontal(str, '\\W(\\w+)=("[^"]*?"|[^",}]*)')) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY tuple(); - -INSERT INTO test_extract (str) WITH range(8) as range_arr, arrayMap(x-> concat(toString(x),'Id'), range_arr) as key, arrayMap(x -> rand() % 8, range_arr) as val, arrayStringConcat(arrayMap((x,y) -> concat(x,'=',toString(y)), key, val),',') as str SELECT str FROM numbers(500000); - -ALTER 
TABLE test_extract ADD COLUMN `15Id` Nullable(UInt16) DEFAULT toUInt16OrNull(arrayFirst((v, k) -> (k = '4Id'), arr[2], arr[1])); - -SELECT uniq(15Id) FROM test_extract SETTINGS max_threads=1, max_memory_usage=100000000; - -SELECT uniq(15Id) FROM test_extract PREWHERE 15Id < 4 SETTINGS max_threads=1, max_memory_usage=100000000; - -SELECT uniq(15Id) FROM test_extract WHERE 15Id < 4 SETTINGS max_threads=1, max_memory_usage=100000000; From 38f3131e11d1d777101a975361eb585fd6263300 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 12 Aug 2024 11:39:36 +0000 Subject: [PATCH 186/265] Fix review comments --- src/Columns/ColumnDynamic.cpp | 43 +++++++++++-------- src/Columns/ColumnDynamic.h | 12 ++++++ src/DataTypes/DataTypeDynamic.cpp | 4 +- .../Serializations/SerializationDynamic.cpp | 2 +- src/Functions/FunctionsConversion.cpp | 2 +- ..._read_shared_subcolumns_small.reference.j2 | 3 ++ 6 files changed, 43 insertions(+), 23 deletions(-) diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 7246be29592..b1d28342a28 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -115,8 +115,8 @@ bool ColumnDynamic::addNewVariant(const DataTypePtr & new_variant, const String if (variant_info.variant_name_to_discriminator.contains(new_variant_name)) return true; - /// Check if we reached maximum number of variants (don't count shared variant). - if (variant_info.variant_names.size() - 1 == max_dynamic_types) + /// Check if we reached maximum number of variants. + if (!canAddNewVariant()) { /// Dynamic column should always have shared variant. if (!variant_info.variant_name_to_discriminator.contains(getSharedVariantTypeName())) @@ -194,8 +194,8 @@ std::vector * ColumnDynamic::combineVariants(const { const DataTypes & current_variants = assert_cast(*variant_info.variant_type).getVariants(); - /// We cannot combine Variants if total number of variants exceeds max_dynamic_types (don't count shared variant). - if (current_variants.size() + num_new_variants - 1 > max_dynamic_types) + /// We cannot combine Variants if total number of variants exceeds max_dynamic_types. + if (!canAddNewVariants(num_new_variants)) { /// Remember that we cannot combine our variant with this one, so we will not try to do it again. variants_with_failed_combination.insert(other_variant_info.variant_name); @@ -403,11 +403,11 @@ void ColumnDynamic::doInsertRangeFrom(const IColumn & src_, size_t start, size_t auto shared_variant_discr = getSharedVariantDiscriminator(); variant_col.insertRangeFrom(*dynamic_src.variant_column, start, length, *global_discriminators_mapping, shared_variant_discr); - /// We should process insertion from srs shared variant separately, because it can contain + /// We should process insertion from src shared variant separately, because it can contain /// values that should be extracted into our variants. insertRangeFrom above didn't insert /// values into our shared variant (we specified shared_variant_discr as special skip discriminator). - /// Check if srs shared variant is empty, nothing to do in this case. + /// Check if src shared variant is empty, nothing to do in this case. if (dynamic_src.getSharedVariant().empty()) return; @@ -466,7 +466,7 @@ void ColumnDynamic::doInsertRangeFrom(const IColumn & src_, size_t start, size_t other_to_new_discriminators.reserve(dynamic_src.variant_info.variant_names.size()); /// Check if we cannot add any more new variants. In this case we will insert all new variants into shared variant. 
- if (variant_info.variant_names.size() - 1 == max_dynamic_types) + if (!canAddNewVariant()) { auto shared_variant_discr = getSharedVariantDiscriminator(); for (const auto & variant_name : dynamic_src.variant_info.variant_names) @@ -496,7 +496,7 @@ void ColumnDynamic::doInsertRangeFrom(const IColumn & src_, size_t start, size_t /// Add new variants from sorted list until we reach max_dynamic_types. for (const auto & [_, discr] : new_variants_with_sizes) { - if (new_variants.size() - 1 == max_dynamic_types) + if (!canAddNewVariant(new_variants.size())) break; new_variants.push_back(src_variants[discr]); } @@ -846,13 +846,17 @@ int ColumnDynamic::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_ /// Check if both values are in shared variant. if (left_discr == left_shared_variant_discr && right_discr == right_shared_variant_discr) { - /// Extract type names from both values. + /// First check if both type and value are equal. auto left_value = getSharedVariant().getDataAt(left_variant.offsetAt(n)); + auto right_value = right_dynamic.getSharedVariant().getDataAt(right_variant.offsetAt(m)); + if (left_value == right_value) + return 0; + + /// Extract type names from both values. ReadBufferFromMemory buf_left(left_value.data, left_value.size); auto left_data_type = decodeDataType(buf_left); auto left_data_type_name = left_data_type->getName(); - auto right_value = right_dynamic.getSharedVariant().getDataAt(right_variant.offsetAt(m)); ReadBufferFromMemory buf_right(right_value.data, right_value.size); auto right_data_type = decodeDataType(buf_right); auto right_data_type_name = right_data_type->getName(); @@ -977,8 +981,6 @@ ColumnPtr ColumnDynamic::compress() const void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { - LOG_DEBUG(getLogger("ColumnDynamic"), "takeDynamicStructureFromSourceColumns"); - if (!empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "takeDynamicStructureFromSourceColumns should be called only on empty Dynamic column"); @@ -1050,8 +1052,10 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source DataTypePtr result_variant_type; Statistics new_statistics(Statistics::Source::MERGE); + /// Reset max_dynamic_types to global_max_dynamic_types. + max_dynamic_types = global_max_dynamic_types; /// Check if the number of all dynamic types exceeds the limit. - if (all_variants.size() - 1 > global_max_dynamic_types) + if (!canAddNewVariants(0, all_variants.size())) { /// Create list of variants with their sizes and sort it. std::vector> variants_with_sizes; @@ -1065,11 +1069,13 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source /// Take first max_dynamic_types variants from sorted list and fill shared_variants_statistics with the rest. DataTypes result_variants; - result_variants.reserve(global_max_dynamic_types + 1); + result_variants.reserve(max_dynamic_types + 1); /// +1 for shared variant. + /// Add shared variant. + result_variants.push_back(getSharedVariantDataType()); for (const auto & [size, variant] : variants_with_sizes) { /// Add variant to the resulting variants list until we reach max_dynamic_types. - if (result_variants.size() < global_max_dynamic_types) + if (canAddNewVariant(result_variants.size())) result_variants.push_back(variant); /// Add all remaining variants into shared_variants_statistics until we reach its max size. 
else if (new_statistics.shared_variants_statistics.size() < Statistics::MAX_SHARED_VARIANT_STATISTICS_SIZE)
@@ -1078,8 +1084,6 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source
             break;
         }

-        /// Add shared variant.
-        result_variants.push_back(getSharedVariantDataType());
         result_variant_type = std::make_shared<DataTypeVariant>(result_variants);
     }
     else
@@ -1094,8 +1098,9 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source
         new_statistics.variants_statistics[variant_name] = total_sizes[variant_name];
     statistics = std::make_shared<Statistics>(std::move(new_statistics));

-    /// Reduce max_dynamic_types to the number of selected variants (without shared variant), so there will be no possibility
+    /// Reduce max_dynamic_types to the number of selected variants, so there will be no possibility
     /// to extend selected variants on inerts into this column during merges.
+    /// -1 because we don't count shared variant in the limit.
     max_dynamic_types = variant_info.variant_names.size() - 1;

     /// Now we have the resulting Variant that will be used in all merged columns.
@@ -1112,7 +1117,7 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source
         {
             /// Try to find this variant in current source column.
             auto it = source_variant_info.variant_name_to_discriminator.find(variant_info.variant_names[i]);
-            if (it != source_variant_info.variant_name_to_discriminator.end())
+            if (it != source_variant_info.variant_name_to_discriminator.end()) /// Add shared variant.
                 variants_source_columns[i].push_back(source_dynamic_column.getVariantColumn().getVariantPtrByGlobalDiscriminator(it->second));
     }
 }
diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h
index a595a990964..1f050c9079e 100644
--- a/src/Columns/ColumnDynamic.h
+++ b/src/Columns/ColumnDynamic.h
@@ -27,6 +27,10 @@ namespace DB
 class ColumnDynamic final : public COWHelper<IColumnHelper<ColumnDynamic>, ColumnDynamic>
 {
 public:
+    /// Maximum limit on dynamic types. We use ColumnVariant to store all the types,
+    /// so the limit cannot be greater than ColumnVariant::MAX_NESTED_COLUMNS.
+    /// We also always have a reserved variant for the shared variant.
+    static constexpr size_t MAX_DYNAMIC_TYPES_LIMIT = ColumnVariant::MAX_NESTED_COLUMNS - 1;
     static constexpr const char * SHARED_VARIANT_TYPE_NAME = "SharedVariant";

     struct Statistics
@@ -359,6 +363,14 @@ public:

     size_t getMaxDynamicTypes() const { return max_dynamic_types; }

+    /// Check if we can add new variant types.
+    /// The shared variant doesn't count in the limit but is always present,
+    /// so we should subtract 1 from the total types count.
+ bool canAddNewVariants(size_t current_variants_count, size_t new_variants_count) { return current_variants_count + new_variants_count - 1 <= max_dynamic_types; } + bool canAddNewVariant(size_t current_variants_count) { return canAddNewVariants(current_variants_count, 1); } + bool canAddNewVariants(size_t new_variants_count) { return canAddNewVariants(variant_info.variant_names.size(), new_variants_count); } + bool canAddNewVariant() { return canAddNewVariants(variant_info.variant_names.size(), 1); } + void setVariantType(const DataTypePtr & variant_type); void setMaxDynamicPaths(size_t max_dynamic_type_); diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp index e00638a50ab..04e76df57fe 100644 --- a/src/DataTypes/DataTypeDynamic.cpp +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -73,8 +73,8 @@ static DataTypePtr create(const ASTPtr & arguments) auto * literal = argument->arguments->children[1]->as(); - if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get() > ColumnVariant::MAX_NESTED_COLUMNS - 1) - throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 0 and 254"); + if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get() > ColumnDynamic::MAX_DYNAMIC_TYPES_LIMIT) + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 0 and {}", ColumnDynamic::MAX_DYNAMIC_TYPES_LIMIT); return std::make_shared(literal->value.get()); } diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp index 5fadb6e4de4..9cd0adcc2ed 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.cpp +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -288,7 +288,7 @@ ISerialization::DeserializeBinaryBulkStatePtr SerializationDynamic::deserializeD statistics.variants_statistics[variant->getName()] = variant_size; } - /// Second, rend statistics for shared variants. + /// Second, read statistics for shared variants. size_t statistics_size; readVarUInt(statistics_size, *structure_stream); String variant_name; diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 21b98cf505c..660efb46b37 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -4335,7 +4335,7 @@ private: auto type = decodeDataType(buf); auto type_name = type->getName(); auto it = shared_variant_to_index.find(type_name); - /// Check if didn't created column for this variant yet. + /// Check if we didn't create column for this variant yet. 
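For orientation, a hedged SQL sketch of the tightened `max_types` validation above (table names invented; the bound of 254 follows from `MAX_DYNAMIC_TYPES_LIMIT = ColumnVariant::MAX_NESTED_COLUMNS - 1` with `MAX_NESTED_COLUMNS` being 255, as the old error message implied):

```sql
SET allow_experimental_dynamic_type = 1;

-- Accepted: 254 equals ColumnDynamic::MAX_DYNAMIC_TYPES_LIMIT; one slot stays
-- reserved for the shared variant.
CREATE TABLE t_ok (d Dynamic(max_types = 254)) ENGINE = Memory;

-- Rejected with UNEXPECTED_AST_STRUCTURE: 255 exceeds the limit.
CREATE TABLE t_bad (d Dynamic(max_types = 255)) ENGINE = Memory;
```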
if (it == shared_variant_to_index.end()) { it = shared_variant_to_index.emplace(type_name, variant_columns_from_shared_variant.size()).first; diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.reference.j2 b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.reference.j2 index 9c1f8fa45e8..de12c6b8737 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.reference.j2 +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.reference.j2 @@ -1,3 +1,4 @@ +Memory Array(Array(Dynamic)) Array(Variant(String, UInt64)) LowCardinality(String) @@ -818,6 +819,7 @@ str_79 0 [] [] [] [] [] [] [] [] [] [] [] +MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000 Array(Array(Dynamic)) Array(Variant(String, UInt64)) LowCardinality(String) @@ -1638,6 +1640,7 @@ str_79 0 [] [] [] [] [] [] [] [] [] [] [] +MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1 Array(Array(Dynamic)) Array(Variant(String, UInt64)) LowCardinality(String) From b58a22aba7f590ada33bfce95dd525c4c8a414ae Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 12 Aug 2024 11:41:42 +0000 Subject: [PATCH 187/265] Update test --- .../03036_dynamic_read_shared_subcolumns_small.sql.j2 | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.sql.j2 b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.sql.j2 index 0c123d5f6fe..dde4f3f53c3 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.sql.j2 +++ b/tests/queries/0_stateless/03036_dynamic_read_shared_subcolumns_small.sql.j2 @@ -6,6 +6,7 @@ drop table if exists test; {% for engine in ['Memory', 'MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000', 'MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1'] -%} +select '{{ engine }}'; create table test (id UInt64, d Dynamic(max_types=2)) engine={{ engine }}; insert into test select number, number from numbers(10); From 3172bf8d76534bb46ce54ae6af96e14443d2b59b Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 12 Aug 2024 12:23:32 +0000 Subject: [PATCH 188/265] better accounting of time for merge of projections --- .../Transforms/MergeJoinTransform.cpp | 2 +- src/Storages/MergeTree/MergeTask.cpp | 24 ++++++++++++++----- .../03221_merge_profile_events.reference | 2 +- .../03221_merge_profile_events.sql | 8 ++++--- 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index ec7f567ea57..6abfa0fccd0 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -1282,7 +1282,7 @@ MergeJoinTransform::MergeJoinTransform( void MergeJoinTransform::onFinish() { - algorithm.logElapsed(merging_elapsed_ns / 1000000000ULL); + algorithm.logElapsed(static_cast(merging_elapsed_ns) / 1000000000ULL); } } diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 39bac8f7c24..cb1921ede2b 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -461,8 +461,12 @@ void MergeTask::addGatheringColumn(GlobalRuntimeContextPtr global_ctx, const Str MergeTask::StageRuntimeContextPtr MergeTask::ExecuteAndFinalizeHorizontalPart::getContextForNextStage() { - 
ProfileEvents::increment(ProfileEvents::MergeExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); - ProfileEvents::increment(ProfileEvents::MergeHorizontalStageExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + /// Do not increment for projection stage because time is already accounted in main task. + if (global_ctx->parent_part == nullptr) + { + ProfileEvents::increment(ProfileEvents::MergeExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + ProfileEvents::increment(ProfileEvents::MergeHorizontalStageExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + } auto new_ctx = std::make_shared(); @@ -481,8 +485,12 @@ MergeTask::StageRuntimeContextPtr MergeTask::ExecuteAndFinalizeHorizontalPart::g MergeTask::StageRuntimeContextPtr MergeTask::VerticalMergeStage::getContextForNextStage() { - ProfileEvents::increment(ProfileEvents::MergeExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); - ProfileEvents::increment(ProfileEvents::MergeVerticalStageExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + /// Do not increment for projection stage because time is already accounted in main task. + if (global_ctx->parent_part == nullptr) + { + ProfileEvents::increment(ProfileEvents::MergeExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + ProfileEvents::increment(ProfileEvents::MergeVerticalStageExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + } auto new_ctx = std::make_shared(); new_ctx->need_sync = std::move(ctx->need_sync); @@ -1026,8 +1034,12 @@ bool MergeTask::execute() UInt64 stage_elapsed_ms = current_elapsed_ms - global_ctx->prev_elapsed_ms; global_ctx->prev_elapsed_ms = current_elapsed_ms; - ProfileEvents::increment(current_stage->getTotalTimeProfileEvent(), stage_elapsed_ms); - ProfileEvents::increment(ProfileEvents::MergeTotalMilliseconds, stage_elapsed_ms); + /// Do not increment for projection stage because time is already accounted in main task. 
+ if (global_ctx->parent_part == nullptr) + { + ProfileEvents::increment(current_stage->getTotalTimeProfileEvent(), stage_elapsed_ms); + ProfileEvents::increment(ProfileEvents::MergeTotalMilliseconds, stage_elapsed_ms); + } auto next_stage_context = current_stage->getContextForNextStage(); diff --git a/tests/queries/0_stateless/03221_merge_profile_events.reference b/tests/queries/0_stateless/03221_merge_profile_events.reference index 729e53eae79..d969717336b 100644 --- a/tests/queries/0_stateless/03221_merge_profile_events.reference +++ b/tests/queries/0_stateless/03221_merge_profile_events.reference @@ -1,3 +1,3 @@ Horizontal 1 20000 3 0 480000 1 1 1 1 Vertical 1 20000 1 2 480000 1 1 1 1 1 1 -Vertical 2 20020 4 2 480660 1 1 1 1 1 1 1 1 +Vertical 2 400000 2 6 12800000 1 1 1 1 1 1 1 1 1 1 diff --git a/tests/queries/0_stateless/03221_merge_profile_events.sql b/tests/queries/0_stateless/03221_merge_profile_events.sql index 787aff93ffc..1aa3dd266f8 100644 --- a/tests/queries/0_stateless/03221_merge_profile_events.sql +++ b/tests/queries/0_stateless/03221_merge_profile_events.sql @@ -58,12 +58,12 @@ DROP TABLE IF EXISTS t_merge_profile_events_2; DROP TABLE IF EXISTS t_merge_profile_events_3; -CREATE TABLE t_merge_profile_events_3 (id UInt64, v1 UInt64, v2 UInt64, PROJECTION p (SELECT sum(v1), sum(v2) GROUP BY id % 10)) +CREATE TABLE t_merge_profile_events_3 (id UInt64, v1 UInt64, v2 UInt64, PROJECTION p (SELECT v2, v2 * v2, v2 * 2, v2 * 10, v1 ORDER BY v1)) ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 0, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1; -INSERT INTO t_merge_profile_events_3 SELECT number, number, number FROM numbers(10000); -INSERT INTO t_merge_profile_events_3 SELECT number, number, number FROM numbers(10000); +INSERT INTO t_merge_profile_events_3 SELECT number, number, number FROM numbers(100000); +INSERT INTO t_merge_profile_events_3 SELECT number, number, number FROM numbers(100000); OPTIMIZE TABLE t_merge_profile_events_3 FINAL; SYSTEM FLUSH LOGS; @@ -83,6 +83,8 @@ SELECT ProfileEvents['MergeVerticalStageExecuteMilliseconds'] > 0, ProfileEvents['MergeProjectionStageTotalMilliseconds'] > 0, ProfileEvents['MergeProjectionStageExecuteMilliseconds'] > 0, + ProfileEvents['MergeExecuteMilliseconds'] <= duration_ms, + ProfileEvents['MergeTotalMilliseconds'] <= duration_ms FROM system.part_log WHERE database = currentDatabase() AND table = 't_merge_profile_events_3' AND event_type = 'MergeParts' AND part_name = 'all_1_2_1'; DROP TABLE IF EXISTS t_merge_profile_events_3; From f7c6eabb498b47b21c13dbf55efbda551902d09c Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Mon, 12 Aug 2024 13:44:05 +0000 Subject: [PATCH 189/265] Small fix to filter by current_database in system.query_log --- ...01903_correct_block_size_prediction_with_default.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh index 922dcb957e5..e898a9d5ee8 100755 --- a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh +++ b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh @@ -20,15 +20,15 @@ function test() # Follow https://github.com/ClickHouse/ClickHouse/issues/17317 for more info about the issue where=$1 - uuid_1=$($CLICKHOUSE_CLIENT --query="SELECT generateUUIDv4()") + uuid_1=$(cat 
/proc/sys/kernel/random/uuid) $CLICKHOUSE_CLIENT --query="SELECT uniq(15Id) FROM test_extract $where SETTINGS max_threads=1" --query_id=$uuid_1 - uuid_2=$($CLICKHOUSE_CLIENT --query="SELECT generateUUIDv4()") + uuid_2=$(cat /proc/sys/kernel/random/uuid) $CLICKHOUSE_CLIENT --query="SELECT uniq($sql) FROM test_extract $where SETTINGS max_threads=1" --query_id=$uuid_2 $CLICKHOUSE_CLIENT -n --query=" SYSTEM FLUSH LOGS; - WITH memory_1 AS (SELECT memory_usage FROM system.query_log WHERE event_time > now() - INTERVAL 5 MINUTE AND query_id='$uuid_1' AND type = 'QueryFinish' as memory_1), - memory_2 AS (SELECT memory_usage FROM system.query_log WHERE event_time > now() - INTERVAL 5 MINUTE AND query_id='$uuid_2' AND type = 'QueryFinish' as memory_2) - SELECT memory_1.memory_usage <= 1.2 * memory_2.memory_usage FROM memory_1, memory_2;" + WITH memory_1 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_1' AND type = 'QueryFinish' as memory_1), + memory_2 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_2' AND type = 'QueryFinish' as memory_2) + SELECT memory_1.memory_usage <= 1.2 * memory_2.memory_usage FROM memory_1, memory_2;" } test "" From da5b9582a990f7a2c05c1a3dede3739fb9cbfcae Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Mon, 12 Aug 2024 13:54:17 +0000 Subject: [PATCH 190/265] Fix indent --- .../01903_correct_block_size_prediction_with_default.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh index e898a9d5ee8..075d9a1dacf 100755 --- a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh +++ b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh @@ -26,9 +26,9 @@ function test() $CLICKHOUSE_CLIENT --query="SELECT uniq($sql) FROM test_extract $where SETTINGS max_threads=1" --query_id=$uuid_2 $CLICKHOUSE_CLIENT -n --query=" SYSTEM FLUSH LOGS; - WITH memory_1 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_1' AND type = 'QueryFinish' as memory_1), - memory_2 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_2' AND type = 'QueryFinish' as memory_2) - SELECT memory_1.memory_usage <= 1.2 * memory_2.memory_usage FROM memory_1, memory_2;" + WITH memory_1 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_1' AND type = 'QueryFinish' as memory_1), + memory_2 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_2' AND type = 'QueryFinish' as memory_2) + SELECT memory_1.memory_usage <= 1.2 * memory_2.memory_usage FROM memory_1, memory_2;" } test "" From eb3ffb71847fc5a204af31665a4594b0918fc1d7 Mon Sep 17 00:00:00 2001 From: divanik Date: Mon, 12 Aug 2024 15:09:16 +0000 Subject: [PATCH 191/265] Add supportsReplication --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 625b1281c61..f925cb773f5 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2351,7 +2351,7 @@ size_t MergeTreeData::clearOldTemporaryDirectories(const String & 
root_path, siz /// We don't control the amount of refs for temporary parts so we cannot decide can we remove blobs /// or not. So we are not doing it bool keep_shared = false; - if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication) + if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication && supportsReplication()) { LOG_WARNING(log, "Since zero-copy replication is enabled we are not going to remove blobs from shared storage for {}", full_path); keep_shared = true; From a39b9cf643bff565728be4083eb024ff5254f363 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sat, 11 May 2024 13:05:24 +0000 Subject: [PATCH 192/265] Un-screw usearch's build description No directory 'SimSIMD-map' exists, the build only worked because SimSIMD support in usearch was (accidentally?) disabled. This commit corrects the build description. SimSIMD support in usearch will be enabled by a later commit. --- contrib/CMakeLists.txt | 2 +- contrib/usearch-cmake/CMakeLists.txt | 8 +++----- src/Storages/MergeTree/MergeTreeIndexUSearch.h | 1 - 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index b33e7083e32..98b992e1080 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -207,7 +207,7 @@ if (ARCH_S390X) endif() add_contrib (annoy-cmake annoy) -option(ENABLE_USEARCH "Enable USearch (Approximate Neighborhood Search, HNSW) support" ${ENABLE_LIBRARIES}) +option(ENABLE_USEARCH "Enable USearch" ${ENABLE_LIBRARIES}) if (ENABLE_USEARCH) add_contrib (FP16-cmake FP16) add_contrib (robin-map-cmake robin-map) diff --git a/contrib/usearch-cmake/CMakeLists.txt b/contrib/usearch-cmake/CMakeLists.txt index 29fbe57106c..0b6f60e106b 100644 --- a/contrib/usearch-cmake/CMakeLists.txt +++ b/contrib/usearch-cmake/CMakeLists.txt @@ -1,9 +1,7 @@ -set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch") -set(USEARCH_SOURCE_DIR "${USEARCH_PROJECT_DIR}/include") - set(FP16_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/FP16") set(ROBIN_MAP_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/robin-map") -set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD-map") +set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD") +set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch") add_library(_usearch INTERFACE) @@ -11,7 +9,7 @@ target_include_directories(_usearch SYSTEM INTERFACE ${FP16_PROJECT_DIR}/include ${ROBIN_MAP_PROJECT_DIR}/include ${SIMSIMD_PROJECT_DIR}/include - ${USEARCH_SOURCE_DIR}) + ${USEARCH_PROJECT_DIR}/include) add_library(ch_contrib::usearch ALIAS _usearch) target_compile_definitions(_usearch INTERFACE ENABLE_USEARCH) diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.h b/src/Storages/MergeTree/MergeTreeIndexUSearch.h index 41de94402c9..e6068790d22 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.h +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.h @@ -113,4 +113,3 @@ private: #endif - From d7211f9d12d33c54929fb24991fe7e46939be67d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 9 Aug 2024 09:22:38 +0000 Subject: [PATCH 193/265] Fix CMake integration of usearch and annoy Registers usearch and annoy properly via configure_config.cmake and config.h.in like all other 3rd party libs, instead of (mis)using target_compile_definitions. 
--- contrib/annoy-cmake/CMakeLists.txt | 1 - contrib/usearch-cmake/CMakeLists.txt | 1 - src/Common/config.h.in | 2 ++ src/Processors/QueryPlan/ReadFromMergeTree.cpp | 6 ++++-- src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeIndexAnnoy.h | 4 +++- src/Storages/MergeTree/MergeTreeIndexUSearch.cpp | 6 +++--- src/Storages/MergeTree/MergeTreeIndexUSearch.h | 8 ++++++-- src/Storages/MergeTree/MergeTreeIndices.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeIndices.h | 5 +++-- src/configure_config.cmake | 6 ++++++ 11 files changed, 31 insertions(+), 16 deletions(-) diff --git a/contrib/annoy-cmake/CMakeLists.txt b/contrib/annoy-cmake/CMakeLists.txt index bdef7d92132..f6579c12412 100644 --- a/contrib/annoy-cmake/CMakeLists.txt +++ b/contrib/annoy-cmake/CMakeLists.txt @@ -20,5 +20,4 @@ add_library(_annoy INTERFACE) target_include_directories(_annoy SYSTEM INTERFACE ${ANNOY_SOURCE_DIR}) add_library(ch_contrib::annoy ALIAS _annoy) -target_compile_definitions(_annoy INTERFACE ENABLE_ANNOY) target_compile_definitions(_annoy INTERFACE ANNOYLIB_MULTITHREADED_BUILD) diff --git a/contrib/usearch-cmake/CMakeLists.txt b/contrib/usearch-cmake/CMakeLists.txt index 0b6f60e106b..6be622275ae 100644 --- a/contrib/usearch-cmake/CMakeLists.txt +++ b/contrib/usearch-cmake/CMakeLists.txt @@ -12,4 +12,3 @@ target_include_directories(_usearch SYSTEM INTERFACE ${USEARCH_PROJECT_DIR}/include) add_library(ch_contrib::usearch ALIAS _usearch) -target_compile_definitions(_usearch INTERFACE ENABLE_USEARCH) diff --git a/src/Common/config.h.in b/src/Common/config.h.in index e3f8882850f..0fa5f4313b2 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -58,6 +58,8 @@ #cmakedefine01 USE_FILELOG #cmakedefine01 USE_ODBC #cmakedefine01 USE_BLAKE3 +#cmakedefine01 USE_ANNOY +#cmakedefine01 USE_USEARCH #cmakedefine01 USE_SKIM #cmakedefine01 USE_PRQL #cmakedefine01 USE_ULID diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 901d7c61167..0ec7bde933c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -52,6 +52,8 @@ #include #include +#include "config.h" + using namespace DB; namespace @@ -1476,11 +1478,11 @@ static void buildIndexes( MergeTreeIndexConditionPtr condition; if (index_helper->isVectorSearch()) { -#ifdef ENABLE_ANNOY +#if USE_ANNOY if (const auto * annoy = typeid_cast(index_helper.get())) condition = annoy->createIndexCondition(query_info, context); #endif -#ifdef ENABLE_USEARCH +#if USE_USEARCH if (const auto * usearch = typeid_cast(index_helper.get())) condition = usearch->createIndexCondition(query_info, context); #endif diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index b68e48eeb3a..cec0e0926f0 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -1,7 +1,7 @@ -#ifdef ENABLE_ANNOY - #include +#if USE_ANNOY + #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h index 282920c608e..8e0e0e621a0 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h @@ -1,6 +1,8 @@ #pragma once -#ifdef ENABLE_ANNOY +#include "config.h" + +#if USE_ANNOY #include diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp index efd9bb754e1..5a532803d84 100644 --- 
a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp @@ -1,10 +1,10 @@ -#ifdef ENABLE_USEARCH +#include + +#if USE_USEARCH #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wpass-failed" -#include - #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.h b/src/Storages/MergeTree/MergeTreeIndexUSearch.h index e6068790d22..6923ef2f807 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.h +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.h @@ -1,12 +1,16 @@ #pragma once -#ifdef ENABLE_USEARCH +#include "config.h" -#include +#if USE_USEARCH #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wpass-failed" + +#include + #include + #pragma clang diagnostic pop namespace DB diff --git a/src/Storages/MergeTree/MergeTreeIndices.cpp b/src/Storages/MergeTree/MergeTreeIndices.cpp index bded961db8e..32ac629e706 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.cpp +++ b/src/Storages/MergeTree/MergeTreeIndices.cpp @@ -127,12 +127,12 @@ MergeTreeIndexFactory::MergeTreeIndexFactory() registerCreator("hypothesis", hypothesisIndexCreator); registerValidator("hypothesis", hypothesisIndexValidator); -#ifdef ENABLE_ANNOY +#if USE_ANNOY registerCreator("annoy", annoyIndexCreator); registerValidator("annoy", annoyIndexValidator); #endif -#ifdef ENABLE_USEARCH +#if USE_USEARCH registerCreator("usearch", usearchIndexCreator); registerValidator("usearch", usearchIndexValidator); #endif diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 1be73e1c811..355f1b69356 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -15,6 +15,7 @@ #include #include +#include "config.h" constexpr auto INDEX_FILE_PREFIX = "skp_idx_"; @@ -230,12 +231,12 @@ void bloomFilterIndexValidator(const IndexDescription & index, bool attach); MergeTreeIndexPtr hypothesisIndexCreator(const IndexDescription & index); void hypothesisIndexValidator(const IndexDescription & index, bool attach); -#ifdef ENABLE_ANNOY +#if USE_ANNOY MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index); void annoyIndexValidator(const IndexDescription & index, bool attach); #endif -#ifdef ENABLE_USEARCH +#if USE_USEARCH MergeTreeIndexPtr usearchIndexCreator(const IndexDescription& index); void usearchIndexValidator(const IndexDescription& index, bool attach); #endif diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 5b24f79ef6e..702875b1f40 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -164,6 +164,12 @@ endif() if (TARGET ch_contrib::bcrypt) set(USE_BCRYPT 1) endif() +if (TARGET ch_contrib::annoy) + set(USE_ANNOY 1) +endif() +if (TARGET ch_contrib::usearch) + set(USE_USEARCH 1) +endif() if (TARGET ch_contrib::ssh) set(USE_SSH 1) endif() From 7c419399216a714f9dcffe7835f951718851bceb Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 9 Aug 2024 09:36:39 +0000 Subject: [PATCH 194/265] Fix test results (no analyzer support yet ...) 
---
 tests/queries/0_stateless/02354_vector_search_queries.sql | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/queries/0_stateless/02354_vector_search_queries.sql b/tests/queries/0_stateless/02354_vector_search_queries.sql
index 64051aa8544..87d27be0ea4 100644
--- a/tests/queries/0_stateless/02354_vector_search_queries.sql
+++ b/tests/queries/0_stateless/02354_vector_search_queries.sql
@@ -8,6 +8,8 @@ SET allow_experimental_annoy_index = 1;
 SET allow_experimental_usearch_index = 1;

+SET enable_analyzer = 0;
+
 SELECT 'ARRAY, 10 rows, index_granularity = 8192, GRANULARITY = 1 million --> 1 granule, 1 indexed block';

 DROP TABLE IF EXISTS tab_annoy;

From 218421c255cadbe65406e6a040d05942cc4efc3e Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Fri, 9 Aug 2024 09:47:50 +0000
Subject: [PATCH 195/265] Remove Annoy indexes

Annoy indexes fell out of favor in the community, at least when it comes
to vector databases. Such indexes work okay-ish in low dimensions but they
suffer badly from the curse of dimensionality, which makes them unsuitable
for a high number of dimensions.

Now that Annoy is gone, issue (*) also disappears and we can drop
'no-ubsan', 'no-cpu-aarch64', and 'no-asan' from tests.

(*) spotify/annoy#456
---
 .gitmodules                                   |   3 -
 contrib/CMakeLists.txt                        |   1 -
 contrib/annoy                                 |   1 -
 contrib/annoy-cmake/CMakeLists.txt            |  23 -
 .../mergetree-family/annindexes.md            |  87 +---
 src/CMakeLists.txt                            |   4 -
 src/Common/config.h.in                        |   1 -
 src/Core/Settings.h                           |   6 +-
 src/Databases/DatabaseReplicated.cpp          |   1 -
 src/Interpreters/InterpreterCreateQuery.cpp   |   2 -
 src/Parsers/ASTIndexDeclaration.h             |   1 -
 src/Parsers/ParserCreateIndexQuery.cpp        |   4 +-
 src/Parsers/ParserCreateQuery.cpp             |   4 +-
 .../QueryPlan/ReadFromMergeTree.cpp           |   5 -
 .../MergeTree/MergeTreeIOSettings.cpp         |   1 -
 src/Storages/MergeTree/MergeTreeIOSettings.h  |   2 -
 .../MergeTree/MergeTreeIndexAnnoy.cpp         | 416 ------------------
 src/Storages/MergeTree/MergeTreeIndexAnnoy.h  | 114 -----
 src/Storages/MergeTree/MergeTreeIndices.cpp   |   4 -
 src/Storages/MergeTree/MergeTreeIndices.h     |   5 -
 src/configure_config.cmake                    |   3 -
 .../02354_vector_search_bugs.reference        |  10 -
 .../0_stateless/02354_vector_search_bugs.sql  |  75 +---
 ...ector_search_default_granularity.reference |   6 +-
 ...2354_vector_search_default_granularity.sql |  21 +-
 ...r_search_index_creation_negative.reference |   2 +-
 ..._vector_search_index_creation_negative.sql |  22 +-
 .../02354_vector_search_queries.reference     |  99 -----
 .../02354_vector_search_queries.sql           | 119 +----
 29 files changed, 32 insertions(+), 1010 deletions(-)
 delete mode 160000 contrib/annoy
 delete mode 100644 contrib/annoy-cmake/CMakeLists.txt
 delete mode 100644 src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
 delete mode 100644 src/Storages/MergeTree/MergeTreeIndexAnnoy.h

diff --git a/.gitmodules b/.gitmodules
index 7fdfb1103c5..0a66031de8d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -230,9 +230,6 @@
 [submodule "contrib/minizip-ng"]
 	path = contrib/minizip-ng
 	url = https://github.com/zlib-ng/minizip-ng
-[submodule "contrib/annoy"]
-	path = contrib/annoy
-	url = https://github.com/ClickHouse/annoy
 [submodule "contrib/qpl"]
 	path = contrib/qpl
 	url = https://github.com/intel/qpl
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 98b992e1080..dc2ad2a3150 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -205,7 +205,6 @@ add_contrib (morton-nd-cmake morton-nd)
 if (ARCH_S390X)
     add_contrib(crc32-s390x-cmake crc32-s390x)
 endif()
-add_contrib (annoy-cmake annoy)

 option(ENABLE_USEARCH "Enable USearch" ${ENABLE_LIBRARIES})
 if
(ENABLE_USEARCH) diff --git a/contrib/annoy b/contrib/annoy deleted file mode 160000 index f2ac8e7b48f..00000000000 --- a/contrib/annoy +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f2ac8e7b48f9a9cf676d3b58286e5455aba8e956 diff --git a/contrib/annoy-cmake/CMakeLists.txt b/contrib/annoy-cmake/CMakeLists.txt deleted file mode 100644 index f6579c12412..00000000000 --- a/contrib/annoy-cmake/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -option(ENABLE_ANNOY "Enable Annoy index support" ${ENABLE_LIBRARIES}) - -# Annoy index should be disabled with undefined sanitizer. Because of memory storage optimizations -# (https://github.com/ClickHouse/annoy/blob/9d8a603a4cd252448589e84c9846f94368d5a289/src/annoylib.h#L442-L463) -# UBSan fails and leads to crash. Simmilar issue is already opened in Annoy repo -# https://github.com/spotify/annoy/issues/456 -# Problem with aligment can lead to errors like -# (https://stackoverflow.com/questions/46790550/c-undefined-behavior-strict-aliasing-rule-or-incorrect-alignment) -# or will lead to crash on arm https://developer.arm.com/documentation/ka003038/latest -# This issues should be resolved before annoy became non-experimental (--> setting "allow_experimental_annoy_index") -if ((NOT ENABLE_ANNOY) OR (SANITIZE STREQUAL "undefined") OR (ARCH_AARCH64)) - message (STATUS "Not using annoy") - return() -endif() - -set(ANNOY_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/annoy") -set(ANNOY_SOURCE_DIR "${ANNOY_PROJECT_DIR}/src") - -add_library(_annoy INTERFACE) -target_include_directories(_annoy SYSTEM INTERFACE ${ANNOY_SOURCE_DIR}) - -add_library(ch_contrib::annoy ALIAS _annoy) -target_compile_definitions(_annoy INTERFACE ANNOYLIB_MULTITHREADED_BUILD) diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md index 5a81313f62e..9a80542522e 100644 --- a/docs/en/engines/table-engines/mergetree-family/annindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md @@ -126,81 +126,8 @@ was specified for ANN indexes, the default value is 100 million. # Available ANN Indexes {#available_ann_indexes} -- [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy) - - [USearch](/docs/en/engines/table-engines/mergetree-family/annindexes.md#usearch-usearch) -## Annoy {#annoy} - -Annoy indexes are currently experimental, to use them you first need to `SET allow_experimental_annoy_index = 1`. They are also currently -disabled on ARM due to memory safety problems with the algorithm. - -This type of ANN index is based on the [Annoy library](https://github.com/spotify/annoy) which recursively divides the space into random -linear surfaces (lines in 2D, planes in 3D etc.). - -
- -Syntax to create an Annoy index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column: - -```sql -CREATE TABLE table_with_annoy_index -( - id Int64, - vectors Array(Float32), - INDEX [ann_index_name] vectors TYPE annoy([Distance[, NumTrees]]) [GRANULARITY N] -) -ENGINE = MergeTree -ORDER BY id; -``` - -Annoy currently supports two distance functions: -- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space - ([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)). -- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors - ([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)). - -For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no -distance function was specified during index creation, `L2Distance` is used as default. - -Parameter `NumTrees` is the number of trees which the algorithm creates (default if not specified: 100). Higher values of `NumTree` mean -more accurate search results but slower index creation / query times (approximately linearly) as well as larger index sizes. - -:::note -All arrays must have same length. To avoid errors, you can use a -[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT constraint_name_1 CHECK -length(vectors) = 256`. Also, empty `Arrays` and unspecified `Array` values in INSERT statements (i.e. default values) are not supported. -::: - -The creation of Annoy indexes (whenever a new part is build, e.g. at the end of a merge) is a relatively slow process. You can increase -setting `max_threads_for_annoy_index_creation` (default: 4) which controls how many threads are used to create an Annoy index. Please be -careful with this setting, it is possible that multiple indexes are created in parallel in which case there can be overparallelization. - -Setting `annoy_index_search_k_nodes` (default: `NumTrees * LIMIT`) determines how many tree nodes are inspected during SELECTs. Larger -values mean more accurate results at the cost of longer query runtime: - -```sql -SELECT * -FROM table_name -ORDER BY L2Distance(vectors, Point) -LIMIT N -SETTINGS annoy_index_search_k_nodes=100; -``` - -:::note -The Annoy index currently does not work with per-table, non-default `index_granularity` settings (see -[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml. -::: - ## USearch {#usearch} This type of ANN index is based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW @@ -211,6 +138,8 @@ that are expensive to load and compare. The library also has several hardware-sp distance computations on modern Arm (NEON and SVE) and x86 (AVX2 and AVX-512) CPUs and OS-specific optimizations to allow efficient navigation around immutable persistent files, without loading them into RAM. +USearch indexes are currently experimental, to use them you first need to `SET allow_experimental_usearch_index = 1`. +
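A minimal usage sketch for this section (hedged: table, data, and index name are invented; the `usearch([Distance[, ScalarKind]])` syntax and the `ORDER BY ... LIMIT` query shape follow the USearch documentation shown below):

```sql
SET allow_experimental_usearch_index = 1;

CREATE TABLE tab_usearch
(
    id Int64,
    vectors Array(Float32),
    INDEX ann_idx vectors TYPE usearch('L2Distance')
)
ENGINE = MergeTree
ORDER BY id;

INSERT INTO tab_usearch VALUES (1, [0.0, 0.1]), (2, [0.5, 0.5]), (3, [1.0, 0.9]);

-- ANN indexes accelerate ORDER BY DistanceFunction(...) LIMIT N queries.
SELECT id
FROM tab_usearch
ORDER BY L2Distance(vectors, [1.0, 1.0])
LIMIT 2;
```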
- -Syntax to create an USearch index over an [Array](../../../sql-reference/data-types/array.md) column: - -```sql -CREATE TABLE table_with_usearch_index -( - id Int64, - vectors Array(Float32), - INDEX [ann_index_name] vectors TYPE usearch([Distance[, ScalarKind]]) [GRANULARITY N] -) -ENGINE = MergeTree -ORDER BY id; -``` - -USearch currently supports two distance functions: -- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space - ([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)). -- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors - ([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)). - -USearch allows storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`. If no scalar kind -was specified during index creation, `f16` is used as default. - -For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no -distance function was specified during index creation, `L2Distance` is used as default. - -:::note -All arrays must have same length. To avoid errors, you can use a -[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT constraint_name_1 CHECK -length(vectors) = 256`. Also, empty `Arrays` and unspecified `Array` values in INSERT statements (i.e. default values) are not supported. -::: - -:::note -The USearch index currently does not work with per-table, non-default `index_granularity` settings (see -[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml. -::: - diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 23ad12bb017..e9f3b95dbc1 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -907,9 +907,9 @@ class IColumn; M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ M(Bool, allow_experimental_time_series_table, false, "Allows experimental TimeSeries table engine", 0) \ + M(Bool, allow_experimental_vector_similarity_index, false, "Allow experimental vector similarity index", 0) \ M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \ M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \ - M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \ M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \ M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. 
Helps to prevent memory overflows in ANN search indexes.", 0) \ M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \ @@ -1036,6 +1036,7 @@ class IColumn; MAKE_OBSOLETE(M, Bool, allow_experimental_annoy_index, false) \ MAKE_OBSOLETE(M, UInt64, max_threads_for_annoy_index_creation, 4) \ MAKE_OBSOLETE(M, Int64, annoy_index_search_k_nodes, -1) \ + MAKE_OBSOLETE(M, Bool, allow_experimental_usearch_index, false) \ MAKE_OBSOLETE(M, Bool, optimize_move_functions_out_of_any, false) \ MAKE_OBSOLETE(M, Bool, allow_experimental_undrop_table_query, true) \ MAKE_OBSOLETE(M, Bool, allow_experimental_s3queue, true) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 511723f1873..8fabd1ecf91 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -85,6 +85,7 @@ static std::initializer_listsetSetting("allow_experimental_object_type", 1); query_context->setSetting("allow_experimental_variant_type", 1); query_context->setSetting("allow_experimental_dynamic_type", 1); - query_context->setSetting("allow_experimental_usearch_index", 1); + query_context->setSetting("allow_experimental_vector_similarity_index", 1); query_context->setSetting("allow_experimental_bigint_types", 1); query_context->setSetting("allow_experimental_window_functions", 1); query_context->setSetting("allow_experimental_geo_types", 1); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a1ffcf07588..95143031707 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -787,8 +787,8 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti if (index_desc.type == INVERTED_INDEX_NAME && !settings.allow_experimental_inverted_index) throw Exception(ErrorCodes::ILLEGAL_INDEX, "Please use index type 'full_text' instead of 'inverted'"); /// ---- - if (index_desc.type == "usearch" && !settings.allow_experimental_usearch_index) - throw Exception(ErrorCodes::INCORRECT_QUERY, "USearch index is disabled. Turn on allow_experimental_usearch_index"); + if (index_desc.type == "vector_similarity" && !settings.allow_experimental_vector_similarity_index) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Vector similarity index is disabled. 
Turn on allow_experimental_vector_similarity_index"); properties.indices.push_back(index_desc); } diff --git a/src/Parsers/ASTIndexDeclaration.h b/src/Parsers/ASTIndexDeclaration.h index 90645f12b7c..72f3f017a99 100644 --- a/src/Parsers/ASTIndexDeclaration.h +++ b/src/Parsers/ASTIndexDeclaration.h @@ -13,7 +13,7 @@ class ASTIndexDeclaration : public IAST { public: static const auto DEFAULT_INDEX_GRANULARITY = 1uz; - static const auto DEFAULT_USEARCH_INDEX_GRANULARITY = 100'000'000uz; + static const auto DEFAULT_VECTOR_SIMILARITY_INDEX_GRANULARITY = 100'000'000uz; ASTIndexDeclaration(ASTPtr expression, ASTPtr type, const String & name_); diff --git a/src/Parsers/ParserCreateIndexQuery.cpp b/src/Parsers/ParserCreateIndexQuery.cpp index e7cfd753f99..ed89b80edca 100644 --- a/src/Parsers/ParserCreateIndexQuery.cpp +++ b/src/Parsers/ParserCreateIndexQuery.cpp @@ -89,8 +89,8 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected else { auto index_type = index->getType(); - if (index_type && index_type->name == "usearch") - index->granularity = ASTIndexDeclaration::DEFAULT_USEARCH_INDEX_GRANULARITY; + if (index_type && index_type->name == "vector_similarity") + index->granularity = ASTIndexDeclaration::DEFAULT_VECTOR_SIMILARITY_INDEX_GRANULARITY; else index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index b31fe21c4cc..cc4e02f46a3 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -214,8 +214,8 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe else { auto index_type = index->getType(); - if (index_type->name == "usearch") - index->granularity = ASTIndexDeclaration::DEFAULT_USEARCH_INDEX_GRANULARITY; + if (index_type->name == "vector_similarity") + index->granularity = ASTIndexDeclaration::DEFAULT_VECTOR_SIMILARITY_INDEX_GRANULARITY; else index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY; } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 3324cc4e42a..1f30725b4d0 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -1478,8 +1478,8 @@ static void buildIndexes( if (index_helper->isVectorSimilarityIndex()) { #if USE_USEARCH - if (const auto * usearch_index = typeid_cast(index_helper.get())) - condition = usearch_index->createIndexCondition(query_info, context); + if (const auto * vector_similarity_index = typeid_cast(index_helper.get())) + condition = vector_similarity_index->createIndexCondition(query_info, context); #endif if (!condition) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown vector search index {}", index_helper->index.name); diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp similarity index 76% rename from src/Storages/MergeTree/MergeTreeIndexUSearch.cpp rename to src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp index 1aa6c9c14d4..6f3b1b043cd 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp @@ -1,4 +1,4 @@ -#include +#include #if USE_USEARCH @@ -90,7 +90,7 @@ void USearchIndexWithSerialization::serialize(WriteBuffer & ostr) const auto result = Base::save_to_stream(callback); if (result.error) 
- throw Exception::createRuntime(ErrorCodes::INCORRECT_DATA, "Could not save USearch index, error: " + String(result.error.release())); + throw Exception::createRuntime(ErrorCodes::INCORRECT_DATA, "Could not save vector similarity index, error: " + String(result.error.release())); } void USearchIndexWithSerialization::deserialize(ReadBuffer & istr) @@ -104,7 +104,7 @@ void USearchIndexWithSerialization::deserialize(ReadBuffer & istr) auto result = Base::load_from_stream(callback); if (result.error) /// See the comment in MergeTreeIndexGranuleVectorSimilarity::deserializeBinary why we throw here - throw Exception::createRuntime(ErrorCodes::INCORRECT_DATA, "Could not load USearch index, error: " + String(result.error.release()) + " Please drop the index and create it again."); + throw Exception::createRuntime(ErrorCodes::INCORRECT_DATA, "Could not load vector similarity index, error: " + String(result.error.release()) + " Please drop the index and create it again."); } USearchIndexWithSerialization::Statistics USearchIndexWithSerialization::getStatistics() const @@ -121,16 +121,16 @@ USearchIndexWithSerialization::Statistics USearchIndexWithSerialization::getStat return statistics; } -MergeTreeIndexGranuleUSearch::MergeTreeIndexGranuleUSearch( +MergeTreeIndexGranuleVectorSimilarity::MergeTreeIndexGranuleVectorSimilarity( const String & index_name_, const Block & index_sample_block_, unum::usearch::metric_kind_t metric_kind_, unum::usearch::scalar_kind_t scalar_kind_) - : MergeTreeIndexGranuleUSearch(index_name_, index_sample_block_, metric_kind_, scalar_kind_, nullptr) + : MergeTreeIndexGranuleVectorSimilarity(index_name_, index_sample_block_, metric_kind_, scalar_kind_, nullptr) { } -MergeTreeIndexGranuleUSearch::MergeTreeIndexGranuleUSearch( +MergeTreeIndexGranuleVectorSimilarity::MergeTreeIndexGranuleVectorSimilarity( const String & index_name_, const Block & index_sample_block_, unum::usearch::metric_kind_t metric_kind_, @@ -144,7 +144,7 @@ MergeTreeIndexGranuleUSearch::MergeTreeIndexGranuleUSearch( { } -void MergeTreeIndexGranuleUSearch::serializeBinary(WriteBuffer & ostr) const +void MergeTreeIndexGranuleVectorSimilarity::serializeBinary(WriteBuffer & ostr) const { if (empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to write empty minmax index {}", backQuote(index_name)); @@ -158,18 +158,18 @@ void MergeTreeIndexGranuleUSearch::serializeBinary(WriteBuffer & ostr) const index->serialize(ostr); auto statistics = index->getStatistics(); - LOG_TRACE(logger, "Wrote USearch index: max_level = {}, connectivity = {}, size = {}, capacity = {}, memory_usage = {}", + LOG_TRACE(logger, "Wrote vector similarity index: max_level = {}, connectivity = {}, size = {}, capacity = {}, memory_usage = {}", statistics.max_level, statistics.connectivity, statistics.size, statistics.capacity, ReadableSize(statistics.memory_usage)); } -void MergeTreeIndexGranuleUSearch::deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion /*version*/) +void MergeTreeIndexGranuleVectorSimilarity::deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion /*version*/) { UInt64 file_version; readIntBinary(file_version, istr); if (file_version != FILE_FORMAT_VERSION) throw Exception( ErrorCodes::FORMAT_VERSION_TOO_OLD, - "USearch index could not be loaded because its version is too old (current version: {}, persisted version: {}). Please drop the index and create it again.", + "Vector similarity index could not be loaded because its version is too old (current version: {}, persisted version: {}). 
Please drop the index and create it again.", FILE_FORMAT_VERSION, file_version); /// More fancy error handling would be: Set a flag on the index that it failed to load. During usage return all granules, i.e. /// behave as if the index does not exist. Since format changes are expected to happen only rarely and it is "only" an index, keep it simple for now. @@ -181,11 +181,11 @@ void MergeTreeIndexGranuleUSearch::deserializeBinary(ReadBuffer & istr, MergeTre index->deserialize(istr); auto statistics = index->getStatistics(); - LOG_TRACE(logger, "Loaded USearch index: max_level = {}, connectivity = {}, size = {}, capacity = {}, memory_usage = {}", + LOG_TRACE(logger, "Loaded vector similarity index: max_level = {}, connectivity = {}, size = {}, capacity = {}, memory_usage = {}", statistics.max_level, statistics.connectivity, statistics.size, statistics.capacity, ReadableSize(statistics.memory_usage)); } -MergeTreeIndexAggregatorUSearch::MergeTreeIndexAggregatorUSearch( +MergeTreeIndexAggregatorVectorSimilarity::MergeTreeIndexAggregatorVectorSimilarity( const String & index_name_, const Block & index_sample_block_, unum::usearch::metric_kind_t metric_kind_, @@ -197,14 +197,14 @@ MergeTreeIndexAggregatorUSearch::MergeTreeIndexAggregatorUSearch( { } -MergeTreeIndexGranulePtr MergeTreeIndexAggregatorUSearch::getGranuleAndReset() +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorVectorSimilarity::getGranuleAndReset() { - auto granule = std::make_shared(index_name, index_sample_block, metric_kind, scalar_kind, index); + auto granule = std::make_shared(index_name, index_sample_block, metric_kind, scalar_kind, index); index = nullptr; return granule; } -void MergeTreeIndexAggregatorUSearch::update(const Block & block, size_t * pos, size_t limit) +void MergeTreeIndexAggregatorVectorSimilarity::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) throw Exception( @@ -239,8 +239,8 @@ void MergeTreeIndexAggregatorUSearch::update(const Block & block, size_t * pos, if (column_array->empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Array is unexpectedly empty"); - /// The Usearch algorithm naturally assumes that the indexed vectors have dimension >= 1. This condition is violated if empty arrays - /// are INSERTed into an Usearch-indexed column or if no value was specified at all in which case the arrays take on their default + /// The vector similarity algorithm naturally assumes that the indexed vectors have dimension >= 1. This condition is violated if empty arrays + /// are INSERTed into an vector-similarity-indexed column or if no value was specified at all in which case the arrays take on their default /// values which is also empty. if (column_array->isDefaultAt(0)) throw Exception(ErrorCodes::INCORRECT_DATA, "The arrays in column '{}' must not be empty. 
Did you try to INSERT default values?", index_column_name); @@ -262,13 +262,13 @@ void MergeTreeIndexAggregatorUSearch::update(const Block & block, size_t * pos, /// Reserving space is mandatory if (!index->reserve(roundUpToPowerOfTwoOrZero(index->size() + num_rows))) - throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for usearch index"); + throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for vector similarity index"); for (size_t current_row = 0; current_row < num_rows; ++current_row) { auto rc = index->add(static_cast(index->size()), &column_array_data_float_data[column_array_offsets[current_row - 1]]); if (!rc) - throw Exception::createRuntime(ErrorCodes::INCORRECT_DATA, "Could not add data to USearch index, error: " + String(rc.error.release())); + throw Exception::createRuntime(ErrorCodes::INCORRECT_DATA, "Could not add data to vector similarity index, error: " + String(rc.error.release())); ProfileEvents::increment(ProfileEvents::USearchAddCount); ProfileEvents::increment(ProfileEvents::USearchAddVisitedMembers, rc.visited_members); @@ -281,7 +281,7 @@ void MergeTreeIndexAggregatorUSearch::update(const Block & block, size_t * pos, *pos += rows_read; } -MergeTreeIndexConditionUSearch::MergeTreeIndexConditionUSearch( +MergeTreeIndexConditionVectorSimilarity::MergeTreeIndexConditionVectorSimilarity( const IndexDescription & /*index_description*/, const SelectQueryInfo & query, unum::usearch::metric_kind_t metric_kind_, @@ -291,12 +291,12 @@ MergeTreeIndexConditionUSearch::MergeTreeIndexConditionUSearch( { } -bool MergeTreeIndexConditionUSearch::mayBeTrueOnGranule(MergeTreeIndexGranulePtr) const +bool MergeTreeIndexConditionVectorSimilarity::mayBeTrueOnGranule(MergeTreeIndexGranulePtr) const { throw Exception(ErrorCodes::LOGICAL_ERROR, "mayBeTrueOnGranule is not supported for ANN skip indexes"); } -bool MergeTreeIndexConditionUSearch::alwaysUnknownOrTrue() const +bool MergeTreeIndexConditionVectorSimilarity::alwaysUnknownOrTrue() const { String index_distance_function; switch (metric_kind) @@ -308,14 +308,14 @@ bool MergeTreeIndexConditionUSearch::alwaysUnknownOrTrue() const return vector_similarity_condition.alwaysUnknownOrTrue(index_distance_function); } -std::vector MergeTreeIndexConditionUSearch::getUsefulRanges(MergeTreeIndexGranulePtr granule_) const +std::vector MergeTreeIndexConditionVectorSimilarity::getUsefulRanges(MergeTreeIndexGranulePtr granule_) const { const UInt64 limit = vector_similarity_condition.getLimit(); const UInt64 index_granularity = vector_similarity_condition.getIndexGranularity(); const std::vector reference_vector = vector_similarity_condition.getReferenceVector(); - const auto granule = std::dynamic_pointer_cast(granule_); + const auto granule = std::dynamic_pointer_cast(granule_); if (granule == nullptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "Granule has the wrong type"); @@ -328,7 +328,7 @@ std::vector MergeTreeIndexConditionUSearch::getUsefulRanges(MergeTreeInd auto result = index->search(reference_vector.data(), limit); if (result.error) - throw Exception::createRuntime(ErrorCodes::INCORRECT_DATA, "Could not search in USearch index, error: " + String(result.error.release())); + throw Exception::createRuntime(ErrorCodes::INCORRECT_DATA, "Could not search in vector similarity index, error: " + String(result.error.release())); ProfileEvents::increment(ProfileEvents::USearchSearchCount); ProfileEvents::increment(ProfileEvents::USearchSearchVisitedMembers, result.visited_members); @@ -350,34 
+350,34 @@ std::vector MergeTreeIndexConditionUSearch::getUsefulRanges(MergeTreeInd return granules; } -MergeTreeIndexUSearch::MergeTreeIndexUSearch(const IndexDescription & index_, unum::usearch::metric_kind_t metric_kind_, unum::usearch::scalar_kind_t scalar_kind_) +MergeTreeIndexVectorSimilarity::MergeTreeIndexVectorSimilarity(const IndexDescription & index_, unum::usearch::metric_kind_t metric_kind_, unum::usearch::scalar_kind_t scalar_kind_) : IMergeTreeIndex(index_) , metric_kind(metric_kind_) , scalar_kind(scalar_kind_) { } -MergeTreeIndexGranulePtr MergeTreeIndexUSearch::createIndexGranule() const +MergeTreeIndexGranulePtr MergeTreeIndexVectorSimilarity::createIndexGranule() const { - return std::make_shared(index.name, index.sample_block, metric_kind, scalar_kind); + return std::make_shared(index.name, index.sample_block, metric_kind, scalar_kind); } -MergeTreeIndexAggregatorPtr MergeTreeIndexUSearch::createIndexAggregator(const MergeTreeWriterSettings & /*settings*/) const +MergeTreeIndexAggregatorPtr MergeTreeIndexVectorSimilarity::createIndexAggregator(const MergeTreeWriterSettings & /*settings*/) const { - return std::make_shared(index.name, index.sample_block, metric_kind, scalar_kind); + return std::make_shared(index.name, index.sample_block, metric_kind, scalar_kind); } -MergeTreeIndexConditionPtr MergeTreeIndexUSearch::createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const +MergeTreeIndexConditionPtr MergeTreeIndexVectorSimilarity::createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const { - return std::make_shared(index, query, metric_kind, context); + return std::make_shared(index, query, metric_kind, context); }; -MergeTreeIndexConditionPtr MergeTreeIndexUSearch::createIndexCondition(const ActionsDAG *, ContextPtr) const +MergeTreeIndexConditionPtr MergeTreeIndexVectorSimilarity::createIndexCondition(const ActionsDAG *, ContextPtr) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeTreeIndexAnnoy cannot be created with ActionsDAG"); } -MergeTreeIndexPtr usearchIndexCreator(const IndexDescription & index) +MergeTreeIndexPtr vectorSimilarityIndexCreator(const IndexDescription & index) { static constexpr auto default_metric_kind = unum::usearch::metric_kind_t::l2sq_k; auto metric_kind = default_metric_kind; @@ -389,25 +389,25 @@ MergeTreeIndexPtr usearchIndexCreator(const IndexDescription & index) if (index.arguments.size() > 1) scalar_kind = quantizationToScalarKind.at(index.arguments[1].safeGet()); - return std::make_shared(index, metric_kind, scalar_kind); + return std::make_shared(index, metric_kind, scalar_kind); } -void usearchIndexValidator(const IndexDescription & index, bool /* attach */) +void vectorSimilarityIndexValidator(const IndexDescription & index, bool /* attach */) { - /// Check number and type of USearch index arguments: + /// Check number and type of index arguments: if (index.arguments.size() > 2) - throw Exception(ErrorCodes::INCORRECT_QUERY, "USearch index must not have more than one parameters"); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Vector similarity index must not have more than one parameters"); if (!index.arguments.empty() && index.arguments[0].getType() != Field::Types::String) - throw Exception(ErrorCodes::INCORRECT_QUERY, "First argument of USearch index (distance function) must be of type String"); + throw Exception(ErrorCodes::INCORRECT_QUERY, "First argument of vector similarity index (distance function) must be of type String"); if (index.arguments.size() > 1 && 
index.arguments[1].getType() != Field::Types::String) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Second argument of USearch index (scalar type) must be of type String"); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Second argument of vector similarity index (scalar type) must be of type String"); /// Check that the index is created on a single column if (index.column_names.size() != 1 || index.data_types.size() != 1) - throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "USearch indexes must be created on a single column"); + throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "Vector similarity indexes must be created on a single column"); /// Check that a supported metric was passed as first argument @@ -420,16 +420,15 @@ void usearchIndexValidator(const IndexDescription & index, bool /* attach */) throw Exception(ErrorCodes::INCORRECT_DATA, "Unrecognized scalar kind (second argument) for vector index. Supported kinds are: {}", keysAsString(quantizationToScalarKind)); /// Check data type of indexed column: - DataTypePtr data_type = index.sample_block.getDataTypes()[0]; if (const auto * data_type_array = typeid_cast(data_type.get())) { TypeIndex nested_type_index = data_type_array->getNestedType()->getTypeId(); if (!WhichDataType(nested_type_index).isFloat32()) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "USearch can only be created on columns of type Array(Float32)"); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Vector similarity index can only be created on columns of type Array(Float32)"); } else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "USearch can only be created on columns of type Array(Float32)"); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Vector similarity index can only be created on columns of type Array(Float32)"); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.h b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.h similarity index 84% rename from src/Storages/MergeTree/MergeTreeIndexUSearch.h rename to src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.h index d4df6658a90..95ea3cd5240 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.h +++ b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.h @@ -48,22 +48,22 @@ public: using USearchIndexWithSerializationPtr = std::shared_ptr; -struct MergeTreeIndexGranuleUSearch final : public IMergeTreeIndexGranule +struct MergeTreeIndexGranuleVectorSimilarity final : public IMergeTreeIndexGranule { - MergeTreeIndexGranuleUSearch( + MergeTreeIndexGranuleVectorSimilarity( const String & index_name_, const Block & index_sample_block_, unum::usearch::metric_kind_t metric_kind_, unum::usearch::scalar_kind_t scalar_kind_); - MergeTreeIndexGranuleUSearch( + MergeTreeIndexGranuleVectorSimilarity( const String & index_name_, const Block & index_sample_block_, unum::usearch::metric_kind_t metric_kind_, unum::usearch::scalar_kind_t scalar_kind_, USearchIndexWithSerializationPtr index_); - ~MergeTreeIndexGranuleUSearch() override = default; + ~MergeTreeIndexGranuleVectorSimilarity() override = default; void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override; @@ -76,7 +76,7 @@ struct MergeTreeIndexGranuleUSearch final : public IMergeTreeIndexGranule const unum::usearch::scalar_kind_t scalar_kind; USearchIndexWithSerializationPtr index; - LoggerPtr logger = getLogger("USearchIndex"); + LoggerPtr logger = getLogger("VectorSimilarityIndex"); private: /// The version of the persistence format of USearch index. 
Increment whenever you change the format. @@ -87,15 +87,15 @@ private: }; -struct MergeTreeIndexAggregatorUSearch final : IMergeTreeIndexAggregator +struct MergeTreeIndexAggregatorVectorSimilarity final : IMergeTreeIndexAggregator { - MergeTreeIndexAggregatorUSearch( + MergeTreeIndexAggregatorVectorSimilarity( const String & index_name_, const Block & index_sample_block, unum::usearch::metric_kind_t metric_kind_, unum::usearch::scalar_kind_t scalar_kind_); - ~MergeTreeIndexAggregatorUSearch() override = default; + ~MergeTreeIndexAggregatorVectorSimilarity() override = default; bool empty() const override { return !index || index->size() == 0; } MergeTreeIndexGranulePtr getGranuleAndReset() override; @@ -109,16 +109,16 @@ struct MergeTreeIndexAggregatorUSearch final : IMergeTreeIndexAggregator }; -class MergeTreeIndexConditionUSearch final : public IMergeTreeIndexCondition +class MergeTreeIndexConditionVectorSimilarity final : public IMergeTreeIndexCondition { public: - MergeTreeIndexConditionUSearch( + MergeTreeIndexConditionVectorSimilarity( const IndexDescription & index_description, const SelectQueryInfo & query, unum::usearch::metric_kind_t metric_kind_, ContextPtr context); - ~MergeTreeIndexConditionUSearch() override = default; + ~MergeTreeIndexConditionVectorSimilarity() override = default; bool alwaysUnknownOrTrue() const override; bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const override; @@ -130,15 +130,15 @@ private: }; -class MergeTreeIndexUSearch : public IMergeTreeIndex +class MergeTreeIndexVectorSimilarity : public IMergeTreeIndex { public: - MergeTreeIndexUSearch( + MergeTreeIndexVectorSimilarity( const IndexDescription & index_, unum::usearch::metric_kind_t metric_kind_, unum::usearch::scalar_kind_t scalar_kind_); - ~MergeTreeIndexUSearch() override = default; + ~MergeTreeIndexVectorSimilarity() override = default; MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; diff --git a/src/Storages/MergeTree/MergeTreeIndices.cpp b/src/Storages/MergeTree/MergeTreeIndices.cpp index f07449f762c..89aed7873a4 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.cpp +++ b/src/Storages/MergeTree/MergeTreeIndices.cpp @@ -129,8 +129,8 @@ MergeTreeIndexFactory::MergeTreeIndexFactory() registerValidator("hypothesis", hypothesisIndexValidator); #if USE_USEARCH - registerCreator("usearch", usearchIndexCreator); - registerValidator("usearch", usearchIndexValidator); + registerCreator("vector_similarity", vectorSimilarityIndexCreator); + registerValidator("vector_similarity", vectorSimilarityIndexValidator); #endif registerCreator("inverted", fullTextIndexCreator); diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 3dee79aae85..48ef2a4739e 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -239,8 +239,8 @@ MergeTreeIndexPtr hypothesisIndexCreator(const IndexDescription & index); void hypothesisIndexValidator(const IndexDescription & index, bool attach); #if USE_USEARCH -MergeTreeIndexPtr usearchIndexCreator(const IndexDescription & index); -void usearchIndexValidator(const IndexDescription & index, bool attach); +MergeTreeIndexPtr vectorSimilarityIndexCreator(const IndexDescription & index); +void vectorSimilarityIndexValidator(const IndexDescription & index, bool attach); #endif MergeTreeIndexPtr fullTextIndexCreator(const IndexDescription & index); diff 
--git a/tests/queries/0_stateless/02354_vector_search_bugs.sql b/tests/queries/0_stateless/02354_vector_search_bugs.sql index de36683ede1..2ef75d0a7fe 100644 --- a/tests/queries/0_stateless/02354_vector_search_bugs.sql +++ b/tests/queries/0_stateless/02354_vector_search_bugs.sql @@ -2,21 +2,21 @@ -- Tests various bugs and special cases for vector indexes. -SET allow_experimental_usearch_index = 1; +SET allow_experimental_vector_similarity_index = 1; SET enable_analyzer = 1; -- 0 vs. 1 produce slightly different error codes, make it future-proof DROP TABLE IF EXISTS tab; SELECT 'Issue #52258: Empty Arrays or Arrays with default values are rejected'; -CREATE TABLE tab (id UInt64, vec Array(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree() ORDER BY id; +CREATE TABLE tab (id UInt64, vec Array(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree() ORDER BY id; INSERT INTO tab VALUES (1, []); -- { serverError INCORRECT_DATA } INSERT INTO tab (id) VALUES (1); -- { serverError INCORRECT_DATA } DROP TABLE tab; SELECT 'It is possible to create parts with different Array vector sizes but there will be an error at query time'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id; SYSTEM STOP MERGES tab; INSERT INTO tab values (0, [2.2, 2.3]) (1, [3.1, 3.2]); INSERT INTO tab values (2, [2.2, 2.3, 2.4]) (3, [3.1, 3.2, 3.3]); @@ -31,7 +31,7 @@ DROP TABLE tab; SELECT 'Correctness of index with > 1 mark'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes = 0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192; -- disable adaptive granularity due to bug +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes = 0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192; -- disable adaptive granularity due to bug INSERT INTO tab SELECT number, [toFloat32(number), 0.0] from numbers(10000); WITH [1.0, 0.0] AS reference_vec diff --git a/tests/queries/0_stateless/02354_vector_search_default_granularity.sql b/tests/queries/0_stateless/02354_vector_search_default_granularity.sql index ff659b56033..a19a0d17536 100644 --- a/tests/queries/0_stateless/02354_vector_search_default_granularity.sql +++ b/tests/queries/0_stateless/02354_vector_search_default_granularity.sql @@ -2,17 +2,17 @@ -- Tests that vector search indexes use a (non-standard) index granularity of 100 mio by default. 
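-- Expected-output sketch (assuming the 100 mio default is unchanged): both
-- "SELECT granularity FROM system.data_skipping_indices ..." queries below should print 100000000.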
-SET allow_experimental_usearch_index = 1; +SET allow_experimental_vector_similarity_index = 1; -- After CREATE TABLE DROP TABLE IF EXISTS tab; -CREATE TABLE tab (id Int32, vec Array(Float32), INDEX idx(vec) TYPE usearch) ENGINE = MergeTree ORDER BY id; +CREATE TABLE tab (id Int32, vec Array(Float32), INDEX idx(vec) TYPE vector_similarity) ENGINE = MergeTree ORDER BY id; SELECT granularity FROM system.data_skipping_indices WHERE database = currentDatabase() AND table = 'tab' AND name = 'idx'; -- After ALTER TABLE DROP TABLE tab; CREATE TABLE tab (id Int32, vec Array(Float32)) ENGINE = MergeTree ORDER BY id; -ALTER TABLE tab ADD INDEX idx(vec) TYPE usearch; +ALTER TABLE tab ADD INDEX idx(vec) TYPE vector_similarity; SELECT granularity FROM system.data_skipping_indices WHERE database = currentDatabase() AND table = 'tab' AND name = 'idx'; DROP TABLE tab; diff --git a/tests/queries/0_stateless/02354_vector_search_detach_attach.sql b/tests/queries/0_stateless/02354_vector_search_detach_attach.sql index 92e8efd918b..36241dfabf7 100644 --- a/tests/queries/0_stateless/02354_vector_search_detach_attach.sql +++ b/tests/queries/0_stateless/02354_vector_search_detach_attach.sql @@ -2,10 +2,10 @@ -- Tests that vector similarity indexes can be detached/attached. -SET allow_experimental_usearch_index = 1; +SET allow_experimental_vector_similarity_index = 1; DROP TABLE IF EXISTS tab; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192; INSERT INTO tab VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]); DETACH TABLE tab SYNC; diff --git a/tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql b/tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql index 60bd54d1dbe..912f7d7fcae 100644 --- a/tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql +++ b/tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql @@ -2,36 +2,36 @@ -- Tests that various conditions are checked during creation of vector search indexes. 
-SET allow_experimental_usearch_index = 1; +SET allow_experimental_vector_similarity_index = 1; DROP TABLE IF EXISTS tab; SELECT 'At most two index arguments'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch('too', 'many', 'args')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('too', 'many', 'args')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } SELECT '1st argument (distance function) must be String'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } SELECT 'Unsupported distance functions are rejected'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch('invalidDistance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('invalidDistance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } SELECT '2nd argument (scalar kind) must be String'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } SELECT 'Unsupported scalar kinds are rejected'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch('L2Distance', 'invalidKind')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('L2Distance', 'invalidKind')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } SELECT 'Must be created on single column'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx (vec, id) TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_NUMBER_OF_COLUMNS } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx (vec, id) TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_NUMBER_OF_COLUMNS } SELECT 'Must be created on Array(Float32) columns'; SET allow_suspicious_low_cardinality_types = 1; -CREATE TABLE tab(id Int32, vec Float32, INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } -CREATE TABLE tab(id Int32, vec Array(Float64), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } -CREATE TABLE tab(id Int32, vec LowCardinality(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } -CREATE TABLE tab(id Int32, vec Nullable(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vec Float32, INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vec Array(Float64), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vec LowCardinality(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, 
vec Nullable(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } SELECT 'Rejects INSERTs of Arrays with different sizes'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id; INSERT INTO tab values (0, [2.2, 2.3]) (1, [3.1, 3.2, 3.3]); -- { serverError INCORRECT_DATA } DROP TABLE tab; diff --git a/tests/queries/0_stateless/02354_vector_search_queries.reference b/tests/queries/0_stateless/02354_vector_search_queries.reference index 22ad46f802c..7c8e4c0ca59 100644 --- a/tests/queries/0_stateless/02354_vector_search_queries.reference +++ b/tests/queries/0_stateless/02354_vector_search_queries.reference @@ -1,9 +1,9 @@ 10 rows, index_granularity = 8192, GRANULARITY = 1 million --> 1 granule, 1 indexed block -- Usearch: ORDER-BY-type +- ORDER-BY-type 5 [0,2] 0 6 [0,2.1] 0.09999990463256836 7 [0,2.2] 0.20000004768371582 -- Usearch: ORDER-BY-type, EXPLAIN +- ORDER-BY-type, EXPLAIN Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) Sorting (Sorting for ORDER BY) @@ -16,15 +16,15 @@ Expression (Projection) Granules: 1/1 Skip Name: idx - Description: usearch GRANULARITY 100000000 + Description: vector_similarity GRANULARITY 100000000 Parts: 1/1 Granules: 1/1 12 rows, index_granularity = 3, GRANULARITY = 2 --> 4 granules, 2 indexed block -- Usearch: ORDER-BY-type +- ORDER-BY-type 6 [0,2] 0 7 [0,2.1] 0.09999990463256836 8 [0,2.2] 0.20000004768371582 -- Usearch: ORDER-BY-type, EXPLAIN +- ORDER-BY-type, EXPLAIN Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) Sorting (Sorting for ORDER BY) @@ -37,11 +37,11 @@ Expression (Projection) Granules: 4/4 Skip Name: idx - Description: usearch GRANULARITY 2 + Description: vector_similarity GRANULARITY 2 Parts: 1/1 Granules: 2/4 Special cases -- Usearch: ORDER-BY-type +- ORDER-BY-type 6 [1,9.3] 0.005731362878640178 1 [2,3.2] 0.15200169244542905 7 [5.5,4.7] 0.3503476876550442 diff --git a/tests/queries/0_stateless/02354_vector_search_queries.sql b/tests/queries/0_stateless/02354_vector_search_queries.sql index 555f47b364f..50537ad6244 100644 --- a/tests/queries/0_stateless/02354_vector_search_queries.sql +++ b/tests/queries/0_stateless/02354_vector_search_queries.sql @@ -2,7 +2,7 @@ -- Tests various simple approximate nearest neighborhood (ANN) queries that utilize vector search indexes. 
-SET allow_experimental_usearch_index = 1; +SET allow_experimental_vector_similarity_index = 1; SET enable_analyzer = 0; @@ -10,18 +10,18 @@ SELECT '10 rows, index_granularity = 8192, GRANULARITY = 1 million --> 1 granule DROP TABLE IF EXISTS tab; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192; INSERT INTO tab VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]); -SELECT '- Usearch: ORDER-BY-type'; +SELECT '- ORDER-BY-type'; WITH [0.0, 2.0] AS reference_vec SELECT id, vec, L2Distance(vec, reference_vec) FROM tab ORDER BY L2Distance(vec, reference_vec) LIMIT 3; -SELECT '- Usearch: ORDER-BY-type, EXPLAIN'; +SELECT '- ORDER-BY-type, EXPLAIN'; EXPLAIN indexes = 1 WITH [0.0, 2.0] AS reference_vec SELECT id, vec, L2Distance(vec, reference_vec) @@ -34,17 +34,17 @@ DROP TABLE tab; SELECT '12 rows, index_granularity = 3, GRANULARITY = 2 --> 4 granules, 2 indexed block'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch() GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity() GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; INSERT INTO tab VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [1.5, 0.0]), (6, [0.0, 2.0]), (7, [0.0, 2.1]), (8, [0.0, 2.2]), (9, [0.0, 2.3]), (10, [0.0, 2.4]), (11, [0.0, 2.5]); -SELECT '- Usearch: ORDER-BY-type'; +SELECT '- ORDER-BY-type'; WITH [0.0, 2.0] AS reference_vec SELECT id, vec, L2Distance(vec, reference_vec) FROM tab ORDER BY L2Distance(vec, reference_vec) LIMIT 3; -SELECT '- Usearch: ORDER-BY-type, EXPLAIN'; +SELECT '- ORDER-BY-type, EXPLAIN'; EXPLAIN indexes = 1 WITH [0.0, 2.0] AS reference_vec SELECT id, vec, L2Distance(vec, reference_vec) @@ -58,10 +58,10 @@ DROP TABLE tab; SELECT 'Special cases'; -- Not a systematic test, just to check that no bad things happen. -- Just for fun, use metric = 'cosineDistance', scalarKind = 'f64' -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch('cosineDistance', 'f64') GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('cosineDistance', 'f64') GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; INSERT INTO tab VALUES (0, [4.6, 2.3]), (1, [2.0, 3.2]), (2, [4.2, 3.4]), (3, [5.3, 2.9]), (4, [2.4, 5.2]), (5, [5.3, 2.3]), (6, [1.0, 9.3]), (7, [5.5, 4.7]), (8, [6.4, 3.5]), (9, [5.3, 2.5]), (10, [6.4, 3.4]), (11, [6.4, 3.2]); -SELECT '- Usearch: ORDER-BY-type'; +SELECT '- ORDER-BY-type'; WITH [0.0, 2.0] AS reference_vec SELECT id, vec, cosineDistance(vec, reference_vec) FROM tab From cc5c64e1ede7284d91ada1f28edbb18a457f5894 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 10 Jun 2024 19:48:51 +0000 Subject: [PATCH 221/265] Add migration helper for legacy 'annoy' and 'usearch' index types Index types 'annoy' and 'usearch' were removed and replaced by 'vector_similarity' indexes in an earlier commit.
This means, unfortunately, that if customers have tables with these indexes and upgrade, their database might not start anymore - the system loads the metadata at startup, thinks something is wrong with such tables, and halts immediately. This commit adds support for loading and attaching such indexes back. Data inserts or index use (search) return an error which recommends a migration to 'vector_similarity' indexes. The implementation is generally similar to what has recently been implemented for 'full_text' indexes [1, 2]. [1] https://github.com/ClickHouse/ClickHouse/pull/64656 [2] https://github.com/ClickHouse/ClickHouse/pull/64846 --- .../QueryPlan/ReadFromMergeTree.cpp | 3 ++ .../MergeTreeIndexLegacyVectorSimilarity.cpp | 45 +++++++++++++++++++ .../MergeTreeIndexLegacyVectorSimilarity.h | 26 +++++++++++ src/Storages/MergeTree/MergeTreeIndices.cpp | 10 +++++ src/Storages/MergeTree/MergeTreeIndices.h | 3 ++ ...earch_legacy_index_compatibility.reference | 2 + ...ctor_search_legacy_index_compatibility.sql | 43 ++++++++++++++++++ 7 files changed, 132 insertions(+) create mode 100644 src/Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.cpp create mode 100644 src/Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.h create mode 100644 tests/queries/0_stateless/02354_vector_search_legacy_index_compatibility.reference create mode 100644 tests/queries/0_stateless/02354_vector_search_legacy_index_compatibility.sql diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 1f30725b4d0..348019d7d10 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -1481,6 +1482,8 @@ static void buildIndexes( if (const auto * vector_similarity_index = typeid_cast(index_helper.get())) condition = vector_similarity_index->createIndexCondition(query_info, context); #endif + if (const auto * legacy_vector_similarity_index = typeid_cast(index_helper.get())) + condition = legacy_vector_similarity_index->createIndexCondition(query_info, context); if (!condition) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown vector search index {}", index_helper->index.name); } diff --git a/src/Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.cpp b/src/Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.cpp new file mode 100644 index 00000000000..29de109d4fc --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.cpp @@ -0,0 +1,45 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_INDEX; +} + +MergeTreeIndexLegacyVectorSimilarity::MergeTreeIndexLegacyVectorSimilarity(const IndexDescription & index_) + : IMergeTreeIndex(index_) +{ +} + +MergeTreeIndexGranulePtr MergeTreeIndexLegacyVectorSimilarity::createIndexGranule() const +{ + throw Exception(ErrorCodes::ILLEGAL_INDEX, "Indexes of type 'annoy' or 'usearch' are no longer supported. Please drop and recreate the index as type 'vector_similarity'"); +} + +MergeTreeIndexAggregatorPtr MergeTreeIndexLegacyVectorSimilarity::createIndexAggregator(const MergeTreeWriterSettings &) const +{ + throw Exception(ErrorCodes::ILLEGAL_INDEX, "Indexes of type 'annoy' or 'usearch' are no longer supported.
Please drop and recreate the index as type 'vector_similarity'"); +} + +MergeTreeIndexConditionPtr MergeTreeIndexLegacyVectorSimilarity::createIndexCondition(const SelectQueryInfo &, ContextPtr) const +{ + throw Exception(ErrorCodes::ILLEGAL_INDEX, "Indexes of type 'annoy' or 'usearch' are no longer supported. Please drop and recreate the index as type 'vector_similarity'"); +}; + +MergeTreeIndexConditionPtr MergeTreeIndexLegacyVectorSimilarity::createIndexCondition(const ActionsDAG *, ContextPtr) const +{ + throw Exception(ErrorCodes::ILLEGAL_INDEX, "Indexes of type 'annoy' or 'usearch' are no longer supported. Please drop and recreate the index as type 'vector_similarity'"); +} + +MergeTreeIndexPtr legacyVectorSimilarityIndexCreator(const IndexDescription & index) +{ + return std::make_shared(index); +} + +void legacyVectorSimilarityIndexValidator(const IndexDescription &, bool) +{ +} + +} diff --git a/src/Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.h b/src/Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.h new file mode 100644 index 00000000000..1015401823d --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.h @@ -0,0 +1,26 @@ +#pragma once + +#include + +/// Walking corpse implementation for removed skipping index of type "annoy" and "usearch". +/// Its only purpose is to allow loading old tables with indexes of these types. +/// Data insertion and index usage/search will throw an exception, suggesting to migrate to "vector_similarity" indexes. + +namespace DB +{ + +class MergeTreeIndexLegacyVectorSimilarity : public IMergeTreeIndex +{ +public: + explicit MergeTreeIndexLegacyVectorSimilarity(const IndexDescription & index_); + ~MergeTreeIndexLegacyVectorSimilarity() override = default; + + MergeTreeIndexGranulePtr createIndexGranule() const override; + MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings &) const override; + MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo &, ContextPtr) const; + MergeTreeIndexConditionPtr createIndexCondition(const ActionsDAG *, ContextPtr) const override; + + bool isVectorSimilarityIndex() const override { return true; } +}; + +} diff --git a/src/Storages/MergeTree/MergeTreeIndices.cpp b/src/Storages/MergeTree/MergeTreeIndices.cpp index 89aed7873a4..d2fc0e84b56 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.cpp +++ b/src/Storages/MergeTree/MergeTreeIndices.cpp @@ -132,6 +132,16 @@ MergeTreeIndexFactory::MergeTreeIndexFactory() registerCreator("vector_similarity", vectorSimilarityIndexCreator); registerValidator("vector_similarity", vectorSimilarityIndexValidator); #endif + /// ------ + /// TODO: remove this block at the end of 2024. + /// Index types 'annoy' and 'usearch' are no longer supported as of June 2024. Their successor is index type 'vector_similarity'. + /// To support loading tables with old indexes during a transition period, register dummy indexes which allow load/attaching but + /// throw an exception when the user attempts to use them. 
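+    /// A minimal sketch of the migration that the exception message recommends, assuming an index
+    /// 'legacy_idx' on column 'vec' (names are illustrative, argument form as of this commit):
+    ///     ALTER TABLE tab DROP INDEX legacy_idx;
+    ///     ALTER TABLE tab ADD INDEX legacy_idx vec TYPE vector_similarity('L2Distance');
+    ///     ALTER TABLE tab MATERIALIZE INDEX legacy_idx; -- rebuild the index for existing parts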
+ registerCreator("annoy", legacyVectorSimilarityIndexCreator); + registerValidator("annoy", legacyVectorSimilarityIndexValidator); + registerCreator("usearch", legacyVectorSimilarityIndexCreator); + registerValidator("usearch", legacyVectorSimilarityIndexValidator); + /// ------ registerCreator("inverted", fullTextIndexCreator); registerValidator("inverted", fullTextIndexValidator); diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 48ef2a4739e..c52d7ffe131 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -243,6 +243,9 @@ MergeTreeIndexPtr vectorSimilarityIndexCreator(const IndexDescription & index); void vectorSimilarityIndexValidator(const IndexDescription & index, bool attach); #endif +MergeTreeIndexPtr legacyVectorSimilarityIndexCreator(const IndexDescription & index); +void legacyVectorSimilarityIndexValidator(const IndexDescription & index, bool attach); + MergeTreeIndexPtr fullTextIndexCreator(const IndexDescription & index); void fullTextIndexValidator(const IndexDescription & index, bool attach); diff --git a/tests/queries/0_stateless/02354_vector_search_legacy_index_compatibility.reference b/tests/queries/0_stateless/02354_vector_search_legacy_index_compatibility.reference new file mode 100644 index 00000000000..030bfa9b1bd --- /dev/null +++ b/tests/queries/0_stateless/02354_vector_search_legacy_index_compatibility.reference @@ -0,0 +1,2 @@ +Annoy +Usearch diff --git a/tests/queries/0_stateless/02354_vector_search_legacy_index_compatibility.sql b/tests/queries/0_stateless/02354_vector_search_legacy_index_compatibility.sql new file mode 100644 index 00000000000..0889aa74f7a --- /dev/null +++ b/tests/queries/0_stateless/02354_vector_search_legacy_index_compatibility.sql @@ -0,0 +1,43 @@ +-- Indexes of type 'annoy' or 'usearch' are no longer supported. +-- Test what happens when ClickHouse encounters tables with the old index type. + +DROP TABLE IF EXISTS tab; + +SELECT 'Annoy'; + +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX vec_idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id; + +INSERT INTO tab VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]); -- { serverError ILLEGAL_INDEX } + +WITH [0.0, 2.0] AS reference_vec +SELECT id, vec, L2Distance(vec, reference_vec) +FROM tab +ORDER BY L2Distance(vec, reference_vec) +LIMIT 3; +-- (*) The search succeeds because the index contains no data (i.e. some shortcut) +-- If it had data (can't really test in SQL tests ...), this statement would also return an error, trust me. + +-- Detach and attach should work. +DETACH TABLE tab; +ATTACH TABLE tab; + +DROP TABLE tab; + +SELECT 'Usearch'; + +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX vec_idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; + +INSERT INTO tab VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]); -- { serverError ILLEGAL_INDEX } + +WITH [0.0, 2.0] AS reference_vec +SELECT id, vec, L2Distance(vec, reference_vec) +FROM tab +ORDER BY L2Distance(vec, reference_vec) +LIMIT 3; +-- see above: (*) + +-- Detach and attach should work. 
+DETACH TABLE tab; +ATTACH TABLE tab; + +DROP TABLE tab; From d2e79f0b92936eb3ec3f6409fe6db18a3091919d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 9 Aug 2024 15:28:38 +0000 Subject: [PATCH 222/265] Rework vector index parameters USearch (similar to FAISS) allows specifying the distance function, quantization, and various HNSW meta-parameters for index creation and search. Some users wished for greater configurability, so let's expose them. Index creation now requires either - 2 parameters (with the other 4 parameters taking on default values), or - 6 parameters for full control. This commit also removes quantization `f64` (that would be upsampling). --- .../mergetree-family/annindexes.md | 12 +- .../MergeTreeIndexVectorSimilarity.cpp | 166 ++++++++++++------ .../MergeTreeIndexVectorSimilarity.h | 23 ++- .../0_stateless/02354_vector_search_bugs.sql | 6 +- ...2354_vector_search_default_granularity.sql | 4 +- .../02354_vector_search_detach_attach.sql | 2 +- ...r_search_index_creation_negative.reference | 12 +- ..._vector_search_index_creation_negative.sql | 48 +++-- ...4_vector_search_multiple_indexes.reference | 0 .../02354_vector_search_multiple_indexes.sql | 14 ++ .../02354_vector_search_queries.sql | 8 +- 11 files changed, 203 insertions(+), 92 deletions(-) create mode 100644 tests/queries/0_stateless/02354_vector_search_multiple_indexes.reference create mode 100644 tests/queries/0_stateless/02354_vector_search_multiple_indexes.sql diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md index 63c061a0d46..354fac6ea74 100644 --- a/docs/en/engines/table-engines/mergetree-family/annindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md @@ -43,12 +43,22 @@ CREATE TABLE table ( id Int64, vectors Array(Float32), - INDEX [index_name vectors TYPE vector_similarity([Distance[, ScalarKind]]) [GRANULARITY [N]] + INDEX index_name vectors TYPE vector_similarity(method, distance_function[, quantization, m, ef_construction, ef_search]) [GRANULARITY N] ) ENGINE = MergeTree ORDER BY id; ``` +Parameters: +- `method`: currently only `hnsw` is supported. +- `distance_function`: either `L2Distance` (the [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance) - the length of a + line between two points in Euclidean space), or `cosineDistance` (the [cosine + distance](https://en.wikipedia.org/wiki/Cosine_similarity#Cosine_distance) - the angle between two non-zero vectors). +- `quantization`: either `f32`, `f16`, or `i8` for storing the vector with reduced precision (optional, default: `f32`) +- `m`: the number of neighbors per graph node (optional, default: 16) +- `ef_construction`: the size of the dynamic candidate list during index construction (optional, default: 128) +- `ef_search`: the size of the dynamic candidate list during search (optional, default: 64) + Vector similarity indexes are based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW algorithm](https://arxiv.org/abs/1603.09320), i.e., a hierarchical graph where each point represents a vector and the edges represent similarity. Such hierarchical structures can be very efficient on large collections.
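As a sketch, the two accepted argument forms, using the table from the example above (the six-parameter values are illustrative, not tuned recommendations):

```sql
-- 2 parameters, the remaining 4 take default values:
ALTER TABLE table ADD INDEX idx_2 vectors TYPE vector_similarity('hnsw', 'L2Distance');
-- 6 parameters for full control over quantization, m, ef_construction, ef_search:
ALTER TABLE table ADD INDEX idx_6 vectors TYPE vector_similarity('hnsw', 'cosineDistance', 'f16', 32, 256, 128);
```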
They may often fetch 0.05% or less data from the diff --git a/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp index 6f3b1b043cd..5b0793fa0c8 100644 --- a/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.cpp @@ -45,6 +45,9 @@ namespace ErrorCodes namespace { +/// The only indexing method currently supported by USearch +std::set methods = {"hnsw"}; + /// Maps from user-facing name to internal name std::unordered_map distanceFunctionToMetricKind = { {"L2Distance", unum::usearch::metric_kind_t::l2sq_k}, @@ -52,22 +55,37 @@ std::unordered_map distanceFunctionToMetri /// Maps from user-facing name to internal name std::unordered_map quantizationToScalarKind = { - {"f64", unum::usearch::scalar_kind_t::f64_k}, {"f32", unum::usearch::scalar_kind_t::f32_k}, {"f16", unum::usearch::scalar_kind_t::f16_k}, {"i8", unum::usearch::scalar_kind_t::i8_k}}; +template +concept is_set = std::same_as>; + +template +concept is_unordered_map = std::same_as>; + template -String keysAsString(const T & t) +String joinByComma(const T & t) { - String result; - for (const auto & [k, _] : t) + if constexpr (is_set) { - if (!result.empty()) - result += ", "; - result += k; + return fmt::format("{}", fmt::join(t, ", ")); } - return result; + else if constexpr (is_unordered_map) + { + String joined_keys; + for (const auto & [k, _] : t) + { + if (!joined_keys.empty()) + joined_keys += ", "; + joined_keys += k; + } + return joined_keys; + } + /// TODO once our libcxx is recent enough, replace above by + /// return fmt::format("{}", fmt::join(std::views::keys(t)), ", ")); + std::unreachable(); } } @@ -75,8 +93,10 @@ String keysAsString(const T & t) USearchIndexWithSerialization::USearchIndexWithSerialization( size_t dimensions, unum::usearch::metric_kind_t metric_kind, - unum::usearch::scalar_kind_t scalar_kind) - : Base(Base::make(unum::usearch::metric_punned_t(dimensions, metric_kind, scalar_kind))) + unum::usearch::scalar_kind_t scalar_kind, + UsearchHnswParams usearch_hnsw_params) + : Base(Base::make(unum::usearch::metric_punned_t(dimensions, metric_kind, scalar_kind), + unum::usearch::index_dense_config_t(usearch_hnsw_params.m, usearch_hnsw_params.ef_construction, usearch_hnsw_params.ef_search))) { } @@ -125,8 +145,9 @@ MergeTreeIndexGranuleVectorSimilarity::MergeTreeIndexGranuleVectorSimilarity( const String & index_name_, const Block & index_sample_block_, unum::usearch::metric_kind_t metric_kind_, - unum::usearch::scalar_kind_t scalar_kind_) - : MergeTreeIndexGranuleVectorSimilarity(index_name_, index_sample_block_, metric_kind_, scalar_kind_, nullptr) + unum::usearch::scalar_kind_t scalar_kind_, + UsearchHnswParams usearch_hnsw_params_) + : MergeTreeIndexGranuleVectorSimilarity(index_name_, index_sample_block_, metric_kind_, scalar_kind_, usearch_hnsw_params_, nullptr) { } @@ -135,11 +156,13 @@ MergeTreeIndexGranuleVectorSimilarity::MergeTreeIndexGranuleVectorSimilarity( const Block & index_sample_block_, unum::usearch::metric_kind_t metric_kind_, unum::usearch::scalar_kind_t scalar_kind_, + UsearchHnswParams usearch_hnsw_params_, USearchIndexWithSerializationPtr index_) : index_name(index_name_) , index_sample_block(index_sample_block_) , metric_kind(metric_kind_) , scalar_kind(scalar_kind_) + , usearch_hnsw_params(usearch_hnsw_params_) , index(std::move(index_)) { } @@ -153,8 +176,8 @@ void MergeTreeIndexGranuleVectorSimilarity::serializeBinary(WriteBuffer & ostr) /// Number of 
dimensions is required in the index constructor, /// so it must be written and read separately from the other part - writeIntBinary(static_cast(index->dimensions()), ostr); // write dimension - // + writeIntBinary(static_cast(index->dimensions()), ostr); + index->serialize(ostr); auto statistics = index->getStatistics(); @@ -176,7 +199,7 @@ void MergeTreeIndexGranuleVectorSimilarity::deserializeBinary(ReadBuffer & istr, UInt64 dimension; readIntBinary(dimension, istr); - index = std::make_shared(dimension, metric_kind, scalar_kind); + index = std::make_shared(dimension, metric_kind, scalar_kind, usearch_hnsw_params); index->deserialize(istr); @@ -189,17 +212,19 @@ MergeTreeIndexAggregatorVectorSimilarity::MergeTreeIndexAggregatorVectorSimilari const String & index_name_, const Block & index_sample_block_, unum::usearch::metric_kind_t metric_kind_, - unum::usearch::scalar_kind_t scalar_kind_) + unum::usearch::scalar_kind_t scalar_kind_, + UsearchHnswParams usearch_hnsw_params_) : index_name(index_name_) , index_sample_block(index_sample_block_) , metric_kind(metric_kind_) , scalar_kind(scalar_kind_) + , usearch_hnsw_params(usearch_hnsw_params_) { } MergeTreeIndexGranulePtr MergeTreeIndexAggregatorVectorSimilarity::getGranuleAndReset() { - auto granule = std::make_shared(index_name, index_sample_block, metric_kind, scalar_kind, index); + auto granule = std::make_shared(index_name, index_sample_block, metric_kind, scalar_kind, usearch_hnsw_params, index); index = nullptr; return granule; } @@ -258,15 +283,15 @@ void MergeTreeIndexAggregatorVectorSimilarity::update(const Block & block, size_ throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column '{}' must have equal length", index_column_name); if (!index) - index = std::make_shared(dimensions, metric_kind, scalar_kind); + index = std::make_shared(dimensions, metric_kind, scalar_kind, usearch_hnsw_params); /// Reserving space is mandatory if (!index->reserve(roundUpToPowerOfTwoOrZero(index->size() + num_rows))) throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Could not reserve memory for vector similarity index"); - for (size_t current_row = 0; current_row < num_rows; ++current_row) + for (size_t row = 0; row < num_rows; ++row) { - auto rc = index->add(static_cast(index->size()), &column_array_data_float_data[column_array_offsets[current_row - 1]]); + auto rc = index->add(static_cast(index->size()), &column_array_data_float_data[column_array_offsets[row - 1]]); if (!rc) throw Exception::createRuntime(ErrorCodes::INCORRECT_DATA, "Could not add data to vector similarity index, error: " + String(rc.error.release())); @@ -313,8 +338,6 @@ std::vector MergeTreeIndexConditionVectorSimilarity::getUsefulRanges(Mer const UInt64 limit = vector_similarity_condition.getLimit(); const UInt64 index_granularity = vector_similarity_condition.getIndexGranularity(); - const std::vector reference_vector = vector_similarity_condition.getReferenceVector(); - const auto granule = std::dynamic_pointer_cast(granule_); if (granule == nullptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "Granule has the wrong type"); @@ -326,6 +349,8 @@ std::vector MergeTreeIndexConditionVectorSimilarity::getUsefulRanges(Mer "does not match the dimension in the index ({})", vector_similarity_condition.getDimensions(), index->dimensions()); + const std::vector reference_vector = vector_similarity_condition.getReferenceVector(); + auto result = index->search(reference_vector.data(), limit); if (result.error) throw Exception::createRuntime(ErrorCodes::INCORRECT_DATA, "Could 
not search in vector similarity index, error: " + String(result.error.release())); @@ -350,21 +375,26 @@ std::vector MergeTreeIndexConditionVectorSimilarity::getUsefulRanges(Mer return granules; } -MergeTreeIndexVectorSimilarity::MergeTreeIndexVectorSimilarity(const IndexDescription & index_, unum::usearch::metric_kind_t metric_kind_, unum::usearch::scalar_kind_t scalar_kind_) +MergeTreeIndexVectorSimilarity::MergeTreeIndexVectorSimilarity( + const IndexDescription & index_, + unum::usearch::metric_kind_t metric_kind_, + unum::usearch::scalar_kind_t scalar_kind_, + UsearchHnswParams usearch_hnsw_params_) : IMergeTreeIndex(index_) , metric_kind(metric_kind_) , scalar_kind(scalar_kind_) + , usearch_hnsw_params(usearch_hnsw_params_) { } MergeTreeIndexGranulePtr MergeTreeIndexVectorSimilarity::createIndexGranule() const { - return std::make_shared(index.name, index.sample_block, metric_kind, scalar_kind); + return std::make_shared(index.name, index.sample_block, metric_kind, scalar_kind, usearch_hnsw_params); } MergeTreeIndexAggregatorPtr MergeTreeIndexVectorSimilarity::createIndexAggregator(const MergeTreeWriterSettings & /*settings*/) const { - return std::make_shared(index.name, index.sample_block, metric_kind, scalar_kind); + return std::make_shared(index.name, index.sample_block, metric_kind, scalar_kind, usearch_hnsw_params); } MergeTreeIndexConditionPtr MergeTreeIndexVectorSimilarity::createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const @@ -379,56 +409,82 @@ MergeTreeIndexConditionPtr MergeTreeIndexVectorSimilarity::createIndexCondition( MergeTreeIndexPtr vectorSimilarityIndexCreator(const IndexDescription & index) { - static constexpr auto default_metric_kind = unum::usearch::metric_kind_t::l2sq_k; - auto metric_kind = default_metric_kind; - if (!index.arguments.empty()) - metric_kind = distanceFunctionToMetricKind.at(index.arguments[0].safeGet()); + const bool has_six_args = (index.arguments.size() == 6); - static constexpr auto default_scalar_kind = unum::usearch::scalar_kind_t::f16_k; - auto scalar_kind = default_scalar_kind; - if (index.arguments.size() > 1) - scalar_kind = quantizationToScalarKind.at(index.arguments[1].safeGet()); + unum::usearch::metric_kind_t metric_kind = distanceFunctionToMetricKind.at(index.arguments[1].safeGet()); - return std::make_shared(index, metric_kind, scalar_kind); + /// use defaults for the other parameters + unum::usearch::scalar_kind_t scalar_kind = unum::usearch::scalar_kind_t::f32_k; + UsearchHnswParams usearch_hnsw_params; + + if (has_six_args) + { + scalar_kind = quantizationToScalarKind.at(index.arguments[2].safeGet()); + usearch_hnsw_params = {.m = index.arguments[3].safeGet(), + .ef_construction = index.arguments[4].safeGet(), + .ef_search = index.arguments[5].safeGet()}; + } + + return std::make_shared(index, metric_kind, scalar_kind, usearch_hnsw_params); } void vectorSimilarityIndexValidator(const IndexDescription & index, bool /* attach */) { - /// Check number and type of index arguments: + const bool has_two_args = (index.arguments.size() == 2); + const bool has_six_args = (index.arguments.size() == 6); - if (index.arguments.size() > 2) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Vector similarity index must not have more than one parameters"); + /// Check number and type of arguments + if (!has_two_args && !has_six_args) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Vector similarity index must have two or six arguments"); + if (index.arguments[0].getType() != Field::Types::String) + throw 
Exception(ErrorCodes::INCORRECT_QUERY, "First argument of vector similarity index (method) must be of type String"); + if (index.arguments[1].getType() != Field::Types::String) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Second argument of vector similarity index (metric) must be of type String"); + if (has_six_args) + { + if (index.arguments[2].getType() != Field::Types::String) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Third argument of vector similarity index (quantization) must be of type String"); + if (index.arguments[3].getType() != Field::Types::UInt64) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Fourth argument of vector similarity index (M) must be of type UInt64"); + if (index.arguments[4].getType() != Field::Types::UInt64) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Fifth argument of vector similarity index (ef_construction) must be of type UInt64"); + if (index.arguments[5].getType() != Field::Types::UInt64) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Sixth argument of vector similarity index (ef_search) must be of type UInt64"); + } - if (!index.arguments.empty() && index.arguments[0].getType() != Field::Types::String) - throw Exception(ErrorCodes::INCORRECT_QUERY, "First argument of vector similarity index (distance function) must be of type String"); - if (index.arguments.size() > 1 && index.arguments[1].getType() != Field::Types::String) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Second argument of vector similarity index (scalar type) must be of type String"); + /// Check that passed arguments are supported + if (!methods.contains(index.arguments[0].safeGet())) + throw Exception(ErrorCodes::INCORRECT_DATA, "First argument (method) of vector similarity index is not supported. Supported methods are: {}", joinByComma(methods)); + if (!distanceFunctionToMetricKind.contains(index.arguments[1].safeGet())) + throw Exception(ErrorCodes::INCORRECT_DATA, "Second argument (distance function) of vector similarity index is not supported. Supported distance function are: {}", joinByComma(distanceFunctionToMetricKind)); + if (has_six_args) + { + if (!quantizationToScalarKind.contains(index.arguments[2].safeGet())) + throw Exception(ErrorCodes::INCORRECT_DATA, "Third argument (quantization) of vector similarity index is not supported. Supported quantizations are: {}", joinByComma(quantizationToScalarKind)); + if (index.arguments[3].safeGet() < 2) + throw Exception(ErrorCodes::INCORRECT_DATA, "Fourth argument (M) of vector similarity index must be > 1"); + if (index.arguments[4].safeGet() < 1) + throw Exception(ErrorCodes::INCORRECT_DATA, "Fifth argument (ef_construction) of vector similarity index must be > 0"); + if (index.arguments[5].safeGet() < 1) + throw Exception(ErrorCodes::INCORRECT_DATA, "Sixth argument (ef_search) of vector similarity index must be > 0"); + } /// Check that the index is created on a single column - if (index.column_names.size() != 1 || index.data_types.size() != 1) throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "Vector similarity indexes must be created on a single column"); - /// Check that a supported metric was passed as first argument - - if (!index.arguments.empty() && !distanceFunctionToMetricKind.contains(index.arguments[0].safeGet())) - throw Exception(ErrorCodes::INCORRECT_DATA, "Unrecognized metric kind (first argument) for vector index. 
Supported kinds are: {}", keysAsString(distanceFunctionToMetricKind)); - - /// Check that a supported kind was passed as a second argument - - if (index.arguments.size() > 1 && !quantizationToScalarKind.contains(index.arguments[1].safeGet())) - throw Exception(ErrorCodes::INCORRECT_DATA, "Unrecognized scalar kind (second argument) for vector index. Supported kinds are: {}", keysAsString(quantizationToScalarKind)); - - /// Check data type of indexed column: + /// Check data type of the indexed column: DataTypePtr data_type = index.sample_block.getDataTypes()[0]; if (const auto * data_type_array = typeid_cast(data_type.get())) { TypeIndex nested_type_index = data_type_array->getNestedType()->getTypeId(); if (!WhichDataType(nested_type_index).isFloat32()) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Vector similarity index can only be created on columns of type Array(Float32)"); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Vector similarity indexes can only be created on columns of type Array(Float32)"); } else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Vector similarity index can only be created on columns of type Array(Float32)"); + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Vector similarity indexes can only be created on columns of type Array(Float32)"); + } } } diff --git a/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.h b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.h index 95ea3cd5240..f7098c1626c 100644 --- a/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.h +++ b/src/Storages/MergeTree/MergeTreeIndexVectorSimilarity.h @@ -14,6 +14,13 @@ namespace DB { +struct UsearchHnswParams +{ + size_t m = unum::usearch::default_connectivity(); + size_t ef_construction = unum::usearch::default_expansion_add(); + size_t ef_search = unum::usearch::default_expansion_search(); +}; + using USearchIndex = unum::usearch::index_dense_gt; class USearchIndexWithSerialization : public USearchIndex @@ -24,7 +31,8 @@ public: USearchIndexWithSerialization( size_t dimensions, unum::usearch::metric_kind_t metric_kind, - unum::usearch::scalar_kind_t scalar_kind); + unum::usearch::scalar_kind_t scalar_kind, + UsearchHnswParams usearch_hnsw_params); void serialize(WriteBuffer & ostr) const; void deserialize(ReadBuffer & istr); @@ -54,13 +62,15 @@ struct MergeTreeIndexGranuleVectorSimilarity final : public IMergeTreeIndexGranu const String & index_name_, const Block & index_sample_block_, unum::usearch::metric_kind_t metric_kind_, - unum::usearch::scalar_kind_t scalar_kind_); + unum::usearch::scalar_kind_t scalar_kind_, + UsearchHnswParams usearch_hnsw_params_); MergeTreeIndexGranuleVectorSimilarity( const String & index_name_, const Block & index_sample_block_, unum::usearch::metric_kind_t metric_kind_, unum::usearch::scalar_kind_t scalar_kind_, + UsearchHnswParams usearch_hnsw_params_, USearchIndexWithSerializationPtr index_); ~MergeTreeIndexGranuleVectorSimilarity() override = default; @@ -74,6 +84,7 @@ struct MergeTreeIndexGranuleVectorSimilarity final : public IMergeTreeIndexGranu const Block index_sample_block; const unum::usearch::metric_kind_t metric_kind; const unum::usearch::scalar_kind_t scalar_kind; + const UsearchHnswParams usearch_hnsw_params; USearchIndexWithSerializationPtr index; LoggerPtr logger = getLogger("VectorSimilarityIndex"); @@ -93,7 +104,8 @@ struct MergeTreeIndexAggregatorVectorSimilarity final : IMergeTreeIndexAggregato const String & index_name_, const Block & index_sample_block, unum::usearch::metric_kind_t metric_kind_, - 
unum::usearch::scalar_kind_t scalar_kind_); + unum::usearch::scalar_kind_t scalar_kind_, + UsearchHnswParams usearch_hnsw_params_); ~MergeTreeIndexAggregatorVectorSimilarity() override = default; @@ -105,6 +117,7 @@ struct MergeTreeIndexAggregatorVectorSimilarity final : IMergeTreeIndexAggregato const Block index_sample_block; const unum::usearch::metric_kind_t metric_kind; const unum::usearch::scalar_kind_t scalar_kind; + const UsearchHnswParams usearch_hnsw_params; USearchIndexWithSerializationPtr index; }; @@ -136,7 +149,8 @@ public: MergeTreeIndexVectorSimilarity( const IndexDescription & index_, unum::usearch::metric_kind_t metric_kind_, - unum::usearch::scalar_kind_t scalar_kind_); + unum::usearch::scalar_kind_t scalar_kind_, + UsearchHnswParams usearch_hnsw_params_); ~MergeTreeIndexVectorSimilarity() override = default; @@ -149,6 +163,7 @@ public: private: const unum::usearch::metric_kind_t metric_kind; const unum::usearch::scalar_kind_t scalar_kind; + const UsearchHnswParams usearch_hnsw_params; }; } diff --git a/tests/queries/0_stateless/02354_vector_search_bugs.sql b/tests/queries/0_stateless/02354_vector_search_bugs.sql index 2ef75d0a7fe..7c66b4b8e45 100644 --- a/tests/queries/0_stateless/02354_vector_search_bugs.sql +++ b/tests/queries/0_stateless/02354_vector_search_bugs.sql @@ -9,14 +9,14 @@ DROP TABLE IF EXISTS tab; SELECT 'Issue #52258: Empty Arrays or Arrays with default values are rejected'; -CREATE TABLE tab (id UInt64, vec Array(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree() ORDER BY id; +CREATE TABLE tab (id UInt64, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree() ORDER BY id; INSERT INTO tab VALUES (1, []); -- { serverError INCORRECT_DATA } INSERT INTO tab (id) VALUES (1); -- { serverError INCORRECT_DATA } DROP TABLE tab; SELECT 'It is possible to create parts with different Array vector sizes but there will be an error at query time'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; SYSTEM STOP MERGES tab; INSERT INTO tab values (0, [2.2, 2.3]) (1, [3.1, 3.2]); INSERT INTO tab values (2, [2.2, 2.3, 2.4]) (3, [3.1, 3.2, 3.3]); @@ -31,7 +31,7 @@ DROP TABLE tab; SELECT 'Correctness of index with > 1 mark'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes = 0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192; -- disable adaptive granularity due to bug +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes = 0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192; -- disable adaptive granularity due to bug INSERT INTO tab SELECT number, [toFloat32(number), 0.0] from numbers(10000); WITH [1.0, 0.0] AS reference_vec diff --git a/tests/queries/0_stateless/02354_vector_search_default_granularity.sql b/tests/queries/0_stateless/02354_vector_search_default_granularity.sql index a19a0d17536..acb69cb6ff8 100644 --- a/tests/queries/0_stateless/02354_vector_search_default_granularity.sql +++ b/tests/queries/0_stateless/02354_vector_search_default_granularity.sql @@ -6,13 +6,13 @@ SET 
allow_experimental_vector_similarity_index = 1; -- After CREATE TABLE DROP TABLE IF EXISTS tab; -CREATE TABLE tab (id Int32, vec Array(Float32), INDEX idx(vec) TYPE vector_similarity) ENGINE = MergeTree ORDER BY id; +CREATE TABLE tab (id Int32, vec Array(Float32), INDEX idx(vec) TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; SELECT granularity FROM system.data_skipping_indices WHERE database = currentDatabase() AND table = 'tab' AND name = 'idx'; -- After ALTER TABLE DROP TABLE tab; CREATE TABLE tab (id Int32, vec Array(Float32)) ENGINE = MergeTree ORDER BY id; -ALTER TABLE tab ADD INDEX idx(vec) TYPE vector_similarity; +ALTER TABLE tab ADD INDEX idx(vec) TYPE vector_similarity('hnsw', 'L2Distance'); SELECT granularity FROM system.data_skipping_indices WHERE database = currentDatabase() AND table = 'tab' AND name = 'idx'; DROP TABLE tab; diff --git a/tests/queries/0_stateless/02354_vector_search_detach_attach.sql b/tests/queries/0_stateless/02354_vector_search_detach_attach.sql index 36241dfabf7..f92eaddbbed 100644 --- a/tests/queries/0_stateless/02354_vector_search_detach_attach.sql +++ b/tests/queries/0_stateless/02354_vector_search_detach_attach.sql @@ -5,7 +5,7 @@ SET allow_experimental_vector_similarity_index = 1; DROP TABLE IF EXISTS tab; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192; INSERT INTO tab VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]); DETACH TABLE tab SYNC; diff --git a/tests/queries/0_stateless/02354_vector_search_index_creation_negative.reference b/tests/queries/0_stateless/02354_vector_search_index_creation_negative.reference index bee3236f436..b6d034208d0 100644 --- a/tests/queries/0_stateless/02354_vector_search_index_creation_negative.reference +++ b/tests/queries/0_stateless/02354_vector_search_index_creation_negative.reference @@ -1,8 +1,10 @@ -At most two index arguments -1st argument (distance function) must be String -Unsupported distance functions are rejected -2nd argument (scalar kind) must be String -Unsupported scalar kinds are rejected +Two or six index arguments +1st argument (method) must be String and hnsw +2nd argument (distance function) must be String and L2Distance or cosineDistance +3rd argument (quantization), if given, must be String and f32, f16, ...
+4th argument (M), if given, must be UInt64 and > 1 +5th argument (ef_construction), if given, must be UInt64 and > 0 +6th argument (ef_search), if given, must be UInt64 and > 0 Must be created on single column Must be created on Array(Float32) columns Rejects INSERTs of Arrays with different sizes diff --git a/tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql b/tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql index 912f7d7fcae..7c2ddfe81fc 100644 --- a/tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql +++ b/tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql @@ -6,32 +6,46 @@ SET allow_experimental_vector_similarity_index = 1; DROP TABLE IF EXISTS tab; -SELECT 'At most two index arguments'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('too', 'many', 'args')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +SELECT 'Two or six index arguments'; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('cant_have_one_arg')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('cant', 'have', 'three_args')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('cant', 'have', 'more', 'than', 'six', 'args', '!')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } -SELECT '1st argument (distance function) must be String'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +SELECT '1st argument (method) must be String and hnsw'; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity(3, 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('not_hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } -SELECT 'Unsupported distance functions are rejected'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('invalidDistance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } +SELECT '2nd argument (distance function) must be String and L2Distance or cosineDistance'; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'invalid_distance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } -SELECT '2nd argument (scalar kind) must be String'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } - -SELECT 'Unsupported scalar kinds are rejected'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('L2Distance', 'invalidKind')) ENGINE = 
MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } +SELECT '3rd argument (quantization), if given, must be String and f32, f16, ...'; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 1, 1, 1, 1)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 'invalid', 2, 1, 1)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } +SELECT '4th argument (M), if given, must be UInt64 and > 1'; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 'f32', 'invalid', 1, 1)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 'f32', 1, 1, 1)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } +SELECT '5th argument (ef_construction), if given, must be UInt64 and > 0'; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 'f32', 2, 'invalid', 1)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 'f32', 2, 0, 1)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } +SELECT '6th argument (ef_search), if given, must be UInt64 and > 0'; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 'f32', 2, 1, 'invalid')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance', 'f32', 2, 1, 0)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA } SELECT 'Must be created on single column'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx (vec, id) TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_NUMBER_OF_COLUMNS } +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx (vec, id) TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_NUMBER_OF_COLUMNS } SELECT 'Must be created on Array(Float32) columns'; SET allow_suspicious_low_cardinality_types = 1; -CREATE TABLE tab(id Int32, vec Float32, INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } -CREATE TABLE tab(id Int32, vec Array(Float64), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } -CREATE TABLE tab(id Int32, vec LowCardinality(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } -CREATE TABLE tab(id Int32, vec Nullable(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vec UInt64, INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vec Float32, INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vec Array(Float64), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id 
Int32, vec LowCardinality(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } +CREATE TABLE tab(id Int32, vec Nullable(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN } SELECT 'Rejects INSERTs of Arrays with different sizes'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id; INSERT INTO tab values (0, [2.2, 2.3]) (1, [3.1, 3.2, 3.3]); -- { serverError INCORRECT_DATA } DROP TABLE tab; diff --git a/tests/queries/0_stateless/02354_vector_search_multiple_indexes.reference b/tests/queries/0_stateless/02354_vector_search_multiple_indexes.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02354_vector_search_multiple_indexes.sql b/tests/queries/0_stateless/02354_vector_search_multiple_indexes.sql new file mode 100644 index 00000000000..f1cfc041233 --- /dev/null +++ b/tests/queries/0_stateless/02354_vector_search_multiple_indexes.sql @@ -0,0 +1,14 @@ +-- Tags: no-fasttest, no-ordinary-database + +-- Tests that multiple vector similarity indexes can be created on the same column (even if that makes no sense) + +SET allow_experimental_vector_similarity_index = 1; + +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (id Int32, vec Array(Float32), PRIMARY KEY id, INDEX vec_idx(vec) TYPE vector_similarity('hnsw', 'L2Distance')); + +ALTER TABLE tab ADD INDEX idx(vec) TYPE minmax; +ALTER TABLE tab ADD INDEX vec_idx1(vec) TYPE vector_similarity('hnsw', 'cosineDistance'); +ALTER TABLE tab ADD INDEX vec_idx2(vec) TYPE vector_similarity('hnsw', 'L2Distance'); -- silly but creating the same index also works for non-vector indexes ... 
+ +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02354_vector_search_queries.sql b/tests/queries/0_stateless/02354_vector_search_queries.sql index 50537ad6244..dbf0fca32ab 100644 --- a/tests/queries/0_stateless/02354_vector_search_queries.sql +++ b/tests/queries/0_stateless/02354_vector_search_queries.sql @@ -10,7 +10,7 @@ SELECT '10 rows, index_granularity = 8192, GRANULARITY = 1 million --> 1 granule DROP TABLE IF EXISTS tab; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance')) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192; INSERT INTO tab VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]); @@ -34,7 +34,7 @@ DROP TABLE tab; SELECT '12 rows, index_granularity = 3, GRANULARITY = 2 --> 4 granules, 2 indexed block'; -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity() GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'L2Distance') GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; INSERT INTO tab VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [1.5, 0.0]), (6, [0.0, 2.0]), (7, [0.0, 2.1]), (8, [0.0, 2.2]), (9, [0.0, 2.3]), (10, [0.0, 2.4]), (11, [0.0, 2.5]); SELECT '- ORDER-BY-type'; @@ -56,9 +56,9 @@ DROP TABLE tab; SELECT 'Special cases'; -- Not a systematic test, just to check that no bad things happen. 
--- Just for jun, use metric = 'cosineDistance', scalarKind = 'f64' +-- Test with non-default metric, M, ef_construction, ef_search -CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('cosineDistance', 'f64') GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; +CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE vector_similarity('hnsw', 'cosineDistance', 'f32', 42, 99, 66) GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3; INSERT INTO tab VALUES (0, [4.6, 2.3]), (1, [2.0, 3.2]), (2, [4.2, 3.4]), (3, [5.3, 2.9]), (4, [2.4, 5.2]), (5, [5.3, 2.3]), (6, [1.0, 9.3]), (7, [5.5, 4.7]), (8, [6.4, 3.5]), (9, [5.3, 2.5]), (10, [6.4, 3.4]), (11, [6.4, 3.2]); SELECT '- ORDER-BY-type'; From fb76cb90b1badef334b96b61d976136fd38d535d Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 11 Aug 2024 09:31:36 +0000 Subject: [PATCH 223/265] Allow un-quoted skip index parameters Previously, only this syntax to create a skip index worked: INDEX index_name column_name TYPE vector_similarity('hnsw', 'L2Distance') Now, this syntax will work as well: INDEX index_name column_name TYPE vector_similarity(hnsw, L2Distance) --- .../mergetree-family/annindexes.md | 15 +++++++++++- src/Storages/IndicesDescription.cpp | 12 +++++++--- ...search_unquoted_index_parameters.reference | 0 ...ector_search_unquoted_index_parameters.sql | 23 +++++++++++++++++++ 4 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02354_vector_search_unquoted_index_parameters.reference create mode 100644 tests/queries/0_stateless/02354_vector_search_unquoted_index_parameters.sql diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md index 354fac6ea74..e73d6f07a32 100644 --- a/docs/en/engines/table-engines/mergetree-family/annindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md @@ -43,7 +43,7 @@ CREATE TABLE table ( id Int64, vectors Array(Float32), - INDEX index_name vec TYPE vector_similarity(method, distance_function[, quantization, connectivity, expansion_add, expansion_search]) [GRANULARITY N] + INDEX index_name vectors TYPE vector_similarity(method, distance_function[, quantization, connectivity, expansion_add, expansion_search]) [GRANULARITY N] ) ENGINE = MergeTree ORDER BY id; @@ -59,6 +59,19 @@ Parameters: - `ef_construction`: (optional, default: 128) - `ef_search`: (optional, default: 64) +Example: + +```sql +CREATE TABLE table +( + id Int64, + vectors Array(Float32), + INDEX idx vectors TYPE vector_similarity('hnsw', 'L2Distance') -- Alternative syntax: TYPE vector_similarity(hnsw, L2Distance) +) +ENGINE = MergeTree +ORDER BY id; +``` + Vector similarity indexes are based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW algorithm](https://arxiv.org/abs/1603.09320), i.e., a hierarchical graph where each point represents a vector and the edges represent similarity. Such hierarchical structures can be very efficient on large collections. 
They may often fetch 0.05% or less data from the diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index cef8fd85f97..753fbf1d635 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -130,10 +131,15 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast { for (size_t i = 0; i < index_type->arguments->children.size(); ++i) { - const auto * argument = index_type->arguments->children[i]->as(); - if (!argument) + const auto & child = index_type->arguments->children[i]; + if (const auto * ast_literal = child->as(); ast_literal != nullptr) + /// E.g. INDEX index_name column_name TYPE vector_similarity('hnsw', 'f32') + result.arguments.emplace_back(ast_literal->value); + else if (const auto * ast_identifier = child->as(); ast_identifier != nullptr) + /// E.g. INDEX index_name column_name TYPE vector_similarity(hnsw, f32) + result.arguments.emplace_back(ast_identifier->name()); + else throw Exception(ErrorCodes::INCORRECT_QUERY, "Only literals can be skip index arguments"); - result.arguments.emplace_back(argument->value); } } diff --git a/tests/queries/0_stateless/02354_vector_search_unquoted_index_parameters.reference b/tests/queries/0_stateless/02354_vector_search_unquoted_index_parameters.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02354_vector_search_unquoted_index_parameters.sql b/tests/queries/0_stateless/02354_vector_search_unquoted_index_parameters.sql new file mode 100644 index 00000000000..da6494bf831 --- /dev/null +++ b/tests/queries/0_stateless/02354_vector_search_unquoted_index_parameters.sql @@ -0,0 +1,23 @@ +-- Tags: no-fasttest, no-ordinary-database + +SET allow_experimental_vector_similarity_index = 1; + +-- Tests that quoted and unquoted parameters can be passed to vector search indexes. 
+ +DROP TABLE IF EXISTS tab1; +DROP TABLE IF EXISTS tab2; + +CREATE TABLE tab1 (id Int32, vec Array(Float32), PRIMARY KEY id, INDEX vec_idx(vec) TYPE vector_similarity('hnsw', 'L2Distance')); +CREATE TABLE tab2 (id Int32, vec Array(Float32), PRIMARY KEY id, INDEX vec_idx(vec) TYPE vector_similarity(hnsw, L2Distance)); + +DROP TABLE tab1; +DROP TABLE tab2; + +CREATE TABLE tab1 (id Int32, vec Array(Float32), PRIMARY KEY id); +CREATE TABLE tab2 (id Int32, vec Array(Float32), PRIMARY KEY id); + +ALTER TABLE tab1 ADD INDEX idx1(vec) TYPE vector_similarity('hnsw', 'L2Distance'); +ALTER TABLE tab2 ADD INDEX idx2(vec) TYPE vector_similarity(hnsw, L2Distance); + +DROP TABLE tab1; +DROP TABLE tab2; From ea1cd665750f82bbeaf66f67b1d85e014afdec18 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 12 Aug 2024 17:32:43 +0200 Subject: [PATCH 224/265] fix tidy --- src/Storages/VirtualColumnUtils.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 1abac56d266..3143c7f78f6 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include @@ -46,15 +45,7 @@ #include #include #include -#include "Functions/FunctionsLogical.h" -#include "Functions/IFunction.h" -#include "Functions/IFunctionAdaptors.h" -#include "Functions/indexHint.h" #include -#include -#include -#include -#include namespace DB From 3a6e05eb43cbb9937cded286ac7259b2f7168057 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 12 Aug 2024 18:03:42 +0200 Subject: [PATCH 225/265] try to fix includes --- src/Storages/VirtualColumnUtils.cpp | 55 ++++++++++++++++------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 3143c7f78f6..d932f5cc469 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -1,43 +1,40 @@ #include #include -#include -#include -#include -#include -#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include + #include -#include -#include -#include #include +#include +#include +#include #include -#include -#include + #include +#include #include +#include #include #include -#include -#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include #include #include -#include #include +#include #include + +#include #include #include #include @@ -45,7 +42,15 @@ #include #include #include +#include "Functions/FunctionsLogical.h" +#include "Functions/IFunction.h" +#include "Functions/IFunctionAdaptors.h" +#include "Functions/indexHint.h" #include +#include +#include +#include +#include namespace DB From 858b7e55d0df3db1412d538f701c30584b5783bf Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Mon, 12 Aug 2024 16:16:50 +0000 Subject: [PATCH 226/265] Improve condition in case the default column consumes slightly more memory It never happened in the few hundreds of tests I ran successfully, but we'd rather be safe than sorry. 
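To illustrate the intent of this change (this note and sketch are not part of the patch): the old check only tolerated run 1 using up to 20% more memory than run 2, so drift in the opposite direction still failed. Below is a minimal, self-contained sketch of the now-symmetric tolerance; the two WITH constants are hypothetical byte counts standing in for the system.query_log values the test reads:

```sql
-- Hedged sketch: either run may exceed the other by up to 20% before the
-- check fails. The values are made-up examples, not data from the test.
WITH
    100000000 AS memory_usage_1,
    110000000 AS memory_usage_2
SELECT memory_usage_1 <= 1.2 * memory_usage_2
    OR memory_usage_2 <= 1.2 * memory_usage_1 AS within_tolerance;
```

This returns 1 for any pair of values whose ratio stays within [1/1.2, 1.2], which is exactly the symmetry the new condition adds.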
--- .../01903_correct_block_size_prediction_with_default.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh index 075d9a1dacf..1482730af2c 100755 --- a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh +++ b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh @@ -28,7 +28,8 @@ function test() SYSTEM FLUSH LOGS; WITH memory_1 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_1' AND type = 'QueryFinish' as memory_1), memory_2 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_2' AND type = 'QueryFinish' as memory_2) - SELECT memory_1.memory_usage <= 1.2 * memory_2.memory_usage FROM memory_1, memory_2;" + SELECT memory_1.memory_usage <= 1.2 * memory_2.memory_usage OR + memory_2.memory_usage <= 1.2 * memory_1.memory_usage FROM memory_1, memory_2;" } test "" From f7af4c5643af2ee87b81a7972c0bb91cf723c8a2 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Aug 2024 17:27:43 +0100 Subject: [PATCH 227/265] don't report system-wide metrics when cgroup metrics present --- src/Common/AsynchronousMetrics.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index 02c130d3caa..9b6a7428411 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -75,12 +75,8 @@ AsynchronousMetrics::AsynchronousMetrics( , protocol_server_metrics_func(protocol_server_metrics_func_) { #if defined(OS_LINUX) - openFileIfExists("/proc/meminfo", meminfo); - openFileIfExists("/proc/loadavg", loadavg); - openFileIfExists("/proc/stat", proc_stat); openFileIfExists("/proc/cpuinfo", cpuinfo); openFileIfExists("/proc/sys/fs/file-nr", file_nr); - openFileIfExists("/proc/uptime", uptime); openFileIfExists("/proc/net/dev", net_dev); /// CGroups v2 @@ -103,6 +99,19 @@ AsynchronousMetrics::AsynchronousMetrics( if (!cgroupcpu_stat) openFileIfExists("/sys/fs/cgroup/cpuacct/cpuacct.stat", cgroupcpuacct_stat); + if (!cgroupcpu_stat && !cgroupcpuacct_stat) + { + /// The following metrics are not cgroup-aware and we've found cgroup-specific metric files for similar metrics, + /// so we're better off not reporting them at all to avoid confusion + openFileIfExists("/proc/loadavg", loadavg); + openFileIfExists("/proc/stat", proc_stat); + openFileIfExists("/proc/uptime", uptime); + } + + /// The same story for memory metrics + if (!cgroupmem_limit_in_bytes) + openFileIfExists("/proc/meminfo", meminfo); + openFileIfExists("/proc/sys/vm/max_map_count", vm_max_map_count); openFileIfExists("/proc/self/maps", vm_maps); @@ -1193,8 +1202,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) tryLogCurrentException(__PRETTY_FUNCTION__); } } - - if (meminfo) + else if (meminfo) { try { From f0f10bc0099e659bfc0bf31079e89832f9db4b17 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Aug 2024 17:30:12 +0100 Subject: [PATCH 228/265] remove cgroupsV2MemoryControllerEnabled() --- base/base/cgroupsv2.cpp | 24 ------------------------ base/base/cgroupsv2.h | 4 ---- base/base/getMemoryAmount.cpp | 3 --- 3 files changed, 31 deletions(-) diff --git a/base/base/cgroupsv2.cpp b/base/base/cgroupsv2.cpp index 4372696c2b7..d8f95b23ae7 100644 --- 
a/base/base/cgroupsv2.cpp +++ b/base/base/cgroupsv2.cpp @@ -27,27 +27,6 @@ bool cgroupsV2Enabled() #endif } -bool cgroupsV2MemoryControllerEnabled() -{ -#if defined(OS_LINUX) - chassert(cgroupsV2Enabled()); - /// According to https://docs.kernel.org/admin-guide/cgroup-v2.html, file "cgroup.controllers" defines which controllers are available - /// for the current + child cgroups. The set of available controllers can be restricted from level to level using file - /// "cgroups.subtree_control". It is therefore sufficient to check the bottom-most nested "cgroup.controllers" file. - fs::path cgroup_dir = cgroupV2PathOfProcess(); - if (cgroup_dir.empty()) - return false; - std::ifstream controllers_file(cgroup_dir / "cgroup.controllers"); - if (!controllers_file.is_open()) - return false; - std::string controllers; - std::getline(controllers_file, controllers); - return controllers.find("memory") != std::string::npos; -#else - return false; -#endif -} - fs::path cgroupV2PathOfProcess() { #if defined(OS_LINUX) @@ -77,9 +56,6 @@ std::optional getCgroupsV2PathContainingFile(std::string_view file_ if (!cgroupsV2Enabled()) return {}; - if (!cgroupsV2MemoryControllerEnabled()) - return {}; - fs::path current_cgroup = cgroupV2PathOfProcess(); if (current_cgroup.empty()) return {}; diff --git a/base/base/cgroupsv2.h b/base/base/cgroupsv2.h index 9d8e178a866..925a399471e 100644 --- a/base/base/cgroupsv2.h +++ b/base/base/cgroupsv2.h @@ -12,10 +12,6 @@ static inline const std::filesystem::path default_cgroups_mount = "/sys/fs/cgrou /// Is cgroups v2 enabled on the system? bool cgroupsV2Enabled(); -/// Is the memory controller of cgroups v2 enabled on the system? -/// Assumes that cgroupsV2Enabled() is enabled. -bool cgroupsV2MemoryControllerEnabled(); - /// Detects which cgroup v2 the process belongs to and returns the filesystem path to the cgroup. /// Returns an empty path the cgroup cannot be determined. /// Assumes that cgroupsV2Enabled() is enabled. diff --git a/base/base/getMemoryAmount.cpp b/base/base/getMemoryAmount.cpp index 03aab1eac72..bbfbecdbffd 100644 --- a/base/base/getMemoryAmount.cpp +++ b/base/base/getMemoryAmount.cpp @@ -19,9 +19,6 @@ std::optional getCgroupsV2MemoryLimit() if (!cgroupsV2Enabled()) return {}; - if (!cgroupsV2MemoryControllerEnabled()) - return {}; - std::filesystem::path current_cgroup = cgroupV2PathOfProcess(); if (current_cgroup.empty()) return {}; From 05b595094868dd29e59ea9c766d0829f57ce94f9 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Aug 2024 17:31:56 +0100 Subject: [PATCH 229/265] small fix --- base/base/cgroupsv2.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/base/cgroupsv2.cpp b/base/base/cgroupsv2.cpp index d8f95b23ae7..b4ca8271d64 100644 --- a/base/base/cgroupsv2.cpp +++ b/base/base/cgroupsv2.cpp @@ -60,8 +60,8 @@ std::optional getCgroupsV2PathContainingFile(std::string_view file_ if (current_cgroup.empty()) return {}; - /// Return the bottom-most nested current memory file. If there is no such file at the current - /// level, try again at the parent level as memory settings are inherited. + /// Return the bottom-most nested file. If there is no such file at the current + /// level, try again at the parent level as settings are inherited. 
while (current_cgroup != default_cgroups_mount.parent_path()) { const auto path = current_cgroup / file_name; From c22265b889684b7fa34ba6816ce3910143ef7226 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 12 Aug 2024 17:11:11 +0000 Subject: [PATCH 230/265] Some fixups --- docs/en/operations/query-cache.md | 16 +++++----- docs/en/operations/settings/settings.md | 8 ++--- .../operations/system-tables/query_cache.md | 4 +-- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.cpp | 2 +- src/Interpreters/Cache/QueryCache.cpp | 25 ++++++++-------- src/Interpreters/Cache/QueryCache.h | 11 ++++--- src/Interpreters/executeQuery.cpp | 5 ++-- .../System/StorageSystemQueryCache.cpp | 16 +++++----- .../02494_query_cache_tag.reference | 12 ++++---- .../0_stateless/02494_query_cache_tag.sql | 30 ++++++++++--------- 11 files changed, 66 insertions(+), 65 deletions(-) diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index a6c4d74f4ac..384938e28f6 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -143,16 +143,18 @@ value can be specified at session, profile or query level using setting [query_c Entries in the query cache are compressed by default. This reduces the overall memory consumption at the cost of slower writes into / reads from the query cache. To disable compression, use setting [query_cache_compress_entries](settings/settings.md#query-cache-compress-entries). -Entries in the query cache can separate by tag, using setting [query_cache_tag](settings/settings.md#query-cache-tag). Queries with different tags are considered different entries. For example, the result of query +Sometimes it is useful to keep multiple results for the same query cached. This can be achieved using setting +[query_cache_tag](settings/settings.md#query-cache-tag) that acts as a label (or namespace) for query cache entries. The query cache +considers results of the same query with different tags as different. -``` sql -SELECT 1 SETTINGS use_query_cache = true; -SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'one'; -SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'one diff'; +Example for creating three different query cache entries for the same query: + +```sql +SELECT 1 SETTINGS use_query_cache = true; -- query_cache_tag is implicitly '' (empty string) +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 1'; +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 2'; ``` -have different entries in the query cache, find the specified tag in system table [system.query_cache](system-tables/query_cache.md) - ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#setting-max_block_size) rows. Due to filtering, aggregation, etc., result blocks are typically much smaller than 'max_block_size' but there are also cases where they are much bigger. Setting [query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results) (enabled by default) controls if result blocks diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 7b855665efb..e4a126249ca 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1802,14 +1802,14 @@ Default value: `0`. ## query_cache_tag {#query-cache-tag} -An arbitrary string to separate entries in the [query cache](../query-cache.md). -Queries with different values of this setting are considered different.
+A string which acts as a label for [query cache](../query-cache.md) entries. +The same queries with different tags are considered different by the query cache. Possible values: -- string: name of query cache tag +- Any string -Default value: `''`. +Default value: `''` ## query_cache_max_size_in_bytes {#query-cache-max-size-in-bytes} diff --git a/docs/en/operations/system-tables/query_cache.md b/docs/en/operations/system-tables/query_cache.md index 393b37d3616..9c48574a329 100644 --- a/docs/en/operations/system-tables/query_cache.md +++ b/docs/en/operations/system-tables/query_cache.md @@ -9,12 +9,12 @@ Columns: - `query` ([String](../../sql-reference/data-types/string.md)) — Query string. - `result_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Size of the query cache entry. +- `tag` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Tag of the query cache entry. - `stale` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is stale. - `shared` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is shared between multiple users. - `compressed` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is compressed. - `expires_at` ([DateTime](../../sql-reference/data-types/datetime.md)) — When the query cache entry becomes stale. - `key_hash` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — A hash of the query string, used as a key to find query cache entries. -- `tag` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — An arbitrary string to separate entries in the query cache. **Example** @@ -27,12 +27,12 @@ Row 1: ────── query: SELECT 1 SETTINGS use_query_cache = 1 result_size: 128 +tag: stale: 0 shared: 0 compressed: 1 expires_at: 2023-10-13 13:35:45 key_hash: 12188185624808016954 -tag: 1 row in set. Elapsed: 0.004 sec. ``` diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 75579f20187..f9ffab0ea57 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -676,7 +676,7 @@ class IColumn; M(Bool, query_cache_squash_partial_results, true, "Squash partial result blocks to blocks of size 'max_block_size'. Reduces performance of inserts into the query cache but improves the compressability of cache entries.", 0) \ M(Seconds, query_cache_ttl, 60, "After this time in seconds entries in the query cache become stale", 0) \ M(Bool, query_cache_share_between_users, false, "Allow other users to read entry in the query cache", 0) \ - M(String, query_cache_tag, "", "An arbitrary string to separate entries in the query cache. Queries with different values of this setting are considered different.", 0) \ + M(String, query_cache_tag, "", "A string which acts as a label for query cache entries. The same queries with different tags are considered different by the query cache.", 0) \ M(Bool, enable_sharing_sets_for_mutations, true, "Allow sharing set objects build for IN subqueries between different tasks of the same mutation. 
This reduces memory usage and CPU consumption", 0) \ \ M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 8fd16504e95..0528287e83e 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -82,9 +82,9 @@ static std::initializer_list user_id_, const std::vector & current_user_roles_, bool is_shared_, std::chrono::time_point expires_at_, - bool is_compressed_, - const String & tag_) - : ast_hash(calculateAstHash(ast_, current_database, settings, tag_)) + bool is_compressed_) + : ast_hash(calculateAstHash(ast_, current_database, settings)) , header(header_) , user_id(user_id_) , current_user_roles(current_user_roles_) @@ -247,12 +242,18 @@ QueryCache::Key::Key( , expires_at(expires_at_) , is_compressed(is_compressed_) , query_string(queryStringFromAST(ast_)) - , tag(tag_) + , tag(settings.query_cache_tag) { } -QueryCache::Key::Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional user_id_, const std::vector & current_user_roles_, const String & tag_) - : QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false, tag_) /// dummy values for everything != AST, current database, user name/roles +QueryCache::Key::Key( + ASTPtr ast_, + const String & current_database, + const Settings & settings, + std::optional user_id_, + const std::vector & current_user_roles_) + : QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) + /// ^^ dummy values for everything != AST, current database, user name/roles { } diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index 54de5edb145..c7ebaf4d26a 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -88,8 +88,9 @@ public: /// SYSTEM.QUERY_CACHE. const String query_string; - /// An arbitrary string to separate entries in the query cache. - /// Queries with different values of this setting are considered different. + /// A tag (namespace) to distinguish multiple entries of the same query. + /// This member currently has no use besides allowing SYSTEM.QUERY_CACHE to populate the 'tag' column conveniently without having to + /// compute the tag from the query AST. const String tag; /// Ctor to construct a Key for writing into query cache. @@ -100,15 +101,13 @@ public: std::optional user_id_, const std::vector & current_user_roles_, bool is_shared_, std::chrono::time_point expires_at_, - bool is_compressed, - const String & tag_); + bool is_compressed); /// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name).
Key(ASTPtr ast_, const String & current_database, const Settings & settings, - std::optional user_id_, const std::vector & current_user_roles_, - const String & tag_); + std::optional user_id_, const std::vector & current_user_roles_); bool operator==(const Key & other) const; }; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 6422d3128fa..fe87eed5570 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1129,7 +1129,7 @@ static std::tuple executeQueryImpl( { if (can_use_query_cache && settings.enable_reads_from_query_cache) { - QueryCache::Key key(ast, context->getCurrentDatabase(), *settings_copy, context->getUserID(), context->getCurrentRoles(), settings.query_cache_tag); + QueryCache::Key key(ast, context->getCurrentDatabase(), *settings_copy, context->getUserID(), context->getCurrentRoles()); QueryCache::Reader reader = query_cache->createReader(key); if (reader.hasCacheEntryForKey()) { @@ -1258,8 +1258,7 @@ static std::tuple executeQueryImpl( context->getUserID(), context->getCurrentRoles(), settings.query_cache_share_between_users, std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl), - settings.query_cache_compress_entries, - settings.query_cache_tag); + settings.query_cache_compress_entries); const size_t num_query_runs = settings.query_cache_min_query_runs ? query_cache->recordQueryRun(key) : 1; /// try to avoid locking a mutex in recordQueryRun() if (num_query_runs <= settings.query_cache_min_query_runs) diff --git a/src/Storages/System/StorageSystemQueryCache.cpp b/src/Storages/System/StorageSystemQueryCache.cpp index f81d50e8806..b3532ba40a7 100644 --- a/src/Storages/System/StorageSystemQueryCache.cpp +++ b/src/Storages/System/StorageSystemQueryCache.cpp @@ -16,12 +16,12 @@ ColumnsDescription StorageSystemQueryCache::getColumnsDescription() { {"query", std::make_shared(), "Query string."}, {"result_size", std::make_shared(), "Size of the query cache entry."}, + {"tag", std::make_shared(std::make_shared()), "Tag of the query cache entry."}, {"stale", std::make_shared(), "If the query cache entry is stale."}, {"shared", std::make_shared(), "If the query cache entry is shared between multiple users."}, {"compressed", std::make_shared(), "If the query cache entry is compressed."}, {"expires_at", std::make_shared(), "When the query cache entry becomes stale."}, - {"key_hash", std::make_shared(), "A hash of the query string, used as a key to find query cache entries."}, - {"tag", std::make_shared(std::make_shared()), "An arbitrary string to separate entries in the query cache."} + {"key_hash", std::make_shared(), "A hash of the query string, used as a key to find query cache entries."} }; } @@ -53,12 +53,12 @@ void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr res_columns[0]->insert(key.query_string); /// approximates the original query string res_columns[1]->insert(QueryCache::QueryCacheEntryWeight()(*query_result)); - res_columns[2]->insert(key.expires_at < std::chrono::system_clock::now()); - res_columns[3]->insert(key.is_shared); - res_columns[4]->insert(key.is_compressed); - res_columns[5]->insert(std::chrono::system_clock::to_time_t(key.expires_at)); - res_columns[6]->insert(key.ast_hash.low64); /// query cache considers aliases (issue #56258) - res_columns[7]->insert(key.tag); + res_columns[2]->insert(key.tag); + res_columns[3]->insert(key.expires_at < std::chrono::system_clock::now()); + res_columns[4]->insert(key.is_shared); + 
res_columns[5]->insert(key.is_compressed); + res_columns[6]->insert(std::chrono::system_clock::to_time_t(key.expires_at)); + res_columns[7]->insert(key.ast_hash.low64); /// query cache considers aliases (issue #56258) } } diff --git a/tests/queries/0_stateless/02494_query_cache_tag.reference b/tests/queries/0_stateless/02494_query_cache_tag.reference index 055d3d4c5bb..f7be5c06ecf 100644 --- a/tests/queries/0_stateless/02494_query_cache_tag.reference +++ b/tests/queries/0_stateless/02494_query_cache_tag.reference @@ -1,14 +1,12 @@ 1 -1 +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = \'abc\' abc --- 1 1 -1 -2 +SELECT 1 SETTINGS use_query_cache = true +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = \'abc\' abc --- 1 1 -1 -2 -1 -3 +SELECT 1 SETTINGS use_query_cache = true abc +SELECT 1 SETTINGS use_query_cache = true def diff --git a/tests/queries/0_stateless/02494_query_cache_tag.sql b/tests/queries/0_stateless/02494_query_cache_tag.sql index 054607058e8..62d36f6ebe6 100644 --- a/tests/queries/0_stateless/02494_query_cache_tag.sql +++ b/tests/queries/0_stateless/02494_query_cache_tag.sql @@ -3,30 +3,32 @@ SYSTEM DROP QUERY CACHE; --- Cache the query after the query invocation -SELECT 1 SETTINGS use_query_cache = true; -SELECT COUNT(*) FROM system.query_cache; +-- Store the result a single query with a tag in the query cache and check that the system table knows about the tag +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'abc'; + +SELECT query, tag FROM system.query_cache; SELECT '---'; SYSTEM DROP QUERY CACHE; --- Queries with tag value of this setting or not are considered different cache entries. -SELECT 1 SETTINGS use_query_cache = true; -SELECT COUNT(*) FROM system.query_cache; -SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'one'; -SELECT COUNT(*) FROM system.query_cache; +-- Store the result of the same query with two different tags. The cache should store two entries. +SELECT 1 SETTINGS use_query_cache = true; -- default query_cache_tag = '' +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'abc'; +SELECT query, tag FROM system.query_cache ORDER BY ALL; SELECT '---'; SYSTEM DROP QUERY CACHE; --- Queries with different tags values of this setting are considered different cache entries. +-- Like before but the tag is set standalone. + +SET query_cache_tag = 'abc'; SELECT 1 SETTINGS use_query_cache = true; -SELECT COUNT(*) FROM system.query_cache; -SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'one'; -SELECT COUNT(*) FROM system.query_cache; -SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'one diff'; -SELECT COUNT(*) FROM system.query_cache; + +SET query_cache_tag = 'def'; +SELECT 1 SETTINGS use_query_cache = true; + +SELECT query, tag FROM system.query_cache ORDER BY ALL; SYSTEM DROP QUERY CACHE; From 38405dd7cdfb7189c1a1184c0eb8b3e23fda55e6 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Mon, 12 Aug 2024 18:14:22 +0000 Subject: [PATCH 231/265] add projection merge doc --- docs/en/operations/settings/merge-tree-settings.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 67fa45c20cd..a3bd919d3ce 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -1041,3 +1041,14 @@ Compression rates of LZ4 or ZSTD improve on average by 20-40%. 
This setting works best for tables with no primary key or a low-cardinality primary key, i.e. a table with only few distinct primary key values. High-cardinality primary keys, e.g. involving timestamp columns of type `DateTime64`, are not expected to benefit from this setting. + +### deduplicate_merge_projection_mode + +Whether to allow creating projections for tables with a non-classic MergeTree engine, that is, one that is not a (Replicated, Shared) MergeTree. If allowed, it also determines the action taken when projections are merged: either drop or rebuild. Classic MergeTree ignores this setting. +It also controls `OPTIMIZE DEDUPLICATE`, where it has an effect on all MergeTree family members. + +Possible values: + +- throw, drop, rebuild + +Default value: throw \ No newline at end of file From aa7a2bcb02f6c2f48bcc7acca3bcec2f1a16130b Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Mon, 12 Aug 2024 20:34:02 +0200 Subject: [PATCH 232/265] Fix typo --- docs/en/sql-reference/functions/other-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 21e4a6599ea..4f51dc6b8d3 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -4073,7 +4073,7 @@ getSubcolumn(col_name, subcol_name) **Returned value** -- Returns the extracted sub-colum. +- Returns the extracted sub-column. **Example** From eab8594570e703a766f2f91ae3d13b0ed640b554 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Mon, 12 Aug 2024 20:35:33 +0200 Subject: [PATCH 233/265] Update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 862f38976ce..51246d990fa 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1697,6 +1697,8 @@ getOSKernelVersion getServerPort getSetting getSizeOfEnumType +getSubcolumn +getTypeSerializationStreams getblockinfo getevents ghcnd From 45a14fa0ce3ae94a374bbf955ba0fb7109b7e678 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 12 Aug 2024 18:54:06 +0000 Subject: [PATCH 234/265] Fix spelling --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index ffd9fae7f45..03ec8e1752c 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2115,6 +2115,7 @@ namenode namepassword nameprofile namequota +namespace namespaces natively nats From 469c1698b0dbf8a91a6e94a2bab0669f33bf7be2 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Mon, 12 Aug 2024 19:31:57 +0000 Subject: [PATCH 235/265] Fix 'Refresh set entry already exists' --- src/Interpreters/InterpreterSystemQuery.cpp | 17 +++-- src/Interpreters/InterpreterSystemQuery.h | 2 +- src/Storages/MaterializedView/RefreshSet.cpp | 75 +++++++++++-------- src/Storages/MaterializedView/RefreshSet.h | 23 +++--- src/Storages/MaterializedView/RefreshTask.cpp | 10 +-- src/Storages/MaterializedView/RefreshTask.h | 3 +- .../MaterializedView/RefreshTask_fwd.h | 1 + src/Storages/StorageMaterializedView.cpp | 1 - .../System/StorageSystemViewRefreshes.cpp 
| 3 + 9 files changed, 77 insertions(+), 58 deletions(-) diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index ef6d1040c5e..1cd55a0020c 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -663,13 +663,16 @@ BlockIO InterpreterSystemQuery::execute() startStopAction(ActionLocks::ViewRefresh, false); break; case Type::REFRESH_VIEW: - getRefreshTask()->run(); + for (const auto & task : getRefreshTasks()) + task->run(); break; case Type::CANCEL_VIEW: - getRefreshTask()->cancel(); + for (const auto & task : getRefreshTasks()) + task->cancel(); break; case Type::TEST_VIEW: - getRefreshTask()->setFakeTime(query.fake_time_for_view); + for (const auto & task : getRefreshTasks()) + task->setFakeTime(query.fake_time_for_view); break; case Type::DROP_REPLICA: dropReplica(query); @@ -1242,15 +1245,15 @@ void InterpreterSystemQuery::flushDistributed(ASTSystemQuery & query) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "SYSTEM RESTART DISK is not supported"); } -RefreshTaskHolder InterpreterSystemQuery::getRefreshTask() +RefreshTaskList InterpreterSystemQuery::getRefreshTasks() { auto ctx = getContext(); ctx->checkAccess(AccessType::SYSTEM_VIEWS); - auto task = ctx->getRefreshSet().getTask(table_id); - if (!task) + auto tasks = ctx->getRefreshSet().findTasks(table_id); + if (tasks.empty()) throw Exception( ErrorCodes::BAD_ARGUMENTS, "Refreshable view {} doesn't exist", table_id.getNameForLogs()); - return task; + return tasks; } diff --git a/src/Interpreters/InterpreterSystemQuery.h b/src/Interpreters/InterpreterSystemQuery.h index 776dd7915f0..f44fe930b04 100644 --- a/src/Interpreters/InterpreterSystemQuery.h +++ b/src/Interpreters/InterpreterSystemQuery.h @@ -74,7 +74,7 @@ private: void flushDistributed(ASTSystemQuery & query); [[noreturn]] void restartDisk(String & name); - RefreshTaskHolder getRefreshTask(); + RefreshTaskList getRefreshTasks(); AccessRightsElements getRequiredAccessForDDLOnCluster() const; void startStopAction(StorageActionBlockType action_type, bool start); diff --git a/src/Storages/MaterializedView/RefreshSet.cpp b/src/Storages/MaterializedView/RefreshSet.cpp index a3ef327dc24..43aa0ada99b 100644 --- a/src/Storages/MaterializedView/RefreshSet.cpp +++ b/src/Storages/MaterializedView/RefreshSet.cpp @@ -27,6 +27,7 @@ RefreshSet::Handle & RefreshSet::Handle::operator=(Handle && other) noexcept parent_set = std::exchange(other.parent_set, nullptr); id = std::move(other.id); dependencies = std::move(other.dependencies); + iter = std::move(other.iter); metric_increment = std::move(other.metric_increment); return *this; } @@ -39,21 +40,21 @@ RefreshSet::Handle::~Handle() void RefreshSet::Handle::rename(StorageID new_id) { std::lock_guard lock(parent_set->mutex); - parent_set->removeDependenciesLocked(id, dependencies); - auto it = parent_set->tasks.find(id); - auto task = it->second; - parent_set->tasks.erase(it); + RefreshTaskHolder task = *iter; + parent_set->removeDependenciesLocked(task, dependencies); + parent_set->removeTaskLocked(id, iter); id = new_id; - parent_set->tasks.emplace(id, task); - parent_set->addDependenciesLocked(id, dependencies); + iter = parent_set->addTaskLocked(id, task); + parent_set->addDependenciesLocked(task, dependencies); } void RefreshSet::Handle::changeDependencies(std::vector deps) { std::lock_guard lock(parent_set->mutex); - parent_set->removeDependenciesLocked(id, dependencies); + RefreshTaskHolder task = *iter; + 
parent_set->removeDependenciesLocked(task, dependencies); dependencies = std::move(deps); - parent_set->addDependenciesLocked(id, dependencies); + parent_set->addDependenciesLocked(task, dependencies); } void RefreshSet::Handle::reset() @@ -63,8 +64,8 @@ void RefreshSet::Handle::reset() { std::lock_guard lock(parent_set->mutex); - parent_set->removeDependenciesLocked(id, dependencies); - parent_set->tasks.erase(id); + parent_set->removeDependenciesLocked(*iter, dependencies); + parent_set->removeTaskLocked(id, iter); } parent_set = nullptr; @@ -76,37 +77,50 @@ RefreshSet::RefreshSet() = default; void RefreshSet::emplace(StorageID id, const std::vector & dependencies, RefreshTaskHolder task) { std::lock_guard guard(mutex); - auto [it, is_inserted] = tasks.emplace(id, task); - if (!is_inserted) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Refresh set entry already exists for table {}", id.getFullTableName()); - addDependenciesLocked(id, dependencies); + const auto iter = addTaskLocked(id, task); + addDependenciesLocked(task, dependencies); - task->setRefreshSetHandleUnlock(Handle(this, id, dependencies)); + task->setRefreshSetHandleUnlock(Handle(this, id, iter, dependencies)); } -void RefreshSet::addDependenciesLocked(const StorageID & id, const std::vector & dependencies) +RefreshTaskList::iterator RefreshSet::addTaskLocked(StorageID id, RefreshTaskHolder task) +{ + RefreshTaskList & list = tasks[id]; + list.push_back(task); + return std::prev(list.end()); +} + +void RefreshSet::removeTaskLocked(StorageID id, RefreshTaskList::iterator iter) +{ + const auto it = tasks.find(id); + it->second.erase(iter); + if (it->second.empty()) + tasks.erase(it); +} + +void RefreshSet::addDependenciesLocked(RefreshTaskHolder task, const std::vector & dependencies) { for (const StorageID & dep : dependencies) - dependents[dep].insert(id); + dependents[dep].insert(task); } -void RefreshSet::removeDependenciesLocked(const StorageID & id, const std::vector & dependencies) +void RefreshSet::removeDependenciesLocked(RefreshTaskHolder task, const std::vector & dependencies) { for (const StorageID & dep : dependencies) { auto & set = dependents[dep]; - set.erase(id); + set.erase(task); if (set.empty()) dependents.erase(dep); } } -RefreshTaskHolder RefreshSet::getTask(const StorageID & id) const +RefreshTaskList RefreshSet::findTasks(const StorageID & id) const { std::lock_guard lock(mutex); - if (auto task = tasks.find(id); task != tasks.end()) - return task->second; - return nullptr; + if (auto it = tasks.find(id); it != tasks.end()) + return it->second; + return {}; } RefreshSet::InfoContainer RefreshSet::getInfo() const @@ -116,26 +130,23 @@ RefreshSet::InfoContainer RefreshSet::getInfo() const lock.unlock(); InfoContainer res; - for (const auto & [id, task] : tasks_copy) - res.push_back(task->getInfo()); + for (const auto & [id, list] : tasks_copy) + for (const auto & task : list) + res.push_back(task->getInfo()); return res; } std::vector RefreshSet::getDependents(const StorageID & id) const { std::lock_guard lock(mutex); - std::vector res; auto it = dependents.find(id); if (it == dependents.end()) return {}; - for (const StorageID & dep_id : it->second) - if (auto task = tasks.find(dep_id); task != tasks.end()) - res.push_back(task->second); - return res; + return std::vector(it->second.begin(), it->second.end()); } -RefreshSet::Handle::Handle(RefreshSet * parent_set_, StorageID id_, std::vector dependencies_) +RefreshSet::Handle::Handle(RefreshSet * parent_set_, StorageID id_, RefreshTaskList::iterator 
iter_, std::vector dependencies_) : parent_set(parent_set_), id(std::move(id_)), dependencies(std::move(dependencies_)) - , metric_increment(CurrentMetrics::Increment(CurrentMetrics::RefreshableViews)) {} + , iter(iter_), metric_increment(CurrentMetrics::Increment(CurrentMetrics::RefreshableViews)) {} } diff --git a/src/Storages/MaterializedView/RefreshSet.h b/src/Storages/MaterializedView/RefreshSet.h index eff445023a6..7fb583fd316 100644 --- a/src/Storages/MaterializedView/RefreshSet.h +++ b/src/Storages/MaterializedView/RefreshSet.h @@ -5,12 +5,11 @@ #include #include #include +#include namespace DB { -using DatabaseAndTableNameSet = std::unordered_set; - enum class RefreshState : RefreshTaskStateUnderlying { Disabled = 0, @@ -46,8 +45,7 @@ struct RefreshInfo class RefreshSet { public: - /// RAII thing that unregisters a task and its dependencies in destructor. - /// Storage IDs must be unique. Not thread safe. + /// RAII thing that unregisters a task and its dependencies in destructor. Not thread safe. class Handle { friend class RefreshSet; @@ -73,9 +71,10 @@ public: RefreshSet * parent_set = nullptr; StorageID id = StorageID::createEmpty(); std::vector dependencies; + RefreshTaskList::iterator iter; // in parent_set->tasks[id] std::optional metric_increment; - Handle(RefreshSet * parent_set_, StorageID id_, std::vector dependencies_); + Handle(RefreshSet * parent_set_, StorageID id_, RefreshTaskList::iterator iter_, std::vector dependencies_); }; using InfoContainer = std::vector; @@ -84,7 +83,9 @@ public: void emplace(StorageID id, const std::vector & dependencies, RefreshTaskHolder task); - RefreshTaskHolder getTask(const StorageID & id) const; + /// Finds active refreshable view(s) by database and table name. + /// Normally there's at most one, but we allow name collisions here, just in case. + RefreshTaskList findTasks(const StorageID & id) const; InfoContainer getInfo() const; @@ -92,8 +93,8 @@ public: std::vector getDependents(const StorageID & id) const; private: - using TaskMap = std::unordered_map; - using DependentsMap = std::unordered_map; + using TaskMap = std::unordered_map; + using DependentsMap = std::unordered_map, StorageID::DatabaseAndTableNameHash, StorageID::DatabaseAndTableNameEqual>; /// Protects the two maps below, not locked for any nontrivial operations (e.g. operations that /// block or lock other mutexes). 
@@ -102,8 +103,10 @@ private: TaskMap tasks; DependentsMap dependents; - void addDependenciesLocked(const StorageID & id, const std::vector & dependencies); - void removeDependenciesLocked(const StorageID & id, const std::vector & dependencies); + RefreshTaskList::iterator addTaskLocked(StorageID id, RefreshTaskHolder task); + void removeTaskLocked(StorageID id, RefreshTaskList::iterator iter); + void addDependenciesLocked(RefreshTaskHolder task, const std::vector & dependencies); + void removeDependenciesLocked(RefreshTaskHolder task, const std::vector & dependencies); }; } diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index aa8f51d5295..0837eaf97fd 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -33,7 +33,6 @@ RefreshTask::RefreshTask( {} RefreshTaskHolder RefreshTask::create( - const StorageMaterializedView & view, ContextMutablePtr context, const DB::ASTRefreshStrategy & strategy) { @@ -46,12 +45,9 @@ RefreshTaskHolder RefreshTask::create( t->refreshTask(); }); - std::vector deps; if (strategy.dependencies) for (auto && dependency : strategy.dependencies->children) - deps.emplace_back(dependency->as()); - - context->getRefreshSet().emplace(view.getStorageID(), deps, task); + task->initial_dependencies.emplace_back(dependency->as()); return task; } @@ -61,6 +57,7 @@ void RefreshTask::initializeAndStart(std::shared_ptr vi view_to_refresh = view; if (view->getContext()->getSettingsRef().stop_refreshable_materialized_views_on_startup) stop_requested = true; + view->getContext()->getRefreshSet().emplace(view->getStorageID(), initial_dependencies, shared_from_this()); populateDependencies(); advanceNextRefreshTime(currentTime()); refresh_task->schedule(); @@ -69,7 +66,8 @@ void RefreshTask::initializeAndStart(std::shared_ptr vi void RefreshTask::rename(StorageID new_id) { std::lock_guard guard(mutex); - set_handle.rename(new_id); + if (set_handle) + set_handle.rename(new_id); } void RefreshTask::alterRefreshParams(const DB::ASTRefreshStrategy & new_strategy) diff --git a/src/Storages/MaterializedView/RefreshTask.h b/src/Storages/MaterializedView/RefreshTask.h index 1f050a97cd9..623493f6aec 100644 --- a/src/Storages/MaterializedView/RefreshTask.h +++ b/src/Storages/MaterializedView/RefreshTask.h @@ -26,7 +26,6 @@ public: /// The only proper way to construct task static RefreshTaskHolder create( - const StorageMaterializedView & view, ContextMutablePtr context, const DB::ASTRefreshStrategy & strategy); @@ -84,9 +83,11 @@ private: RefreshSchedule refresh_schedule; RefreshSettings refresh_settings; // TODO: populate, use, update on alter + std::vector initial_dependencies; RefreshSet::Handle set_handle; /// StorageIDs of our dependencies that we're waiting for. 
+ using DatabaseAndTableNameSet = std::unordered_set<StorageID, StorageID::DatabaseAndTableNameHash, StorageID::DatabaseAndTableNameEqual>; DatabaseAndTableNameSet remaining_dependencies; bool time_arrived = false; diff --git a/src/Storages/MaterializedView/RefreshTask_fwd.h b/src/Storages/MaterializedView/RefreshTask_fwd.h index 1f366962eb6..9a0a122381e 100644 --- a/src/Storages/MaterializedView/RefreshTask_fwd.h +++ b/src/Storages/MaterializedView/RefreshTask_fwd.h @@ -11,5 +11,6 @@ class RefreshTask; using RefreshTaskStateUnderlying = UInt8; using RefreshTaskHolder = std::shared_ptr<RefreshTask>; using RefreshTaskObserver = std::weak_ptr<RefreshTask>; +using RefreshTaskList = std::list<RefreshTaskHolder>; } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 696136834d4..4c6c2fff209 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -203,7 +203,6 @@ StorageMaterializedView::StorageMaterializedView( { fixed_uuid = false; refresher = RefreshTask::create( - *this, getContext(), *query.refresh_strategy); refresh_on_start = mode < LoadingStrictnessLevel::ATTACH && !query.is_create_empty; diff --git a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp index 30539ed6b6a..061201017a7 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.cpp +++ b/src/Storages/System/StorageSystemViewRefreshes.cpp @@ -5,6 +5,7 @@ #include #include #include +#include <DataTypes/DataTypeUUID.h> #include #include #include @@ -19,6 +20,7 @@ ColumnsDescription StorageSystemViewRefreshes::getColumnsDescription() { {"database", std::make_shared<DataTypeString>(), "The name of the database the table is in."}, {"view", std::make_shared<DataTypeString>(), "Table name."}, + {"uuid", std::make_shared<DataTypeUUID>(), "Table uuid (Atomic database)."}, {"status", std::make_shared<DataTypeString>(), "Current state of the refresh."}, {"last_refresh_result", std::make_shared<DataTypeString>(), "Outcome of the latest refresh attempt."}, {"last_refresh_time", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime>()), @@ -63,6 +65,7 @@ void StorageSystemViewRefreshes::fillData( std::size_t i = 0; res_columns[i++]->insert(refresh.view_id.getDatabaseName()); res_columns[i++]->insert(refresh.view_id.getTableName()); + res_columns[i++]->insert(refresh.view_id.uuid); res_columns[i++]->insert(toString(refresh.state)); res_columns[i++]->insert(toString(refresh.last_refresh_result)); From 5a683796a0dc8408ed2694af672675929352bf8f Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 12 Aug 2024 22:34:14 +0200 Subject: [PATCH 236/265] Update DatabaseReplicated.cpp --- src/Databases/DatabaseReplicated.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index f4aa925d6dd..6011b8e65e3 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1584,6 +1584,8 @@ void DatabaseReplicated::dropTable(ContextPtr local_context, const String & tabl } auto table = tryGetTable(table_name, getContext()); + if (!table) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} doesn't exist", table_name); if (table->getName() == "MaterializedView" || table->getName() == "WindowView") { /// Avoid recursive locking of metadata_mutex From dccf34dc9565699b79242ee9e2c36a0e021e0f21 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 12 Aug 2024 21:20:57 +0000 Subject: [PATCH 237/265] fix drift of profile event time --- src/Storages/MergeTree/MergeTask.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index
cb1921ede2b..3aa4d764685 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -944,8 +944,13 @@ bool MergeTask::MergeProjectionsStage::finalizeProjectionsAndWholeMerge() const MergeTask::StageRuntimeContextPtr MergeTask::MergeProjectionsStage::getContextForNextStage() { - ProfileEvents::increment(ProfileEvents::MergeExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); - ProfileEvents::increment(ProfileEvents::MergeProjectionStageExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + /// Do not increment for the projection stage because the time is already accounted for in the main task. + /// The projection stage has its own empty projection stage which may add a drift of several milliseconds. + if (global_ctx->parent_part == nullptr) + { + ProfileEvents::increment(ProfileEvents::MergeExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + ProfileEvents::increment(ProfileEvents::MergeProjectionStageExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); + } return nullptr; } @@ -1034,6 +1039,8 @@ bool MergeTask::execute() UInt64 stage_elapsed_ms = current_elapsed_ms - global_ctx->prev_elapsed_ms; global_ctx->prev_elapsed_ms = current_elapsed_ms; + auto next_stage_context = current_stage->getContextForNextStage(); + /// Do not increment for the projection stage because the time is already accounted for in the main task. if (global_ctx->parent_part == nullptr) { @@ -1041,8 +1048,6 @@ bool MergeTask::execute() ProfileEvents::increment(ProfileEvents::MergeTotalMilliseconds, stage_elapsed_ms); } - auto next_stage_context = current_stage->getContextForNextStage(); - /// Move to the next stage in an array of stages ++stages_iterator; if (stages_iterator == stages.end()) From 8136e6a45275b958a663ac0ee4682984e1536b07 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 12 Aug 2024 21:29:26 +0000 Subject: [PATCH 238/265] Update new prepareForSquashing method for ColumnDynamic --- src/Columns/ColumnDynamic.cpp | 23 ++++++++++--------- src/DataTypes/DataTypeVariant.cpp | 2 +- .../03210_dynamic_squashing.reference | 20 +++++++++------- .../0_stateless/03210_dynamic_squashing.sql | 17 ++++++++------ 4 files changed, 35 insertions(+), 27 deletions(-) diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index ecc2c738366..69b4c5dfc4e 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -987,7 +987,8 @@ void ColumnDynamic::prepareForSquashing(const Columns & source_columns) /// Internal variants of source dynamic columns may differ. /// We want to preallocate memory for all variants we will have after squashing. /// It may happen that the total number of variants in source columns will - /// exceed the limit, in this case we will choose the most frequent variants. + /// exceed the limit, in this case we will choose the most frequent variants + /// and insert the rest of the types into the shared variant. /// First, preallocate memory for variant discriminators and offsets. size_t new_size = size(); @@ -1030,17 +1031,14 @@ void ColumnDynamic::prepareForSquashing(const Columns & source_columns) DataTypePtr result_variant_type; /// Check if the number of all variants exceeds the limit. - if (all_variants.size() > max_dynamic_types || (all_variants.size() == max_dynamic_types && !total_variant_sizes.contains("String"))) + if (!canAddNewVariants(0, all_variants.size())) { /// We want to keep the most frequent variants in the resulting dynamic column.
DataTypes result_variants; - result_variants.reserve(max_dynamic_types); + result_variants.reserve(max_dynamic_types + 1); /// +1 for shared variant. /// Add variants from current variant column as we will not rewrite it. for (const auto & variant : assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariants()) result_variants.push_back(variant); - /// Add String variant in advance (if we didn't add it yet) as we must have it across variants when we reach the limit. - if (!variant_info.variant_name_to_discriminator.contains("String")) - result_variants.push_back(std::make_shared<DataTypeString>()); /// Create list of remaining variants with their sizes and sort it. std::vector<std::pair<size_t, DataTypePtr>> variants_with_sizes; @@ -1049,15 +1047,18 @@ void ColumnDynamic::prepareForSquashing(const Columns & source_columns) { /// Add variant to the list only if we didn't add it yet. auto variant_name = variant->getName(); - if (variant_name != "String" && !variant_info.variant_name_to_discriminator.contains(variant_name)) - variants_with_sizes.emplace_back(total_variant_sizes[variant->getName()], variant); + if (!variant_info.variant_name_to_discriminator.contains(variant_name)) + variants_with_sizes.emplace_back(total_variant_sizes[variant_name], variant); } std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater()); /// Add the most frequent variants until we reach max_dynamic_types. - size_t num_new_variants = max_dynamic_types - result_variants.size(); - for (size_t i = 0; i != num_new_variants; ++i) - result_variants.push_back(variants_with_sizes[i].second); + for (const auto & [_, new_variant] : variants_with_sizes) + { + if (!canAddNewVariant(result_variants.size())) + break; + result_variants.push_back(new_variant); + } result_variant_type = std::make_shared<DataTypeVariant>(result_variants); } diff --git a/src/DataTypes/DataTypeVariant.cpp b/src/DataTypes/DataTypeVariant.cpp index 8a10ca7d06d..cc8d04e94da 100644 --- a/src/DataTypes/DataTypeVariant.cpp +++ b/src/DataTypes/DataTypeVariant.cpp @@ -117,7 +117,7 @@ bool DataTypeVariant::equals(const IDataType & rhs) const /// The same data types with different custom names are considered different. /// For example, UInt8 and Bool.
- if ((variants[i]->hasCustomName() || rhs_variant.variants[i]) && variants[i]->getName() != rhs_variant.variants[i]->getName()) + if ((variants[i]->hasCustomName() || rhs_variant.variants[i]->hasCustomName()) && variants[i]->getName() != rhs_variant.variants[i]->getName()) return false; } diff --git a/tests/queries/0_stateless/03210_dynamic_squashing.reference b/tests/queries/0_stateless/03210_dynamic_squashing.reference index 4f5b5ba098c..1c23c22f550 100644 --- a/tests/queries/0_stateless/03210_dynamic_squashing.reference +++ b/tests/queries/0_stateless/03210_dynamic_squashing.reference @@ -1,8 +1,12 @@ -Array(UInt8) -None -UInt64 -None -String -UInt64 -String -UInt64 +1 +Array(UInt8) true +None false +UInt64 false +2 +Array(UInt8) true +None false +UInt64 false +3 +Array(UInt8) true +String false +UInt64 true diff --git a/tests/queries/0_stateless/03210_dynamic_squashing.sql b/tests/queries/0_stateless/03210_dynamic_squashing.sql index 23b47184e33..da3b911e796 100644 --- a/tests/queries/0_stateless/03210_dynamic_squashing.sql +++ b/tests/queries/0_stateless/03210_dynamic_squashing.sql @@ -4,17 +4,20 @@ set max_block_size = 1000; drop table if exists test; create table test (d Dynamic) engine=MergeTree order by tuple(); -insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=2), number < 3000, range(number % 5)::Dynamic(max_types=2), number::Dynamic(max_types=2)) from numbers(1000000); -select distinct dynamicType(d) as type from test order by type; +insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=1), number < 3000, range(number % 5)::Dynamic(max_types=1), number::Dynamic(max_types=1)) from numbers(1000000); +select '1'; +select distinct dynamicType(d) as type, isDynamicElementInSharedData(d) as flag from test order by type; drop table test; -create table test (d Dynamic(max_types=2)) engine=MergeTree order by tuple(); -insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=2), number < 3000, range(number % 5)::Dynamic(max_types=2), number::Dynamic(max_types=2)) from numbers(1000000); -select distinct dynamicType(d) as type from test order by type; +create table test (d Dynamic(max_types=1)) engine=MergeTree order by tuple(); +insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=1), number < 3000, range(number % 5)::Dynamic(max_types=1), number::Dynamic(max_types=1)) from numbers(1000000); +select '2'; +select distinct dynamicType(d) as type, isDynamicElementInSharedData(d) as flag from test order by type; truncate table test; -insert into test select multiIf(number < 1000, 'Str'::Dynamic(max_types=2), number < 3000, range(number % 5)::Dynamic(max_types=2), number::Dynamic(max_types=2)) from numbers(1000000); -select distinct dynamicType(d) as type from test order by type; +insert into test select multiIf(number < 1000, 'Str'::Dynamic(max_types=1), number < 3000, range(number % 5)::Dynamic(max_types=1), number::Dynamic(max_types=1)) from numbers(1000000); +select '3'; +select distinct dynamicType(d) as type, isDynamicElementInSharedData(d) as flag from test order by type; drop table test; From 83cb991f75f242b11beb48134d6ebfb26c73bcd7 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 12 Aug 2024 21:30:30 +0000 Subject: [PATCH 239/265] Fix special build --- src/Columns/ColumnDynamic.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index d80055c1716..e6e720765f6 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h 
@@ -368,10 +368,10 @@ public: /// Check if we can add new variant types. /// Shared variant doesn't count in the limit but is always present, /// so we should subtract 1 from the total types count. - bool canAddNewVariants(size_t current_variants_count, size_t new_variants_count) { return current_variants_count + new_variants_count - 1 <= max_dynamic_types; } - bool canAddNewVariant(size_t current_variants_count) { return canAddNewVariants(current_variants_count, 1); } - bool canAddNewVariants(size_t new_variants_count) { return canAddNewVariants(variant_info.variant_names.size(), new_variants_count); } - bool canAddNewVariant() { return canAddNewVariants(variant_info.variant_names.size(), 1); } + bool canAddNewVariants(size_t current_variants_count, size_t new_variants_count) const { return current_variants_count + new_variants_count - 1 <= max_dynamic_types; } + bool canAddNewVariant(size_t current_variants_count) const { return canAddNewVariants(current_variants_count, 1); } + bool canAddNewVariants(size_t new_variants_count) const { return canAddNewVariants(variant_info.variant_names.size(), new_variants_count); } + bool canAddNewVariant() const { return canAddNewVariants(variant_info.variant_names.size(), 1); } void setVariantType(const DataTypePtr & variant_type); void setMaxDynamicPaths(size_t max_dynamic_type_); From f12609440f081f19b0b21fdd15229cfdbb7cbb3d Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Mon, 12 Aug 2024 23:09:57 +0000 Subject: [PATCH 240/265] fashion --- src/Storages/MaterializedView/RefreshSet.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Storages/MaterializedView/RefreshSet.cpp b/src/Storages/MaterializedView/RefreshSet.cpp index 43aa0ada99b..7536f59c1e4 100644 --- a/src/Storages/MaterializedView/RefreshSet.cpp +++ b/src/Storages/MaterializedView/RefreshSet.cpp @@ -9,11 +9,6 @@ namespace CurrentMetrics namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - RefreshSet::Handle::Handle(Handle && other) noexcept { *this = std::move(other); From a517bc90cd9e369a4385f367e9f5e9688520c8bb Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Mon, 12 Aug 2024 21:42:47 -0400 Subject: [PATCH 241/265] Update PULL_REQUEST_TEMPLATE.md --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 8b6e957e1d8..3dcce68ab46 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -60,7 +60,7 @@ At a minimum, the following information should be added (but add more as needed) - [ ] Exclude: All with aarch64, release, debug --- - [ ] Run only fuzzers related jobs (libFuzzer fuzzers, AST fuzzers, etc.)
-- [ ] Exclude AST fuzzers +- [ ] Exclude: AST fuzzers --- - [ ] Do not test - [ ] Woolen Wolfdog From 5812dbcf2e4a9eada33a611ea7b63172f6ed0905 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 13 Aug 2024 11:53:33 +0200 Subject: [PATCH 242/265] Update 03210_dynamic_squashing.sql --- tests/queries/0_stateless/03210_dynamic_squashing.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03210_dynamic_squashing.sql b/tests/queries/0_stateless/03210_dynamic_squashing.sql index da3b911e796..d9ebc28fc43 100644 --- a/tests/queries/0_stateless/03210_dynamic_squashing.sql +++ b/tests/queries/0_stateless/03210_dynamic_squashing.sql @@ -1,3 +1,5 @@ +-- Tags: long + set allow_experimental_dynamic_type = 1; set max_block_size = 1000; From d2be1bf693045bebec341a850685b377ee3d88a9 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 13 Aug 2024 12:33:44 +0000 Subject: [PATCH 243/265] Fix FullSortingJoinTest.AsofGreaterGeneratedTestData with empty data --- src/Processors/tests/gtest_full_sorting_join.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/Processors/tests/gtest_full_sorting_join.cpp b/src/Processors/tests/gtest_full_sorting_join.cpp index f678d7984e8..befe5e28b5d 100644 --- a/src/Processors/tests/gtest_full_sorting_join.cpp +++ b/src/Processors/tests/gtest_full_sorting_join.cpp @@ -208,6 +208,12 @@ Block executePipeline(QueryPipeline && pipeline) template <typename T> void assertColumnVectorEq(const typename ColumnVector<T>::Container & expected, const Block & block, const std::string & name) { + if (expected.empty()) + { + ASSERT_TRUE(block.columns() == 0); + return; + } + const auto * actual = typeid_cast<const ColumnVector<T> *>(block.getByName(name).column.get()); ASSERT_TRUE(actual) << "unexpected column type: " << block.getByName(name).column->dumpStructure() << "expected: " << typeid(ColumnVector<T>).name(); @@ -230,6 +236,12 @@ void assertColumnVectorEq(const typename ColumnVector<T>::Container & expected, template <typename T> void assertColumnEq(const IColumn & expected, const Block & block, const std::string & name) { + if (expected.empty()) + { + ASSERT_TRUE(block.columns() == 0); + return; + } + const ColumnPtr & actual = block.getByName(name).column; ASSERT_TRUE(checkColumn<T>(*actual)); ASSERT_TRUE(checkColumn<T>(expected)); From 52dea79a906ecc3d9a19599612b1c2c7708876b6 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 13 Aug 2024 15:20:37 +0200 Subject: [PATCH 244/265] Update 03210_dynamic_squashing.sql --- tests/queries/0_stateless/03210_dynamic_squashing.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03210_dynamic_squashing.sql b/tests/queries/0_stateless/03210_dynamic_squashing.sql index d9ebc28fc43..71d09263fda 100644 --- a/tests/queries/0_stateless/03210_dynamic_squashing.sql +++ b/tests/queries/0_stateless/03210_dynamic_squashing.sql @@ -6,18 +6,18 @@ set max_block_size = 1000; drop table if exists test; create table test (d Dynamic) engine=MergeTree order by tuple(); -insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=1), number < 3000, range(number % 5)::Dynamic(max_types=1), number::Dynamic(max_types=1)) from numbers(1000000); +insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=1), number < 3000, range(number % 5)::Dynamic(max_types=1), number::Dynamic(max_types=1)) from numbers(100000); select '1'; select distinct dynamicType(d) as type, isDynamicElementInSharedData(d) as flag from test order by
type; drop table test; create table test (d Dynamic(max_types=1)) engine=MergeTree order by tuple(); -insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=1), number < 3000, range(number % 5)::Dynamic(max_types=1), number::Dynamic(max_types=1)) from numbers(1000000); +insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=1), number < 3000, range(number % 5)::Dynamic(max_types=1), number::Dynamic(max_types=1)) from numbers(100000); select '2'; select distinct dynamicType(d) as type, isDynamicElementInSharedData(d) as flag from test order by type; truncate table test; -insert into test select multiIf(number < 1000, 'Str'::Dynamic(max_types=1), number < 3000, range(number % 5)::Dynamic(max_types=1), number::Dynamic(max_types=1)) from numbers(1000000); +insert into test select multiIf(number < 1000, 'Str'::Dynamic(max_types=1), number < 3000, range(number % 5)::Dynamic(max_types=1), number::Dynamic(max_types=1)) from numbers(100000); select '3'; select distinct dynamicType(d) as type, isDynamicElementInSharedData(d) as flag from test order by type; drop table test; From 973b2405794cebeabf9497e3b10ed6180130b891 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 13 Aug 2024 15:35:14 +0200 Subject: [PATCH 245/265] Fix min/max time columns --- src/Storages/TimeSeries/TimeSeriesDefinitionNormalizer.cpp | 7 +++++-- .../0_stateless/03222_create_timeseries_table.reference | 0 .../queries/0_stateless/03222_create_timeseries_table.sql | 7 +++++++ 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03222_create_timeseries_table.reference create mode 100644 tests/queries/0_stateless/03222_create_timeseries_table.sql diff --git a/src/Storages/TimeSeries/TimeSeriesDefinitionNormalizer.cpp b/src/Storages/TimeSeries/TimeSeriesDefinitionNormalizer.cpp index f9e7290e514..746a6a28274 100644 --- a/src/Storages/TimeSeries/TimeSeriesDefinitionNormalizer.cpp +++ b/src/Storages/TimeSeries/TimeSeriesDefinitionNormalizer.cpp @@ -227,8 +227,11 @@ void TimeSeriesDefinitionNormalizer::addMissingColumns(ASTCreateQuery & create) /// We use Nullable(DateTime64(3)) as the default type of the `min_time` and `max_time` columns. /// It's nullable because it allows the aggregation (see aggregate_min_time_and_max_time) to work correctly even /// for rows in the "tags" table which don't have `min_time` and `max_time` (because they have no matching rows in the "data" table). - make_new_column(TimeSeriesColumnNames::MinTime, make_nullable(timestamp_type)); - make_new_column(TimeSeriesColumnNames::MaxTime, make_nullable(timestamp_type)); + + if (!is_next_column_named(TimeSeriesColumnNames::MinTime)) + make_new_column(TimeSeriesColumnNames::MinTime, make_nullable(timestamp_type)); + if (!is_next_column_named(TimeSeriesColumnNames::MaxTime)) + make_new_column(TimeSeriesColumnNames::MaxTime, make_nullable(timestamp_type)); } /// Add missing columns for the "metrics" table.
diff --git a/tests/queries/0_stateless/03222_create_timeseries_table.reference b/tests/queries/0_stateless/03222_create_timeseries_table.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03222_create_timeseries_table.sql b/tests/queries/0_stateless/03222_create_timeseries_table.sql new file mode 100644 index 00000000000..bdb29e7d366 --- /dev/null +++ b/tests/queries/0_stateless/03222_create_timeseries_table.sql @@ -0,0 +1,7 @@ +SET allow_experimental_time_series_table = 1; + +CREATE TABLE 03222_timeseries_table1 ENGINE = TimeSeries FORMAT Null; +CREATE TABLE 03222_timeseries_table2 ENGINE = TimeSeries SETTINGS store_min_time_and_max_time = 1, aggregate_min_time_and_max_time = 1 FORMAT Null; +--- This doesn't work because allow_nullable_key cannot be set in query for the internal MergeTree tables +--- CREATE TABLE 03222_timeseries_table3 ENGINE = TimeSeries SETTINGS store_min_time_and_max_time = 1, aggregate_min_time_and_max_time = 0; +CREATE TABLE 03222_timeseries_table4 ENGINE = TimeSeries SETTINGS store_min_time_and_max_time = 0 FORMAT Null; From 5da5bea8dfb768d3f6fd42f081a3d82e1c782e64 Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 13 Aug 2024 13:47:20 +0000 Subject: [PATCH 246/265] Reduce flakiness of a test --- .../0_stateless/00652_mergetree_mutations.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/queries/0_stateless/00652_mergetree_mutations.sh b/tests/queries/0_stateless/00652_mergetree_mutations.sh index a9d7908a1af..3b0966dd2c3 100755 --- a/tests/queries/0_stateless/00652_mergetree_mutations.sh +++ b/tests/queries/0_stateless/00652_mergetree_mutations.sh @@ -70,7 +70,23 @@ sleep 1 ${CLICKHOUSE_CLIENT} --query="INSERT INTO mutations_cleaner(x) VALUES (4)" sleep 0.1 +for i in {1..10} +do + + if [ $(${CLICKHOUSE_CLIENT} --query="SELECT count() FROM system.mutations WHERE database = '$CLICKHOUSE_DATABASE' and table = 'mutations_cleaner'") -eq 2 ]; then + break + fi + + if [[ $i -eq 10 ]]; then # the loop runs at most 10 times, so report the timeout on the final iteration + echo "Timed out while waiting for outdated mutation record to be deleted!" + fi + + sleep 1 + ${CLICKHOUSE_CLIENT} --query="INSERT INTO mutations_cleaner(x) VALUES (4)" +done + # Check that the first mutation is cleaned ${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, command, is_done FROM system.mutations WHERE database = '$CLICKHOUSE_DATABASE' and table = 'mutations_cleaner' ORDER BY mutation_id" ${CLICKHOUSE_CLIENT} --query="DROP TABLE mutations_cleaner" + From 16fd24fb1f82f109e3ac34665941c2acea7bf697 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Tue, 13 Aug 2024 13:51:24 +0000 Subject: [PATCH 247/265] Update fuzzer dictionary as per commit 99282e526a8aeb175e9f3e69fc9385070d03798a Also update README so that we have consistent behavior of sort across macOS and Linux.
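For example (an illustrative sketch, not taken from the change itself; it assumes a machine where the en_US.UTF-8 locale is installed), the two collations can disagree on the quoted entries in these dictionaries:

```
# C collation is byte-wise: '"' (0x22) and '_' (0x5F) have fixed positions,
# and every uppercase letter sorts before every lowercase one.
printf '"b"\n"A"\n"_x"\n' | LC_ALL=C sort
# A UTF-8 locale may apply collation rules that give punctuation low weight
# and fold case, producing a different (and platform-dependent) order.
# Assumes en_US.UTF-8 is available; any UTF-8 locale shows the same effect.
printf '"b"\n"A"\n"_x"\n' | LC_ALL=en_US.UTF-8 sort
```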
--- tests/fuzz/README.md | 4 +- tests/fuzz/all.dict | 926 ++-- tests/fuzz/dictionaries/datatypes.dict | 4416 ++++++++++++++++- tests/fuzz/dictionaries/functions.dict | 110 + .../{key_words.dict => keywords.dict} | 25 +- 5 files changed, 4895 insertions(+), 586 deletions(-) rename tests/fuzz/dictionaries/{key_words.dict => keywords.dict} (95%) diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md index 6b5b161b2d5..576ad66ed93 100644 --- a/tests/fuzz/README.md +++ b/tests/fuzz/README.md @@ -13,11 +13,11 @@ The list of datatypes generated via the following query: The list of keywords generated via the following query: ``` - clickhouse client -q "SELECT DISTINCT concat('\"', keyword, '\"') as res FROM system.keywords ORDER BY keyword" > key_words.dict + clickhouse client -q "SELECT DISTINCT concat('\"', keyword, '\"') as res FROM system.keywords ORDER BY keyword" > keywords.dict ``` Then merge all dictionaries into one (all.dict) ``` - cat ./dictionaries/* | sort | uniq > all.dict + cat ./dictionaries/* | LC_ALL=C sort | uniq > all.dict ``` \ No newline at end of file diff --git a/tests/fuzz/all.dict b/tests/fuzz/all.dict index f08e319f0d4..1c3c657d6b0 100644 --- a/tests/fuzz/all.dict +++ b/tests/fuzz/all.dict @@ -4,26 +4,26 @@ "accurateCastOrNull" "acos" "acosh" -"ADD" "ADD COLUMN" "ADD CONSTRAINT" +"ADD INDEX" +"ADD PROJECTION" +"ADD STATISTICS" +"ADD" "addDate" "addDays" "addHours" -"ADD INDEX" "addInterval" "addMicroseconds" "addMilliseconds" "addMinutes" "addMonths" "addNanoseconds" -"ADD PROJECTION" "addQuarters" "addressToLine" "addressToLineWithInlines" "addressToSymbol" "addSeconds" -"ADD STATISTIC" "addTupleOfIntervals" "addWeeks" "addYears" @@ -53,7 +53,6 @@ "ALL" "ALLOWED_LATENESS" "alphaTokens" -"ALTER" "ALTER COLUMN" "ALTER DATABASE" "ALTER LIVE VIEW" @@ -66,6 +65,7 @@ "ALTER TABLE" "ALTER TEMPORARY TABLE" "ALTER USER" +"ALTER" "analysisOfVariance" "analysisOfVarianceArgMax" "analysisOfVarianceArgMin" @@ -81,9 +81,9 @@ "analysisOfVarianceResample" "analysisOfVarianceSimpleState" "analysisOfVarianceState" -"and" -"AND" "AND STDOUT" +"AND" +"and" "anova" "anovaArgMax" "anovaArgMin" @@ -100,8 +100,8 @@ "anovaSimpleState" "anovaState" "ANTI" -"any" "ANY" +"any" "anyArgMax" "anyArgMin" "anyArray" @@ -136,6 +136,8 @@ "anyLastOrDefault" "anyLastOrNull" "anyLastResample" +"anyLastSimpleState" +"anyLastState" "anyLast_respect_nulls" "anyLast_respect_nullsArgMax" "anyLast_respect_nullsArgMin" @@ -151,14 +153,14 @@ "anyLast_respect_nullsResample" "anyLast_respect_nullsSimpleState" "anyLast_respect_nullsState" -"anyLastSimpleState" -"anyLastState" "anyMap" "anyMerge" "anyNull" "anyOrDefault" "anyOrNull" "anyResample" +"anySimpleState" +"anyState" "any_respect_nulls" "any_respect_nullsArgMax" "any_respect_nullsArgMin" @@ -174,8 +176,6 @@ "any_respect_nullsResample" "any_respect_nullsSimpleState" "any_respect_nullsState" -"anySimpleState" -"anyState" "any_value" "any_valueArgMax" "any_valueArgMin" @@ -189,6 +189,8 @@ "any_valueOrDefault" "any_valueOrNull" "any_valueResample" +"any_valueSimpleState" +"any_valueState" "any_value_respect_nulls" "any_value_respect_nullsArgMax" "any_value_respect_nullsArgMin" @@ -204,12 +206,10 @@ "any_value_respect_nullsResample" "any_value_respect_nullsSimpleState" "any_value_respect_nullsState" -"any_valueSimpleState" -"any_valueState" "APPEND" "appendTrailingCharIfAbsent" -"APPLY" "APPLY DELETED MASK" +"APPLY" "approx_top_count" "approx_top_countArgMax" "approx_top_countArgMin" @@ -285,43 +285,14 @@ "argMinResample" "argMinSimpleState" "argMinState" 
-"array" +"ARRAY JOIN" "Array" -"array_agg" -"array_aggArgMax" -"array_aggArgMin" -"array_aggArray" -"array_aggDistinct" -"array_aggForEach" -"array_aggIf" -"array_aggMap" -"array_aggMerge" -"array_aggNull" -"array_aggOrDefault" -"array_aggOrNull" -"array_aggResample" -"array_aggSimpleState" -"array_aggState" +"array" "arrayAll" "arrayAUC" "arrayAvg" "arrayCompact" "arrayConcat" -"array_concat_agg" -"array_concat_aggArgMax" -"array_concat_aggArgMin" -"array_concat_aggArray" -"array_concat_aggDistinct" -"array_concat_aggForEach" -"array_concat_aggIf" -"array_concat_aggMap" -"array_concat_aggMerge" -"array_concat_aggNull" -"array_concat_aggOrDefault" -"array_concat_aggOrNull" -"array_concat_aggResample" -"array_concat_aggSimpleState" -"array_concat_aggState" "arrayCount" "arrayCumSum" "arrayCumSumNonNegative" @@ -345,7 +316,6 @@ "arrayIntersect" "arrayJaccardIndex" "arrayJoin" -"ARRAY JOIN" "arrayLast" "arrayLastIndex" "arrayLastOrNull" @@ -382,6 +352,36 @@ "arrayUniq" "arrayWithConstant" "arrayZip" +"array_agg" +"array_aggArgMax" +"array_aggArgMin" +"array_aggArray" +"array_aggDistinct" +"array_aggForEach" +"array_aggIf" +"array_aggMap" +"array_aggMerge" +"array_aggNull" +"array_aggOrDefault" +"array_aggOrNull" +"array_aggResample" +"array_aggSimpleState" +"array_aggState" +"array_concat_agg" +"array_concat_aggArgMax" +"array_concat_aggArgMin" +"array_concat_aggArray" +"array_concat_aggDistinct" +"array_concat_aggForEach" +"array_concat_aggIf" +"array_concat_aggMap" +"array_concat_aggMerge" +"array_concat_aggNull" +"array_concat_aggOrDefault" +"array_concat_aggOrNull" +"array_concat_aggResample" +"array_concat_aggSimpleState" +"array_concat_aggState" "AS" "ASC" "ASCENDING" @@ -396,7 +396,6 @@ "atan" "atan2" "atanh" -"ATTACH" "ATTACH PART" "ATTACH PARTITION" "ATTACH POLICY" @@ -406,6 +405,7 @@ "ATTACH ROW POLICY" "ATTACH SETTINGS PROFILE" "ATTACH USER" +"ATTACH" "AUTO_INCREMENT" "avg" "avgArgMax" @@ -445,39 +445,17 @@ "base58Encode" "base64Decode" "base64Encode" -"base_backup" +"base64URLDecode" +"base64URLEncode" "basename" +"base_backup" "BCRYPT_HASH" "BCRYPT_PASSWORD" "BEGIN TRANSACTION" "BETWEEN" "BIDIRECTIONAL" -"BIGINT" -"BIGINT SIGNED" -"BIGINT UNSIGNED" "bin" -"BINARY" -"BINARY LARGE OBJECT" -"BINARY VARYING" -"BIT" "bitAnd" -"BIT_AND" -"BIT_ANDArgMax" -"BIT_ANDArgMin" -"BIT_ANDArray" -"BIT_ANDDistinct" -"BIT_ANDForEach" -"BIT_ANDIf" -"BIT_ANDMap" -"BIT_ANDMerge" -"BIT_ANDNull" -"BIT_ANDOrDefault" -"BIT_ANDOrNull" -"BIT_ANDResample" -"BIT_ANDSimpleState" -"BIT_ANDState" -"__bitBoolMaskAnd" -"__bitBoolMaskOr" "bitCount" "bitHammingDistance" "bitmapAnd" @@ -503,6 +481,31 @@ "bitmaskToList" "bitNot" "bitOr" +"bitPositionsToArray" +"bitRotateLeft" +"bitRotateRight" +"bitShiftLeft" +"bitShiftRight" +"bitSlice" +"bitTest" +"bitTestAll" +"bitTestAny" +"bitXor" +"BIT_AND" +"BIT_ANDArgMax" +"BIT_ANDArgMin" +"BIT_ANDArray" +"BIT_ANDDistinct" +"BIT_ANDForEach" +"BIT_ANDIf" +"BIT_ANDMap" +"BIT_ANDMerge" +"BIT_ANDNull" +"BIT_ANDOrDefault" +"BIT_ANDOrNull" +"BIT_ANDResample" +"BIT_ANDSimpleState" +"BIT_ANDState" "BIT_OR" "BIT_ORArgMax" "BIT_ORArgMin" @@ -518,18 +521,6 @@ "BIT_ORResample" "BIT_ORSimpleState" "BIT_ORState" -"bitPositionsToArray" -"bitRotateLeft" -"bitRotateRight" -"bitShiftLeft" -"bitShiftRight" -"bitSlice" -"__bitSwapLastTwo" -"bitTest" -"bitTestAll" -"bitTestAny" -"__bitWrapperFunc" -"bitXor" "BIT_XOR" "BIT_XORArgMax" "BIT_XORArgMin" @@ -546,13 +537,9 @@ "BIT_XORSimpleState" "BIT_XORState" "BLAKE3" -"BLOB" "blockNumber" "blockSerializedSize" "blockSize" -"bool" -"Bool" -"boolean" 
"BOTH" "boundingRatio" "boundingRatioArgMax" @@ -571,8 +558,6 @@ "boundingRatioState" "buildId" "BY" -"BYTE" -"BYTEA" "byteHammingDistance" "byteSize" "byteSlice" @@ -583,7 +568,6 @@ "caseWithExpression" "caseWithoutExpr" "caseWithoutExpression" -"_CAST" "CAST" "catboostEvaluate" "categoricalInformationValue" @@ -607,36 +591,41 @@ "CHANGE" "CHANGEABLE_IN_READONLY" "CHANGED" -"char" -"CHAR" -"CHARACTER" -"CHARACTER LARGE OBJECT" -"CHARACTER_LENGTH" -"CHARACTER VARYING" -"CHAR LARGE OBJECT" -"CHAR_LENGTH" +"changeDay" +"changeHour" +"changeMinute" +"changeMonth" +"changeSecond" +"changeYear" "CHAR VARYING" -"CHECK" +"CHAR" +"char" +"CHARACTER LARGE OBJECT" +"CHARACTER VARYING" +"CHARACTER" +"CHARACTER_LENGTH" +"CHAR_LENGTH" "CHECK ALL TABLES" "CHECK TABLE" +"CHECK" "cityHash64" +"clamp" "CLEANUP" "CLEAR COLUMN" "CLEAR INDEX" "CLEAR PROJECTION" -"CLEAR STATISTIC" -"CLOB" +"CLEAR STATISTICS" "CLUSTER" -"cluster_host_ids" "CLUSTERS" +"cluster_host_ids" "CN" "coalesce" "CODEC" "COLLATE" "COLUMN" "COLUMNS" -"COMMENT" "COMMENT COLUMN" +"COMMENT" "COMMIT" "COMPRESSION" "concat" @@ -644,8 +633,8 @@ "concatWithSeparator" "concatWithSeparatorAssumeInjective" "concat_ws" -"connection_id" "connectionId" +"connection_id" "CONST" "CONSTRAINT" "contingency" @@ -735,21 +724,13 @@ "countSubstringsCaseInsensitive" "countSubstringsCaseInsensitiveUTF8" "covarPop" -"COVAR_POP" "covarPopArgMax" -"COVAR_POPArgMax" "covarPopArgMin" -"COVAR_POPArgMin" "covarPopArray" -"COVAR_POPArray" "covarPopDistinct" -"COVAR_POPDistinct" "covarPopForEach" -"COVAR_POPForEach" "covarPopIf" -"COVAR_POPIf" "covarPopMap" -"COVAR_POPMap" "covarPopMatrix" "covarPopMatrixArgMax" "covarPopMatrixArgMin" @@ -766,17 +747,11 @@ "covarPopMatrixSimpleState" "covarPopMatrixState" "covarPopMerge" -"COVAR_POPMerge" "covarPopNull" -"COVAR_POPNull" "covarPopOrDefault" -"COVAR_POPOrDefault" "covarPopOrNull" -"COVAR_POPOrNull" "covarPopResample" -"COVAR_POPResample" "covarPopSimpleState" -"COVAR_POPSimpleState" "covarPopStable" "covarPopStableArgMax" "covarPopStableArgMin" @@ -793,23 +768,14 @@ "covarPopStableSimpleState" "covarPopStableState" "covarPopState" -"COVAR_POPState" "covarSamp" -"COVAR_SAMP" "covarSampArgMax" -"COVAR_SAMPArgMax" "covarSampArgMin" -"COVAR_SAMPArgMin" "covarSampArray" -"COVAR_SAMPArray" "covarSampDistinct" -"COVAR_SAMPDistinct" "covarSampForEach" -"COVAR_SAMPForEach" "covarSampIf" -"COVAR_SAMPIf" "covarSampMap" -"COVAR_SAMPMap" "covarSampMatrix" "covarSampMatrixArgMax" "covarSampMatrixArgMin" @@ -826,17 +792,11 @@ "covarSampMatrixSimpleState" "covarSampMatrixState" "covarSampMerge" -"COVAR_SAMPMerge" "covarSampNull" -"COVAR_SAMPNull" "covarSampOrDefault" -"COVAR_SAMPOrDefault" "covarSampOrNull" -"COVAR_SAMPOrNull" "covarSampResample" -"COVAR_SAMPResample" "covarSampSimpleState" -"COVAR_SAMPSimpleState" "covarSampStable" "covarSampStableArgMax" "covarSampStableArgMin" @@ -853,6 +813,35 @@ "covarSampStableSimpleState" "covarSampStableState" "covarSampState" +"COVAR_POP" +"COVAR_POPArgMax" +"COVAR_POPArgMin" +"COVAR_POPArray" +"COVAR_POPDistinct" +"COVAR_POPForEach" +"COVAR_POPIf" +"COVAR_POPMap" +"COVAR_POPMerge" +"COVAR_POPNull" +"COVAR_POPOrDefault" +"COVAR_POPOrNull" +"COVAR_POPResample" +"COVAR_POPSimpleState" +"COVAR_POPState" +"COVAR_SAMP" +"COVAR_SAMPArgMax" +"COVAR_SAMPArgMin" +"COVAR_SAMPArray" +"COVAR_SAMPDistinct" +"COVAR_SAMPForEach" +"COVAR_SAMPIf" +"COVAR_SAMPMap" +"COVAR_SAMPMerge" +"COVAR_SAMPNull" +"COVAR_SAMPOrDefault" +"COVAR_SAMPOrNull" +"COVAR_SAMPResample" +"COVAR_SAMPSimpleState" "COVAR_SAMPState" 
"cramersV" "cramersVArgMax" @@ -887,7 +876,6 @@ "CRC32" "CRC32IEEE" "CRC64" -"CREATE" "CREATE POLICY" "CREATE PROFILE" "CREATE QUOTA" @@ -897,25 +885,27 @@ "CREATE TABLE" "CREATE TEMPORARY TABLE" "CREATE USER" +"CREATE" "CROSS" "CUBE" "curdate" -"current_database" -"currentDatabase" -"current_date" "CURRENT GRANTS" -"currentProfiles" "CURRENT QUOTA" -"currentRoles" "CURRENT ROLES" "CURRENT ROW" -"current_schemas" -"currentSchemas" -"current_timestamp" "CURRENT TRANSACTION" -"currentUser" -"CURRENT_USER" +"currentDatabase" +"currentProfiles" +"currentRoles" +"currentSchemas" "CURRENTUSER" +"currentUser" +"current_database" +"current_date" +"current_schemas" +"current_timestamp" +"CURRENT_USER" +"current_user" "cutFragment" "cutIPv6" "cutQueryString" @@ -932,27 +922,29 @@ "cutWWW" "D" "damerauLevenshteinDistance" +"DATA INNER UUID" +"DATA" "DATABASE" "DATABASES" -"Date" "DATE" -"Date32" -"DATE_ADD" +"Date" "DATEADD" -"date_diff" -"dateDiff" -"DATE_DIFF" "DATEDIFF" -"DATE_FORMAT" +"dateDiff" "dateName" -"DATE_SUB" "DATESUB" "DateTime" -"DateTime32" "DateTime64" "dateTime64ToSnowflake" +"dateTime64ToSnowflakeID" "dateTimeToSnowflake" +"dateTimeToSnowflakeID" "dateTrunc" +"DATE_ADD" +"DATE_DIFF" +"date_diff" +"DATE_FORMAT" +"DATE_SUB" "DATE_TRUNC" "DAY" "DAYOFMONTH" @@ -960,10 +952,8 @@ "DAYOFYEAR" "DAYS" "DD" -"DEC" "Decimal" "Decimal128" -"Decimal256" "Decimal32" "Decimal64" "decodeHTMLComponent" @@ -972,17 +962,17 @@ "decodeXMLComponent" "decrypt" "DEDUPLICATE" -"DEFAULT" "DEFAULT DATABASE" -"defaultProfiles" "DEFAULT ROLE" +"DEFAULT" +"defaultProfiles" "defaultRoles" "defaultValueOfArgumentType" "defaultValueOfTypeName" "DEFINER" "degrees" -"DELETE" "DELETE WHERE" +"DELETE" "deltaSum" "deltaSumArgMax" "deltaSumArgMin" @@ -1014,6 +1004,21 @@ "deltaSumTimestampSimpleState" "deltaSumTimestampState" "demangle" +"denseRank" +"denseRankArgMax" +"denseRankArgMin" +"denseRankArray" +"denseRankDistinct" +"denseRankForEach" +"denseRankIf" +"denseRankMap" +"denseRankMerge" +"denseRankNull" +"denseRankOrDefault" +"denseRankOrNull" +"denseRankResample" +"denseRankSimpleState" +"denseRankState" "dense_rank" "dense_rankArgMax" "dense_rankArgMin" @@ -1033,9 +1038,9 @@ "DESC" "DESCENDING" "DESCRIBE" -"DETACH" "DETACH PART" "DETACH PARTITION" +"DETACH" "detectCharset" "detectLanguage" "detectLanguageMixed" @@ -1092,8 +1097,8 @@ "distanceL2Squared" "distanceLinf" "distanceLp" -"DISTINCT" "DISTINCT ON" +"DISTINCT" "DIV" "divide" "divideDecimal" @@ -1102,11 +1107,8 @@ "domainWithoutWWW" "domainWithoutWWWRFC" "dotProduct" -"DOUBLE" -"DOUBLE PRECISION" "DOUBLE_SHA1_HASH" "DOUBLE_SHA1_PASSWORD" -"DROP" "DROP COLUMN" "DROP CONSTRAINT" "DROP DEFAULT" @@ -1116,15 +1118,20 @@ "DROP PART" "DROP PARTITION" "DROP PROJECTION" -"DROP STATISTIC" +"DROP STATISTICS" "DROP TABLE" "DROP TEMPORARY TABLE" +"DROP" "dumpColumnStructure" +"dynamicElement" +"dynamicType" "e" "editDistance" +"editDistanceUTF8" "ELSE" -"empty" +"EMPTY AS" "EMPTY" +"empty" "emptyArrayDate" "emptyArrayDateTime" "emptyArrayFloat32" @@ -1139,10 +1146,9 @@ "emptyArrayUInt32" "emptyArrayUInt64" "emptyArrayUInt8" -"EMPTY AS" +"ENABLED ROLES" "enabledProfiles" "enabledRoles" -"ENABLED ROLES" "encodeURLComponent" "encodeURLFormComponent" "encodeXMLComponent" @@ -1168,11 +1174,10 @@ "entropySimpleState" "entropyState" "Enum" -"ENUM" "Enum16" "Enum8" -"EPHEMERAL" "EPHEMERAL SEQUENTIAL" +"EPHEMERAL" "equals" "erf" "erfc" @@ -1182,11 +1187,11 @@ "EVENT" "EVENTS" "EVERY" -"EXCEPT" "EXCEPT DATABASE" "EXCEPT DATABASES" "EXCEPT TABLE" "EXCEPT TABLES" +"EXCEPT" 
"EXCHANGE DICTIONARIES" "EXCHANGE TABLES" "EXISTS" @@ -1272,8 +1277,8 @@ "EXPRESSION" "EXTENDED" "EXTERNAL DDL FROM" -"extract" "EXTRACT" +"extract" "extractAll" "extractAllGroups" "extractAllGroupsHorizontal" @@ -1289,15 +1294,15 @@ "FALSE" "farmFingerprint64" "farmHash64" -"FETCH" "FETCH PART" "FETCH PARTITION" +"FETCH" "FIELDS" -"file" "FILE" -"filesystemAvailable" +"file" "FILESYSTEM CACHE" "FILESYSTEM CACHES" +"filesystemAvailable" "filesystemCapacity" "filesystemUnreserved" "FILTER" @@ -1322,6 +1327,8 @@ "first_valueOrDefault" "first_valueOrNull" "first_valueResample" +"first_valueSimpleState" +"first_valueState" "first_value_respect_nulls" "first_value_respect_nullsArgMax" "first_value_respect_nullsArgMin" @@ -1337,9 +1344,6 @@ "first_value_respect_nullsResample" "first_value_respect_nullsSimpleState" "first_value_respect_nullsState" -"first_valueSimpleState" -"first_valueState" -"FIXED" "FixedString" "flameGraph" "flameGraphArgMax" @@ -1358,19 +1362,17 @@ "flameGraphState" "flatten" "flattenTuple" -"FLOAT" "Float32" "Float64" "floor" "FOLLOWING" "FOR" "ForEach" -"FOREIGN" "FOREIGN KEY" +"FOREIGN" "FORGET PARTITION" -"format" "FORMAT" -"FORMAT_BYTES" +"format" "formatDateTime" "formatDateTimeInJodaSyntax" "formatQuery" @@ -1383,26 +1385,27 @@ "formatReadableTimeDelta" "formatRow" "formatRowNoNewline" +"FORMAT_BYTES" "FQDN" "fragment" "FREEZE" +"FROM INFILE" +"FROM SHARD" "FROM" -"FROM_BASE64" -"FROM_DAYS" "fromDaysSinceYearZero" "fromDaysSinceYearZero32" -"FROM INFILE" "fromModifiedJulianDay" "fromModifiedJulianDayOrNull" -"FROM SHARD" -"FROM_UNIXTIME" "fromUnixTimestamp" "fromUnixTimestamp64Micro" "fromUnixTimestamp64Milli" "fromUnixTimestamp64Nano" "fromUnixTimestampInJodaSyntax" -"from_utc_timestamp" "fromUTCTimestamp" +"FROM_BASE64" +"FROM_DAYS" +"FROM_UNIXTIME" +"from_utc_timestamp" "FULL" "fullHostName" "FULLTEXT" @@ -1411,44 +1414,46 @@ "gccMurmurHash" "gcd" "generateRandomStructure" +"generateSnowflakeID" "generateULID" "generateUUIDv4" +"generateUUIDv7" "geoDistance" "geohashDecode" "geohashEncode" "geohashesInBox" -"GEOMETRY" "geoToH3" "geoToS2" +"getClientHTTPHeader" "getMacro" "getOSKernelVersion" -"__getScalar" "getServerPort" "getSetting" "getSizeOfEnumType" "getSubcolumn" "getTypeSerializationStreams" +"GLOBAL IN" +"GLOBAL NOT IN" "GLOBAL" "globalIn" -"GLOBAL IN" "globalInIgnoreSet" "globalNotIn" -"GLOBAL NOT IN" "globalNotInIgnoreSet" "globalNotNullIn" "globalNotNullInIgnoreSet" "globalNullIn" "globalNullInIgnoreSet" "globalVariable" +"GRANT OPTION FOR" "GRANT" "GRANTEES" -"GRANT OPTION FOR" "GRANULARITY" "greatCircleAngle" "greatCircleDistance" "greater" "greaterOrEquals" "greatest" +"GROUP BY" "groupArray" "groupArrayArgMax" "groupArrayArgMin" @@ -1673,7 +1678,21 @@ "groupBitXorResample" "groupBitXorSimpleState" "groupBitXorState" -"GROUP BY" +"groupConcat" +"groupConcatArgMax" +"groupConcatArgMin" +"groupConcatArray" +"groupConcatDistinct" +"groupConcatForEach" +"groupConcatIf" +"groupConcatMap" +"groupConcatMerge" +"groupConcatNull" +"groupConcatOrDefault" +"groupConcatOrNull" +"groupConcatResample" +"groupConcatSimpleState" +"groupConcatState" "GROUPING SETS" "GROUPS" "groupUniqArray" @@ -1691,6 +1710,21 @@ "groupUniqArrayResample" "groupUniqArraySimpleState" "groupUniqArrayState" +"group_concat" +"group_concatArgMax" +"group_concatArgMin" +"group_concatArray" +"group_concatDistinct" +"group_concatForEach" +"group_concatIf" +"group_concatMap" +"group_concatMerge" +"group_concatNull" +"group_concatOrDefault" +"group_concatOrNull" +"group_concatResample" 
+"group_concatSimpleState" +"group_concatState" "H" "h3CellAreaM2" "h3CellAreaRads2" @@ -1753,6 +1787,8 @@ "hex" "HH" "HIERARCHICAL" +"hilbertDecode" +"hilbertEncode" "histogram" "histogramArgMax" "histogramArgMin" @@ -1773,8 +1809,8 @@ "hopEnd" "hopStart" "HOST" -"hostname" "hostName" +"hostname" "HOUR" "HOURS" "HTTP" @@ -1784,25 +1820,24 @@ "identity" "idnaDecode" "idnaEncode" -"if" "IF EMPTY" "IF EXISTS" "IF NOT EXISTS" +"if" "ifNotFinite" "ifNull" -"ignore" "IGNORE NULLS" -"ilike" +"ignore" "ILIKE" -"in" +"ilike" +"IN PARTITION" "IN" +"in" "INDEX" "INDEXES" "indexHint" "indexOf" "INDICES" -"INET4" -"INET6" "INET6_ATON" "INET6_NTOA" "INET_ATON" @@ -1812,28 +1847,18 @@ "initcap" "initcapUTF8" "initializeAggregation" -"initial_query_id" "initialQueryID" +"initial_query_id" "INJECTIVE" "INNER" -"IN PARTITION" "INSERT INTO" "instr" -"INT" -"INT1" -"Int128" "Int16" -"INT1 SIGNED" -"INT1 UNSIGNED" -"Int256" "Int32" "Int64" "Int8" "intDiv" "intDivOrZero" -"INTEGER" -"INTEGER SIGNED" -"INTEGER UNSIGNED" "INTERPOLATE" "INTERSECT" "INTERVAL" @@ -1854,11 +1879,8 @@ "intervalLengthSumResample" "intervalLengthSumSimpleState" "intervalLengthSumState" -"IntervalMicrosecond" -"IntervalMillisecond" "IntervalMinute" "IntervalMonth" -"IntervalNanosecond" "IntervalQuarter" "IntervalSecond" "IntervalWeek" @@ -1868,12 +1890,9 @@ "intHash32" "intHash64" "INTO OUTFILE" -"INT SIGNED" -"INT UNSIGNED" "INVISIBLE" "INVOKER" "IP" -"IPv4" "IPv4CIDRToRange" "IPv4NumToString" "IPv4NumToStringClassC" @@ -1881,12 +1900,14 @@ "IPv4StringToNumOrDefault" "IPv4StringToNumOrNull" "IPv4ToIPv6" -"IPv6" "IPv6CIDRToRange" "IPv6NumToString" "IPv6StringToNum" "IPv6StringToNumOrDefault" "IPv6StringToNumOrNull" +"IS NOT DISTINCT FROM" +"IS NOT NULL" +"IS NULL" "isConstant" "isDecimalOverflow" "isFinite" @@ -1896,16 +1917,13 @@ "isIPv6String" "isNaN" "isNotDistinctFrom" -"IS NOT DISTINCT FROM" "isNotNull" -"IS NOT NULL" "isNull" -"IS NULL" "isNullable" -"IS_OBJECT_ID" "isValidJSON" "isValidUTF8" "isZeroOrNull" +"IS_OBJECT_ID" "jaroSimilarity" "jaroWinklerSimilarity" "javaHash" @@ -1913,10 +1931,7 @@ "JOIN" "joinGet" "joinGetOrNull" -"JSON" "JSONArrayLength" -"JSON_ARRAY_LENGTH" -"JSON_EXISTS" "JSONExtract" "JSONExtractArrayRaw" "JSONExtractBool" @@ -1931,15 +1946,19 @@ "JSONHas" "JSONKey" "JSONLength" +"JSONMergePatch" "jsonMergePatch" -"JSON_QUERY" "JSONType" +"JSON_ARRAY_LENGTH" +"JSON_EXISTS" +"JSON_QUERY" "JSON_VALUE" "jumpConsistentHash" +"JWT" "kafkaMurmurHash" "KERBEROS" -"KEY" "KEY BY" +"KEY" "KEYED BY" "KEYS" "KILL" @@ -2046,6 +2065,8 @@ "last_valueOrDefault" "last_valueOrNull" "last_valueResample" +"last_valueSimpleState" +"last_valueState" "last_value_respect_nulls" "last_value_respect_nullsArgMax" "last_value_respect_nullsArgMin" @@ -2061,8 +2082,6 @@ "last_value_respect_nullsResample" "last_value_respect_nullsSimpleState" "last_value_respect_nullsState" -"last_valueSimpleState" -"last_valueState" "LAYOUT" "lcase" "lcm" @@ -2084,25 +2103,26 @@ "leadInFrameState" "LEADING" "least" -"left" -"LEFT" "LEFT ARRAY JOIN" +"LEFT" +"left" "leftPad" "leftPadUTF8" "leftUTF8" "lemmatize" "length" "lengthUTF8" +"LESS THAN" "less" "lessOrEquals" -"LESS THAN" "LEVEL" "levenshteinDistance" +"levenshteinDistanceUTF8" "lgamma" "LIFETIME" "LIGHTWEIGHT" -"like" "LIKE" +"like" "LIMIT" "LINEAR" "LinfDistance" @@ -2118,8 +2138,6 @@ "log1p" "log2" "logTrace" -"LONGBLOB" -"LONGTEXT" "LowCardinality" "lowCardinalityIndices" "lowCardinalityKeys" @@ -2129,8 +2147,8 @@ "LpDistance" "LpNorm" "LpNormalize" -"ltrim" "LTRIM" +"ltrim" "lttb" "lttbArgMax" 
"lttbArgMin" @@ -2170,7 +2188,6 @@ "mannWhitneyUTestSimpleState" "mannWhitneyUTestState" "map" -"Map" "mapAdd" "mapAll" "mapApply" @@ -2181,7 +2198,6 @@ "mapExtractKeyLike" "mapFilter" "mapFromArrays" -"MAP_FROM_ARRAYS" "mapFromString" "mapKeys" "mapPartialReverseSort" @@ -2192,18 +2208,19 @@ "mapSubtract" "mapUpdate" "mapValues" -"match" +"MAP_FROM_ARRAYS" "MATCH" -"materialize" -"MATERIALIZE" +"match" "MATERIALIZE COLUMN" -"MATERIALIZED" "MATERIALIZE INDEX" "MATERIALIZE PROJECTION" -"MATERIALIZE STATISTIC" +"MATERIALIZE STATISTICS" "MATERIALIZE TTL" -"max" +"MATERIALIZE" +"materialize" +"MATERIALIZED" "MAX" +"max" "max2" "maxArgMax" "maxArgMin" @@ -2507,14 +2524,11 @@ "medianTimingWeightedResample" "medianTimingWeightedSimpleState" "medianTimingWeightedState" -"MEDIUMBLOB" -"MEDIUMINT" -"MEDIUMINT SIGNED" -"MEDIUMINT UNSIGNED" -"MEDIUMTEXT" "MEMORY" "Merge" "MERGES" +"METRICS INNER UUID" +"METRICS" "metroHash64" "MI" "MICROSECOND" @@ -2522,8 +2536,8 @@ "mid" "MILLISECOND" "MILLISECONDS" -"min" "MIN" +"min" "min2" "minArgMax" "minArgMin" @@ -2562,18 +2576,20 @@ "MINUTES" "mismatches" "MM" -"mod" "MOD" -"MODIFY" +"mod" "MODIFY COLUMN" "MODIFY COMMENT" +"MODIFY DEFINER" "MODIFY ORDER BY" "MODIFY QUERY" "MODIFY REFRESH" "MODIFY SAMPLE BY" "MODIFY SETTING" "MODIFY SQL SECURITY" +"MODIFY STATISTICS" "MODIFY TTL" +"MODIFY" "modulo" "moduloLegacy" "moduloOrZero" @@ -2582,9 +2598,9 @@ "MONTHS" "mortonDecode" "mortonEncode" -"MOVE" "MOVE PART" "MOVE PARTITION" +"MOVE" "movingXXX" "MS" "multiFuzzyMatchAllIndices" @@ -2596,7 +2612,6 @@ "multiMatchAnyIndex" "multiply" "multiplyDecimal" -"MultiPolygon" "multiSearchAllPositions" "multiSearchAllPositionsCaseInsensitive" "multiSearchAllPositionsCaseInsensitiveUTF8" @@ -2624,18 +2639,10 @@ "NAMED COLLECTION" "NANOSECOND" "NANOSECONDS" -"NATIONAL CHAR" -"NATIONAL CHARACTER" -"NATIONAL CHARACTER LARGE OBJECT" -"NATIONAL CHARACTER VARYING" -"NATIONAL CHAR VARYING" -"NCHAR" -"NCHAR LARGE OBJECT" -"NCHAR VARYING" "negate" "neighbor" -"nested" "Nested" +"nested" "netloc" "NEXT" "ngramDistance" @@ -2678,7 +2685,6 @@ "nonNegativeDerivativeResample" "nonNegativeDerivativeSimpleState" "nonNegativeDerivativeState" -"NO_PASSWORD" "normalizedQueryHash" "normalizedQueryHashKeepNames" "normalizeL1" @@ -2696,13 +2702,19 @@ "normL2Squared" "normLinf" "normLp" -"not" -"NOT" "NOT BETWEEN" +"NOT IDENTIFIED" +"NOT ILIKE" +"NOT IN" +"NOT KEYED" +"NOT LIKE" +"NOT OVERRIDABLE" +"NOT" +"not" "notEmpty" "notEquals" -"nothing" "Nothing" +"nothing" "nothingArgMax" "nothingArgMin" "nothingArray" @@ -2746,21 +2758,16 @@ "nothingUInt64Resample" "nothingUInt64SimpleState" "nothingUInt64State" -"NOT IDENTIFIED" "notILike" -"NOT ILIKE" "notIn" -"NOT IN" "notInIgnoreSet" -"NOT KEYED" "notLike" -"NOT LIKE" "notNullIn" "notNullInIgnoreSet" -"NOT OVERRIDABLE" "now" "now64" "nowInBlock" +"NO_PASSWORD" "NS" "nth_value" "nth_valueArgMax" @@ -2792,28 +2799,25 @@ "ntileResample" "ntileSimpleState" "ntileState" -"Null" "NULL" +"Null" "Nullable" "nullIf" "nullIn" "nullInIgnoreSet" "NULLS" -"NUMERIC" -"NVARCHAR" -"Object" "OCTET_LENGTH" "OFFSET" -"ON" "ON DELETE" -"ONLY" "ON UPDATE" "ON VOLUME" +"ON" +"ONLY" "OPTIMIZE TABLE" -"or" -"OR" -"ORDER BY" "OR REPLACE" +"OR" +"or" +"ORDER BY" "OUTER" "OVER" "OVERRIDABLE" @@ -2838,32 +2842,64 @@ "parseDateTimeInJodaSyntaxOrZero" "parseDateTimeOrNull" "parseDateTimeOrZero" +"parseReadableSize" +"parseReadableSizeOrNull" +"parseReadableSizeOrZero" "parseTimeDelta" "PART" "PARTIAL" -"PARTITION" "PARTITION BY" +"PARTITION" +"partitionID" "partitionId" 
"PARTITIONS" "PART_MOVE_TO_SHARD" "PASTE" "path" "pathFull" +"percentRank" +"percentRankArgMax" +"percentRankArgMin" +"percentRankArray" +"percentRankDistinct" +"percentRankForEach" +"percentRankIf" +"percentRankMap" +"percentRankMerge" +"percentRankNull" +"percentRankOrDefault" +"percentRankOrNull" +"percentRankResample" +"percentRankSimpleState" +"percentRankState" +"percent_rank" +"percent_rankArgMax" +"percent_rankArgMin" +"percent_rankArray" +"percent_rankDistinct" +"percent_rankForEach" +"percent_rankIf" +"percent_rankMap" +"percent_rankMerge" +"percent_rankNull" +"percent_rankOrDefault" +"percent_rankOrNull" +"percent_rankResample" +"percent_rankSimpleState" +"percent_rankState" "PERIODIC REFRESH" "PERMANENTLY" "PERMISSIVE" -"PERSISTENT" "PERSISTENT SEQUENTIAL" +"PERSISTENT" "pi" "PIPELINE" "PLAINTEXT_PASSWORD" "PLAN" "plus" "pmod" -"Point" "pointInEllipses" "pointInPolygon" -"Polygon" "polygonAreaCartesian" "polygonAreaSpherical" "polygonConvexHullCartesian" @@ -2887,15 +2923,16 @@ "positionCaseInsensitive" "positionCaseInsensitiveUTF8" "positionUTF8" -"positive_modulo" "positiveModulo" +"positive_modulo" "pow" "power" "PRECEDING" "PRECISION" "PREWHERE" -"PRIMARY" "PRIMARY KEY" +"PRIMARY" +"printf" "PROFILE" "PROJECTION" "proportionsZTest" @@ -2906,6 +2943,7 @@ "punycodeEncode" "Q" "QQ" +"QUALIFY" "quantile" "quantileArgMax" "quantileArgMin" @@ -3418,12 +3456,12 @@ "quantileTimingWeightedState" "QUARTER" "QUARTERS" +"QUERY TREE" "QUERY" -"query_id" "queryID" "queryString" "queryStringAndFragment" -"QUERY TREE" +"query_id" "QUOTA" "radians" "rand" @@ -3440,16 +3478,16 @@ "randNegativeBinomial" "randNormal" "randomFixedString" -"RANDOMIZED" "RANDOMIZE FOR" +"RANDOMIZED" "randomPrintableASCII" "randomString" "randomStringUTF8" "randPoisson" "randStudentT" "randUniform" -"range" "RANGE" +"range" "rank" "rankArgMax" "rankArgMin" @@ -3481,20 +3519,22 @@ "rankSimpleState" "rankState" "READONLY" +"readWKTLineString" +"readWKTMultiLineString" "readWKTMultiPolygon" "readWKTPoint" "readWKTPolygon" "readWKTRing" -"REAL" "REALM" "RECOMPRESS" +"RECURSIVE" "REFERENCES" "REFRESH" "REGEXP" "regexpExtract" +"regexpQuoteMeta" "REGEXP_EXTRACT" "REGEXP_MATCHES" -"regexpQuoteMeta" "REGEXP_REPLACE" "regionHierarchy" "regionIn" @@ -3526,21 +3566,21 @@ "reinterpretAsUInt64" "reinterpretAsUInt8" "reinterpretAsUUID" -"REMOVE" "REMOVE SAMPLE BY" "REMOVE TTL" -"RENAME" +"REMOVE" "RENAME COLUMN" "RENAME DATABASE" "RENAME DICTIONARY" "RENAME TABLE" "RENAME TO" +"RENAME" "repeat" -"replace" +"REPLACE PARTITION" "REPLACE" +"replace" "replaceAll" "replaceOne" -"REPLACE PARTITION" "replaceRegexpAll" "replaceRegexpOne" "replicate" @@ -3570,12 +3610,11 @@ "reverseUTF8" "revision" "REVOKE" -"right" "RIGHT" +"right" "rightPad" "rightPadUTF8" "rightUTF8" -"Ring" "ROLLBACK" "ROLLUP" "round" @@ -3585,6 +3624,9 @@ "roundDuration" "roundToExp2" "ROW" +"rowNumberInAllBlocks" +"rowNumberInBlock" +"ROWS" "row_number" "row_numberArgMax" "row_numberArgMin" @@ -3592,8 +3634,6 @@ "row_numberDistinct" "row_numberForEach" "row_numberIf" -"rowNumberInAllBlocks" -"rowNumberInBlock" "row_numberMap" "row_numberMerge" "row_numberNull" @@ -3602,10 +3642,9 @@ "row_numberResample" "row_numberSimpleState" "row_numberState" -"ROWS" "rpad" -"rtrim" "RTRIM" +"rtrim" "runningAccumulate" "runningConcurrency" "runningDifference" @@ -3622,10 +3661,10 @@ "s2ToGeo" "S3" "SALT" -"SAMPLE" "SAMPLE BY" +"SAMPLE" +"SAN" "scalarProduct" -"__scalarSubqueryResult" "SCHEMA" "SCHEME" "SECOND" @@ -3681,18 +3720,18 @@ "seriesOutliersDetectTukey" 
"seriesPeriodDetectFFT" "SERVER" -"serverTimezone" "serverTimeZone" +"serverTimezone" "serverUUID" -"SET" -"SET DEFAULT" "SET DEFAULT ROLE" +"SET DEFAULT" "SET FAKE TIME" "SET NULL" -"SET ROLE" "SET ROLE DEFAULT" -"SETTINGS" +"SET ROLE" "SET TRANSACTION SNAPSHOT" +"SET" +"SETTINGS" "SHA1" "SHA224" "SHA256" @@ -3703,9 +3742,7 @@ "SHA512_256" "shardCount" "shardNum" -"SHOW" "SHOW ACCESS" -"showCertificate" "SHOW CREATE" "SHOW ENGINES" "SHOW FUNCTIONS" @@ -3713,11 +3750,12 @@ "SHOW PRIVILEGES" "SHOW PROCESSLIST" "SHOW SETTING" +"SHOW" +"showCertificate" "sigmoid" "sign" "SIGNED" "SIMPLE" -"SimpleAggregateFunction" "simpleJSONExtractBool" "simpleJSONExtractFloat" "simpleJSONExtractInt" @@ -3741,7 +3779,6 @@ "simpleLinearRegressionSimpleState" "simpleLinearRegressionState" "sin" -"SINGLE" "singleValueOrNull" "singleValueOrNullArgMax" "singleValueOrNullArgMin" @@ -3796,44 +3833,43 @@ "skewSampState" "sleep" "sleepEachRow" -"SMALLINT" -"SMALLINT SIGNED" -"SMALLINT UNSIGNED" +"snowflakeIDToDateTime" +"snowflakeIDToDateTime64" "snowflakeToDateTime" "snowflakeToDateTime64" "soundex" "SOURCE" "space" -"sparkbar" "sparkBar" -"sparkbarArgMax" +"sparkbar" "sparkBarArgMax" -"sparkbarArgMin" +"sparkbarArgMax" "sparkBarArgMin" -"sparkbarArray" +"sparkbarArgMin" "sparkBarArray" -"sparkbarDistinct" +"sparkbarArray" "sparkBarDistinct" -"sparkbarForEach" +"sparkbarDistinct" "sparkBarForEach" -"sparkbarIf" +"sparkbarForEach" "sparkBarIf" -"sparkbarMap" +"sparkbarIf" "sparkBarMap" -"sparkbarMerge" +"sparkbarMap" "sparkBarMerge" -"sparkbarNull" +"sparkbarMerge" "sparkBarNull" -"sparkbarOrDefault" +"sparkbarNull" "sparkBarOrDefault" -"sparkbarOrNull" +"sparkbarOrDefault" "sparkBarOrNull" -"sparkbarResample" +"sparkbarOrNull" "sparkBarResample" -"sparkbarSimpleState" +"sparkbarResample" "sparkBarSimpleState" -"sparkbarState" +"sparkbarSimpleState" "sparkBarState" +"sparkbarState" "SPATIAL" "splitByAlpha" "splitByChar" @@ -3860,42 +3896,29 @@ "SS" "SSH_KEY" "SSL_CERTIFICATE" +"START TRANSACTION" "startsWith" "startsWithUTF8" "State" -"STATISTIC" +"STATISTICS" "STD" "STDArgMax" "STDArgMin" "STDArray" "stddevPop" -"STDDEV_POP" "stddevPopArgMax" -"STDDEV_POPArgMax" "stddevPopArgMin" -"STDDEV_POPArgMin" "stddevPopArray" -"STDDEV_POPArray" "stddevPopDistinct" -"STDDEV_POPDistinct" "stddevPopForEach" -"STDDEV_POPForEach" "stddevPopIf" -"STDDEV_POPIf" "stddevPopMap" -"STDDEV_POPMap" "stddevPopMerge" -"STDDEV_POPMerge" "stddevPopNull" -"STDDEV_POPNull" "stddevPopOrDefault" -"STDDEV_POPOrDefault" "stddevPopOrNull" -"STDDEV_POPOrNull" "stddevPopResample" -"STDDEV_POPResample" "stddevPopSimpleState" -"STDDEV_POPSimpleState" "stddevPopStable" "stddevPopStableArgMax" "stddevPopStableArgMin" @@ -3912,35 +3935,20 @@ "stddevPopStableSimpleState" "stddevPopStableState" "stddevPopState" -"STDDEV_POPState" "stddevSamp" -"STDDEV_SAMP" "stddevSampArgMax" -"STDDEV_SAMPArgMax" "stddevSampArgMin" -"STDDEV_SAMPArgMin" "stddevSampArray" -"STDDEV_SAMPArray" "stddevSampDistinct" -"STDDEV_SAMPDistinct" "stddevSampForEach" -"STDDEV_SAMPForEach" "stddevSampIf" -"STDDEV_SAMPIf" "stddevSampMap" -"STDDEV_SAMPMap" "stddevSampMerge" -"STDDEV_SAMPMerge" "stddevSampNull" -"STDDEV_SAMPNull" "stddevSampOrDefault" -"STDDEV_SAMPOrDefault" "stddevSampOrNull" -"STDDEV_SAMPOrNull" "stddevSampResample" -"STDDEV_SAMPResample" "stddevSampSimpleState" -"STDDEV_SAMPSimpleState" "stddevSampStable" "stddevSampStableArgMax" "stddevSampStableArgMin" @@ -3957,6 +3965,35 @@ "stddevSampStableSimpleState" "stddevSampStableState" "stddevSampState" +"STDDEV_POP" 
+"STDDEV_POPArgMax" +"STDDEV_POPArgMin" +"STDDEV_POPArray" +"STDDEV_POPDistinct" +"STDDEV_POPForEach" +"STDDEV_POPIf" +"STDDEV_POPMap" +"STDDEV_POPMerge" +"STDDEV_POPNull" +"STDDEV_POPOrDefault" +"STDDEV_POPOrNull" +"STDDEV_POPResample" +"STDDEV_POPSimpleState" +"STDDEV_POPState" +"STDDEV_SAMP" +"STDDEV_SAMPArgMax" +"STDDEV_SAMPArgMin" +"STDDEV_SAMPArray" +"STDDEV_SAMPDistinct" +"STDDEV_SAMPForEach" +"STDDEV_SAMPIf" +"STDDEV_SAMPMap" +"STDDEV_SAMPMerge" +"STDDEV_SAMPNull" +"STDDEV_SAMPOrDefault" +"STDDEV_SAMPOrNull" +"STDDEV_SAMPResample" +"STDDEV_SAMPSimpleState" "STDDEV_SAMPState" "STDDistinct" "STDForEach" @@ -4008,10 +4045,10 @@ "stringJaccardIndex" "stringJaccardIndexUTF8" "stringToH3" -"str_to_date" -"str_to_map" "structureToCapnProtoSchema" "structureToProtobufSchema" +"str_to_date" +"str_to_map" "studentTTest" "studentTTestArgMax" "studentTTestArgMin" @@ -4029,16 +4066,16 @@ "studentTTestState" "subBitmap" "subDate" -"SUBPARTITION" "SUBPARTITION BY" +"SUBPARTITION" "SUBPARTITIONS" "substr" -"substring" "SUBSTRING" +"substring" "substringIndex" -"SUBSTRING_INDEX" "substringIndexUTF8" "substringUTF8" +"SUBSTRING_INDEX" "subtractDays" "subtractHours" "subtractInterval" @@ -4173,22 +4210,23 @@ "sumWithOverflowSimpleState" "sumWithOverflowState" "SUSPEND" -"svg" "SVG" +"svg" "SYNC" "synonyms" "SYNTAX" "SYSTEM" -"TABLE" "TABLE OVERRIDE" +"TABLE" "TABLES" +"TAGS INNER UUID" +"TAGS" "tan" "tanh" "tcpPort" -"TEMPORARY" "TEMPORARY TABLE" +"TEMPORARY" "TEST" -"TEXT" "tgamma" "theilsU" "theilsUArgMax" @@ -4208,33 +4246,31 @@ "THEN" "throwIf" "tid" -"TIME" "timeDiff" "timeSlot" "timeSlots" -"timestamp" "TIMESTAMP" -"TIMESTAMP_ADD" +"timestamp" "TIMESTAMPADD" -"timestamp_diff" -"timestampDiff" -"TIMESTAMP_DIFF" "TIMESTAMPDIFF" -"TIMESTAMP_SUB" +"timestampDiff" "TIMESTAMPSUB" -"timezone" +"TIMESTAMP_ADD" +"TIMESTAMP_DIFF" +"timestamp_diff" +"TIMESTAMP_SUB" "timeZone" -"timezoneOf" +"timezone" "timeZoneOf" -"timezoneOffset" +"timezoneOf" "timeZoneOffset" -"TINYBLOB" -"TINYINT" -"TINYINT SIGNED" -"TINYINT UNSIGNED" -"TINYTEXT" +"timezoneOffset" +"TO DISK" +"TO INNER UUID" +"TO SHARD" +"TO TABLE" +"TO VOLUME" "TO" -"TO_BASE64" "toBool" "toColumnTypeName" "toDate" @@ -4258,7 +4294,6 @@ "toDayOfMonth" "toDayOfWeek" "toDayOfYear" -"TO_DAYS" "toDaysSinceYearZero" "toDecimal128" "toDecimal128OrDefault" @@ -4277,7 +4312,6 @@ "toDecimal64OrNull" "toDecimal64OrZero" "toDecimalString" -"TO DISK" "toFixedString" "toFloat32" "toFloat32OrDefault" @@ -4288,7 +4322,6 @@ "toFloat64OrNull" "toFloat64OrZero" "toHour" -"TO INNER UUID" "toInt128" "toInt128OrDefault" "toInt128OrNull" @@ -4389,7 +4422,6 @@ "toRelativeWeekNum" "toRelativeYearNum" "toSecond" -"TO SHARD" "toStartOfDay" "toStartOfFifteenMinutes" "toStartOfFiveMinute" @@ -4409,11 +4441,10 @@ "toStartOfYear" "toString" "toStringCutToZero" -"TO TABLE" "TOTALS" "toTime" -"toTimezone" "toTimeZone" +"toTimezone" "toTypeName" "toUInt128" "toUInt128OrDefault" @@ -4439,25 +4470,26 @@ "toUInt8OrDefault" "toUInt8OrNull" "toUInt8OrZero" -"TO_UNIXTIME" "toUnixTimestamp" "toUnixTimestamp64Micro" "toUnixTimestamp64Milli" "toUnixTimestamp64Nano" -"to_utc_timestamp" "toUTCTimestamp" "toUUID" "toUUIDOrDefault" "toUUIDOrNull" "toUUIDOrZero" "toValidUTF8" -"TO VOLUME" "toWeek" "toYear" "toYearWeek" "toYYYYMM" "toYYYYMMDD" "toYYYYMMDDhhmmss" +"TO_BASE64" +"TO_DAYS" +"TO_UNIXTIME" +"to_utc_timestamp" "TRACKING ONLY" "TRAILING" "TRANSACTION" @@ -4468,17 +4500,18 @@ "translate" "translateUTF8" "TRIGGER" -"trim" "TRIM" +"trim" "trimBoth" "trimLeft" "trimRight" "TRUE" "trunc" 
-"truncate" "TRUNCATE" +"truncate" "tryBase58Decode" "tryBase64Decode" +"tryBase64URLDecode" "tryDecrypt" "tryIdnaEncode" "tryPunycodeDecode" @@ -4486,8 +4519,8 @@ "tumble" "tumbleEnd" "tumbleStart" -"tuple" "Tuple" +"tuple" "tupleConcat" "tupleDivide" "tupleDivideByNumber" @@ -4502,15 +4535,14 @@ "tupleModuloByNumber" "tupleMultiply" "tupleMultiplyByNumber" +"tupleNames" "tupleNegate" "tuplePlus" "tupleToNameValuePairs" "TYPE" "TYPEOF" "ucase" -"UInt128" "UInt16" -"UInt256" "UInt32" "UInt64" "UInt8" @@ -4643,48 +4675,32 @@ "USE" "user" "USING" -"UTC_timestamp" "UTCTimestamp" +"UTC_timestamp" "UUID" "UUIDNumToString" "UUIDStringToNum" -"validateNestedArraySizes" +"UUIDToNum" +"UUIDv7ToDateTime" "VALID UNTIL" +"validateNestedArraySizes" "VALUES" -"VARBINARY" -"VARCHAR" -"VARCHAR2" -"Variant" "variantElement" "variantType" "varPop" -"VAR_POP" "varPopArgMax" -"VAR_POPArgMax" "varPopArgMin" -"VAR_POPArgMin" "varPopArray" -"VAR_POPArray" "varPopDistinct" -"VAR_POPDistinct" "varPopForEach" -"VAR_POPForEach" "varPopIf" -"VAR_POPIf" "varPopMap" -"VAR_POPMap" "varPopMerge" -"VAR_POPMerge" "varPopNull" -"VAR_POPNull" "varPopOrDefault" -"VAR_POPOrDefault" "varPopOrNull" -"VAR_POPOrNull" "varPopResample" -"VAR_POPResample" "varPopSimpleState" -"VAR_POPSimpleState" "varPopStable" "varPopStableArgMax" "varPopStableArgMin" @@ -4701,35 +4717,20 @@ "varPopStableSimpleState" "varPopStableState" "varPopState" -"VAR_POPState" "varSamp" -"VAR_SAMP" "varSampArgMax" -"VAR_SAMPArgMax" "varSampArgMin" -"VAR_SAMPArgMin" "varSampArray" -"VAR_SAMPArray" "varSampDistinct" -"VAR_SAMPDistinct" "varSampForEach" -"VAR_SAMPForEach" "varSampIf" -"VAR_SAMPIf" "varSampMap" -"VAR_SAMPMap" "varSampMerge" -"VAR_SAMPMerge" "varSampNull" -"VAR_SAMPNull" "varSampOrDefault" -"VAR_SAMPOrDefault" "varSampOrNull" -"VAR_SAMPOrNull" "varSampResample" -"VAR_SAMPResample" "varSampSimpleState" -"VAR_SAMPSimpleState" "varSampStable" "varSampStableArgMax" "varSampStableArgMin" @@ -4746,8 +4747,37 @@ "varSampStableSimpleState" "varSampStableState" "varSampState" -"VAR_SAMPState" "VARYING" +"VAR_POP" +"VAR_POPArgMax" +"VAR_POPArgMin" +"VAR_POPArray" +"VAR_POPDistinct" +"VAR_POPForEach" +"VAR_POPIf" +"VAR_POPMap" +"VAR_POPMerge" +"VAR_POPNull" +"VAR_POPOrDefault" +"VAR_POPOrNull" +"VAR_POPResample" +"VAR_POPSimpleState" +"VAR_POPState" +"VAR_SAMP" +"VAR_SAMPArgMax" +"VAR_SAMPArgMin" +"VAR_SAMPArray" +"VAR_SAMPDistinct" +"VAR_SAMPForEach" +"VAR_SAMPIf" +"VAR_SAMPMap" +"VAR_SAMPMerge" +"VAR_SAMPNull" +"VAR_SAMPOrDefault" +"VAR_SAMPOrNull" +"VAR_SAMPResample" +"VAR_SAMPSimpleState" +"VAR_SAMPState" "vectorDifference" "vectorSum" "version" @@ -4763,8 +4793,8 @@ "visitParamHas" "WATCH" "WATERMARK" -"week" "WEEK" +"week" "WEEKS" "welchTTest" "welchTTestArgMax" @@ -4783,8 +4813,8 @@ "welchTTestState" "WHEN" "WHERE" -"width_bucket" "widthBucket" +"width_bucket" "WINDOW" "windowFunnel" "windowFunnelArgMax" @@ -4802,15 +4832,15 @@ "windowFunnelSimpleState" "windowFunnelState" "windowID" -"WITH" "WITH ADMIN OPTION" "WITH CHECK" "WITH FILL" "WITH GRANT OPTION" -"with_itemindex" "WITH NAME" "WITH REPLACE OPTION" "WITH TIES" +"WITH" +"WITH_ITEMINDEX" "WK" "wkt" "wordShingleMinHash" @@ -4845,3 +4875,11 @@ "YYYYMMDDToDate32" "ZKPATH" "zookeeperSessionUptime" +"_CAST" +"__actionName" +"__bitBoolMaskAnd" +"__bitBoolMaskOr" +"__bitSwapLastTwo" +"__bitWrapperFunc" +"__getScalar" +"__scalarSubqueryResult" diff --git a/tests/fuzz/dictionaries/datatypes.dict b/tests/fuzz/dictionaries/datatypes.dict index a01a94fd3e3..e562595fb67 100644 --- 
a/tests/fuzz/dictionaries/datatypes.dict +++ b/tests/fuzz/dictionaries/datatypes.dict @@ -1,135 +1,4283 @@ -"AggregateFunction" -"Array" -"BIGINT" -"BIGINT SIGNED" -"BIGINT UNSIGNED" -"BINARY" -"BINARY LARGE OBJECT" -"BINARY VARYING" -"BIT" -"BLOB" -"BYTE" -"BYTEA" -"Bool" -"CHAR" -"CHAR LARGE OBJECT" -"CHAR VARYING" -"CHARACTER" -"CHARACTER LARGE OBJECT" -"CHARACTER VARYING" -"CLOB" -"DEC" -"DOUBLE" -"DOUBLE PRECISION" -"Date" -"Date32" -"DateTime" -"DateTime32" -"DateTime64" -"Decimal" -"Decimal128" -"Decimal256" -"Decimal32" -"Decimal64" -"ENUM" -"Enum" -"Enum16" -"Enum8" -"FIXED" -"FLOAT" -"FixedString" -"Float32" -"Float64" -"GEOMETRY" -"INET4" -"INET6" -"INT" -"INT SIGNED" -"INT UNSIGNED" -"INT1" -"INT1 SIGNED" -"INT1 UNSIGNED" -"INTEGER" -"INTEGER SIGNED" -"INTEGER UNSIGNED" -"IPv4" -"IPv6" -"Int128" -"Int16" -"Int256" -"Int32" -"Int64" -"Int8" -"IntervalDay" -"IntervalHour" -"IntervalMicrosecond" -"IntervalMillisecond" -"IntervalMinute" -"IntervalMonth" -"IntervalNanosecond" -"IntervalQuarter" -"IntervalSecond" -"IntervalWeek" -"IntervalYear" -"JSON" -"LONGBLOB" -"LONGTEXT" -"LowCardinality" -"MEDIUMBLOB" -"MEDIUMINT" -"MEDIUMINT SIGNED" -"MEDIUMINT UNSIGNED" -"MEDIUMTEXT" -"Map" -"MultiPolygon" -"NATIONAL CHAR" -"NATIONAL CHAR VARYING" -"NATIONAL CHARACTER" -"NATIONAL CHARACTER LARGE OBJECT" -"NATIONAL CHARACTER VARYING" -"NCHAR" -"NCHAR LARGE OBJECT" -"NCHAR VARYING" -"NUMERIC" -"NVARCHAR" -"Nested" -"Nothing" -"Nullable" -"Object" -"Point" -"Polygon" -"REAL" -"Ring" -"SET" -"SIGNED" -"SINGLE" -"SMALLINT" -"SMALLINT SIGNED" -"SMALLINT UNSIGNED" -"SimpleAggregateFunction" -"String" -"TEXT" -"TIME" -"TIMESTAMP" -"TINYBLOB" -"TINYINT" -"TINYINT SIGNED" -"TINYINT UNSIGNED" -"TINYTEXT" -"Tuple" -"UInt128" -"UInt16" -"UInt256" -"UInt32" -"UInt64" -"UInt8" -"UNSIGNED" -"UUID" -"VARBINARY" -"VARCHAR" -"VARCHAR2" -"Variant" +"BIT_AND" +"BIT_ANDArgMax" +"BIT_ANDArgMin" +"BIT_ANDArray" +"BIT_ANDDistinct" +"BIT_ANDForEach" +"BIT_ANDIf" +"BIT_ANDMap" +"BIT_ANDMerge" +"BIT_ANDNull" +"BIT_ANDOrDefault" +"BIT_ANDOrNull" +"BIT_ANDResample" +"BIT_ANDSimpleState" +"BIT_ANDState" +"BIT_OR" +"BIT_ORArgMax" +"BIT_ORArgMin" +"BIT_ORArray" +"BIT_ORDistinct" +"BIT_ORForEach" +"BIT_ORIf" +"BIT_ORMap" +"BIT_ORMerge" +"BIT_ORNull" +"BIT_OROrDefault" +"BIT_OROrNull" +"BIT_ORResample" +"BIT_ORSimpleState" +"BIT_ORState" +"BIT_XOR" +"BIT_XORArgMax" +"BIT_XORArgMin" +"BIT_XORArray" +"BIT_XORDistinct" +"BIT_XORForEach" +"BIT_XORIf" +"BIT_XORMap" +"BIT_XORMerge" +"BIT_XORNull" +"BIT_XOROrDefault" +"BIT_XOROrNull" +"BIT_XORResample" +"BIT_XORSimpleState" +"BIT_XORState" +"BLAKE3" +"CAST" +"CHARACTER_LENGTH" +"CHAR_LENGTH" +"COVAR_POP" +"COVAR_POPArgMax" +"COVAR_POPArgMin" +"COVAR_POPArray" +"COVAR_POPDistinct" +"COVAR_POPForEach" +"COVAR_POPIf" +"COVAR_POPMap" +"COVAR_POPMerge" +"COVAR_POPNull" +"COVAR_POPOrDefault" +"COVAR_POPOrNull" +"COVAR_POPResample" +"COVAR_POPSimpleState" +"COVAR_POPState" +"COVAR_SAMP" +"COVAR_SAMPArgMax" +"COVAR_SAMPArgMin" +"COVAR_SAMPArray" +"COVAR_SAMPDistinct" +"COVAR_SAMPForEach" +"COVAR_SAMPIf" +"COVAR_SAMPMap" +"COVAR_SAMPMerge" +"COVAR_SAMPNull" +"COVAR_SAMPOrDefault" +"COVAR_SAMPOrNull" +"COVAR_SAMPResample" +"COVAR_SAMPSimpleState" +"COVAR_SAMPState" +"CRC32" +"CRC32IEEE" +"CRC64" +"DATABASE" +"DATE" +"DATE_DIFF" +"DATE_FORMAT" +"DATE_TRUNC" +"DAY" +"DAYOFMONTH" +"DAYOFWEEK" +"DAYOFYEAR" +"FORMAT_BYTES" +"FQDN" +"FROM_BASE64" +"FROM_DAYS" +"FROM_UNIXTIME" +"HOUR" +"INET6_ATON" +"INET6_NTOA" +"INET_ATON" +"INET_NTOA" +"IPv4CIDRToRange" +"IPv4NumToString" +"IPv4NumToStringClassC" 
+"IPv4StringToNum" +"IPv4StringToNumOrDefault" +"IPv4StringToNumOrNull" +"IPv4ToIPv6" +"IPv6CIDRToRange" +"IPv6NumToString" +"IPv6StringToNum" +"IPv6StringToNumOrDefault" +"IPv6StringToNumOrNull" +"JSONArrayLength" +"JSONExtract" +"JSONExtractArrayRaw" +"JSONExtractBool" +"JSONExtractFloat" +"JSONExtractInt" +"JSONExtractKeys" +"JSONExtractKeysAndValues" +"JSONExtractKeysAndValuesRaw" +"JSONExtractRaw" +"JSONExtractString" +"JSONExtractUInt" +"JSONHas" +"JSONKey" +"JSONLength" +"JSONMergePatch" +"JSONType" +"JSON_ARRAY_LENGTH" +"JSON_EXISTS" +"JSON_QUERY" +"JSON_VALUE" +"L1Distance" +"L1Norm" +"L1Normalize" +"L2Distance" +"L2Norm" +"L2Normalize" +"L2SquaredDistance" +"L2SquaredNorm" +"LAST_DAY" +"LinfDistance" +"LinfNorm" +"LinfNormalize" +"LpDistance" +"LpNorm" +"LpNormalize" +"MACNumToString" +"MACStringToNum" +"MACStringToOUI" +"MAP_FROM_ARRAYS" +"MD4" +"MD5" +"MILLISECOND" +"MINUTE" +"MONTH" +"OCTET_LENGTH" +"QUARTER" +"REGEXP_EXTRACT" +"REGEXP_MATCHES" +"REGEXP_REPLACE" +"SCHEMA" +"SECOND" +"SHA1" +"SHA224" +"SHA256" +"SHA384" +"SHA512" +"SHA512_256" +"STD" +"STDArgMax" +"STDArgMin" +"STDArray" +"STDDEV_POP" +"STDDEV_POPArgMax" +"STDDEV_POPArgMin" +"STDDEV_POPArray" +"STDDEV_POPDistinct" +"STDDEV_POPForEach" +"STDDEV_POPIf" +"STDDEV_POPMap" +"STDDEV_POPMerge" +"STDDEV_POPNull" +"STDDEV_POPOrDefault" +"STDDEV_POPOrNull" +"STDDEV_POPResample" +"STDDEV_POPSimpleState" +"STDDEV_POPState" +"STDDEV_SAMP" +"STDDEV_SAMPArgMax" +"STDDEV_SAMPArgMin" +"STDDEV_SAMPArray" +"STDDEV_SAMPDistinct" +"STDDEV_SAMPForEach" +"STDDEV_SAMPIf" +"STDDEV_SAMPMap" +"STDDEV_SAMPMerge" +"STDDEV_SAMPNull" +"STDDEV_SAMPOrDefault" +"STDDEV_SAMPOrNull" +"STDDEV_SAMPResample" +"STDDEV_SAMPSimpleState" +"STDDEV_SAMPState" +"STDDistinct" +"STDForEach" +"STDIf" +"STDMap" +"STDMerge" +"STDNull" +"STDOrDefault" +"STDOrNull" +"STDResample" +"STDSimpleState" +"STDState" +"SUBSTRING_INDEX" +"SVG" +"TIMESTAMP_DIFF" +"TO_BASE64" +"TO_DAYS" +"TO_UNIXTIME" +"ULIDStringToDateTime" +"URLHash" +"URLHierarchy" +"URLPathHierarchy" +"UTCTimestamp" +"UTC_timestamp" +"UUIDNumToString" +"UUIDStringToNum" +"UUIDToNum" +"UUIDv7ToDateTime" +"VAR_POP" +"VAR_POPArgMax" +"VAR_POPArgMin" +"VAR_POPArray" +"VAR_POPDistinct" +"VAR_POPForEach" +"VAR_POPIf" +"VAR_POPMap" +"VAR_POPMerge" +"VAR_POPNull" +"VAR_POPOrDefault" +"VAR_POPOrNull" +"VAR_POPResample" +"VAR_POPSimpleState" +"VAR_POPState" +"VAR_SAMP" +"VAR_SAMPArgMax" +"VAR_SAMPArgMin" +"VAR_SAMPArray" +"VAR_SAMPDistinct" +"VAR_SAMPForEach" +"VAR_SAMPIf" +"VAR_SAMPMap" +"VAR_SAMPMerge" +"VAR_SAMPNull" +"VAR_SAMPOrDefault" +"VAR_SAMPOrNull" +"VAR_SAMPResample" +"VAR_SAMPSimpleState" +"VAR_SAMPState" "YEAR" -"bool" -"boolean" -"Dynamic" +"YYYYMMDDToDate" +"YYYYMMDDToDate32" +"YYYYMMDDhhmmssToDateTime" +"YYYYMMDDhhmmssToDateTime64" +"_CAST" +"__actionName" +"__bitBoolMaskAnd" +"__bitBoolMaskOr" +"__bitSwapLastTwo" +"__bitWrapperFunc" +"__getScalar" +"__scalarSubqueryResult" +"abs" +"accurateCast" +"accurateCastOrDefault" +"accurateCastOrNull" +"acos" +"acosh" +"addDate" +"addDays" +"addHours" +"addInterval" +"addMicroseconds" +"addMilliseconds" +"addMinutes" +"addMonths" +"addNanoseconds" +"addQuarters" +"addSeconds" +"addTupleOfIntervals" +"addWeeks" +"addYears" +"addressToLine" +"addressToLineWithInlines" +"addressToSymbol" +"aes_decrypt_mysql" +"aes_encrypt_mysql" +"age" +"aggThrow" +"aggThrowArgMax" +"aggThrowArgMin" +"aggThrowArray" +"aggThrowDistinct" +"aggThrowForEach" +"aggThrowIf" +"aggThrowMap" +"aggThrowMerge" +"aggThrowNull" +"aggThrowOrDefault" +"aggThrowOrNull" +"aggThrowResample" 
+"aggThrowSimpleState" +"aggThrowState" +"alphaTokens" +"analysisOfVariance" +"analysisOfVarianceArgMax" +"analysisOfVarianceArgMin" +"analysisOfVarianceArray" +"analysisOfVarianceDistinct" +"analysisOfVarianceForEach" +"analysisOfVarianceIf" +"analysisOfVarianceMap" +"analysisOfVarianceMerge" +"analysisOfVarianceNull" +"analysisOfVarianceOrDefault" +"analysisOfVarianceOrNull" +"analysisOfVarianceResample" +"analysisOfVarianceSimpleState" +"analysisOfVarianceState" +"and" +"anova" +"anovaArgMax" +"anovaArgMin" +"anovaArray" +"anovaDistinct" +"anovaForEach" +"anovaIf" +"anovaMap" +"anovaMerge" +"anovaNull" +"anovaOrDefault" +"anovaOrNull" +"anovaResample" +"anovaSimpleState" +"anovaState" +"any" +"anyArgMax" +"anyArgMin" +"anyArray" +"anyDistinct" +"anyForEach" +"anyHeavy" +"anyHeavyArgMax" +"anyHeavyArgMin" +"anyHeavyArray" +"anyHeavyDistinct" +"anyHeavyForEach" +"anyHeavyIf" +"anyHeavyMap" +"anyHeavyMerge" +"anyHeavyNull" +"anyHeavyOrDefault" +"anyHeavyOrNull" +"anyHeavyResample" +"anyHeavySimpleState" +"anyHeavyState" +"anyIf" +"anyLast" +"anyLastArgMax" +"anyLastArgMin" +"anyLastArray" +"anyLastDistinct" +"anyLastForEach" +"anyLastIf" +"anyLastMap" +"anyLastMerge" +"anyLastNull" +"anyLastOrDefault" +"anyLastOrNull" +"anyLastResample" +"anyLastSimpleState" +"anyLastState" +"anyLast_respect_nulls" +"anyLast_respect_nullsArgMax" +"anyLast_respect_nullsArgMin" +"anyLast_respect_nullsArray" +"anyLast_respect_nullsDistinct" +"anyLast_respect_nullsForEach" +"anyLast_respect_nullsIf" +"anyLast_respect_nullsMap" +"anyLast_respect_nullsMerge" +"anyLast_respect_nullsNull" +"anyLast_respect_nullsOrDefault" +"anyLast_respect_nullsOrNull" +"anyLast_respect_nullsResample" +"anyLast_respect_nullsSimpleState" +"anyLast_respect_nullsState" +"anyMap" +"anyMerge" +"anyNull" +"anyOrDefault" +"anyOrNull" +"anyResample" +"anySimpleState" +"anyState" +"any_respect_nulls" +"any_respect_nullsArgMax" +"any_respect_nullsArgMin" +"any_respect_nullsArray" +"any_respect_nullsDistinct" +"any_respect_nullsForEach" +"any_respect_nullsIf" +"any_respect_nullsMap" +"any_respect_nullsMerge" +"any_respect_nullsNull" +"any_respect_nullsOrDefault" +"any_respect_nullsOrNull" +"any_respect_nullsResample" +"any_respect_nullsSimpleState" +"any_respect_nullsState" +"any_value" +"any_valueArgMax" +"any_valueArgMin" +"any_valueArray" +"any_valueDistinct" +"any_valueForEach" +"any_valueIf" +"any_valueMap" +"any_valueMerge" +"any_valueNull" +"any_valueOrDefault" +"any_valueOrNull" +"any_valueResample" +"any_valueSimpleState" +"any_valueState" +"any_value_respect_nulls" +"any_value_respect_nullsArgMax" +"any_value_respect_nullsArgMin" +"any_value_respect_nullsArray" +"any_value_respect_nullsDistinct" +"any_value_respect_nullsForEach" +"any_value_respect_nullsIf" +"any_value_respect_nullsMap" +"any_value_respect_nullsMerge" +"any_value_respect_nullsNull" +"any_value_respect_nullsOrDefault" +"any_value_respect_nullsOrNull" +"any_value_respect_nullsResample" +"any_value_respect_nullsSimpleState" +"any_value_respect_nullsState" +"appendTrailingCharIfAbsent" +"approx_top_count" +"approx_top_countArgMax" +"approx_top_countArgMin" +"approx_top_countArray" +"approx_top_countDistinct" +"approx_top_countForEach" +"approx_top_countIf" +"approx_top_countMap" +"approx_top_countMerge" +"approx_top_countNull" +"approx_top_countOrDefault" +"approx_top_countOrNull" +"approx_top_countResample" +"approx_top_countSimpleState" +"approx_top_countState" +"approx_top_k" +"approx_top_kArgMax" +"approx_top_kArgMin" +"approx_top_kArray" +"approx_top_kDistinct" 
+"approx_top_kForEach" +"approx_top_kIf" +"approx_top_kMap" +"approx_top_kMerge" +"approx_top_kNull" +"approx_top_kOrDefault" +"approx_top_kOrNull" +"approx_top_kResample" +"approx_top_kSimpleState" +"approx_top_kState" +"approx_top_sum" +"approx_top_sumArgMax" +"approx_top_sumArgMin" +"approx_top_sumArray" +"approx_top_sumDistinct" +"approx_top_sumForEach" +"approx_top_sumIf" +"approx_top_sumMap" +"approx_top_sumMerge" +"approx_top_sumNull" +"approx_top_sumOrDefault" +"approx_top_sumOrNull" +"approx_top_sumResample" +"approx_top_sumSimpleState" +"approx_top_sumState" +"argMax" +"argMaxArgMax" +"argMaxArgMin" +"argMaxArray" +"argMaxDistinct" +"argMaxForEach" +"argMaxIf" +"argMaxMap" +"argMaxMerge" +"argMaxNull" +"argMaxOrDefault" +"argMaxOrNull" +"argMaxResample" +"argMaxSimpleState" +"argMaxState" +"argMin" +"argMinArgMax" +"argMinArgMin" +"argMinArray" +"argMinDistinct" +"argMinForEach" +"argMinIf" +"argMinMap" +"argMinMerge" +"argMinNull" +"argMinOrDefault" +"argMinOrNull" +"argMinResample" +"argMinSimpleState" +"argMinState" +"array" +"arrayAUC" +"arrayAll" +"arrayAvg" +"arrayCompact" +"arrayConcat" +"arrayCount" +"arrayCumSum" +"arrayCumSumNonNegative" +"arrayDifference" +"arrayDistinct" +"arrayDotProduct" +"arrayElement" +"arrayEnumerate" +"arrayEnumerateDense" +"arrayEnumerateDenseRanked" +"arrayEnumerateUniq" +"arrayEnumerateUniqRanked" +"arrayExists" +"arrayFill" +"arrayFilter" +"arrayFirst" +"arrayFirstIndex" +"arrayFirstOrNull" +"arrayFlatten" +"arrayFold" +"arrayIntersect" +"arrayJaccardIndex" +"arrayJoin" +"arrayLast" +"arrayLastIndex" +"arrayLastOrNull" +"arrayMap" +"arrayMax" +"arrayMin" +"arrayPartialReverseSort" +"arrayPartialShuffle" +"arrayPartialSort" +"arrayPopBack" +"arrayPopFront" +"arrayProduct" +"arrayPushBack" +"arrayPushFront" +"arrayRandomSample" +"arrayReduce" +"arrayReduceInRanges" +"arrayResize" +"arrayReverse" +"arrayReverseFill" +"arrayReverseSort" +"arrayReverseSplit" +"arrayRotateLeft" +"arrayRotateRight" +"arrayShiftLeft" +"arrayShiftRight" +"arrayShingles" +"arrayShuffle" +"arraySlice" +"arraySort" +"arraySplit" +"arrayStringConcat" +"arraySum" +"arrayUniq" +"arrayWithConstant" +"arrayZip" +"array_agg" +"array_aggArgMax" +"array_aggArgMin" +"array_aggArray" +"array_aggDistinct" +"array_aggForEach" +"array_aggIf" +"array_aggMap" +"array_aggMerge" +"array_aggNull" +"array_aggOrDefault" +"array_aggOrNull" +"array_aggResample" +"array_aggSimpleState" +"array_aggState" +"array_concat_agg" +"array_concat_aggArgMax" +"array_concat_aggArgMin" +"array_concat_aggArray" +"array_concat_aggDistinct" +"array_concat_aggForEach" +"array_concat_aggIf" +"array_concat_aggMap" +"array_concat_aggMerge" +"array_concat_aggNull" +"array_concat_aggOrDefault" +"array_concat_aggOrNull" +"array_concat_aggResample" +"array_concat_aggSimpleState" +"array_concat_aggState" +"ascii" +"asin" +"asinh" +"assumeNotNull" +"atan" +"atan2" +"atanh" +"avg" +"avgArgMax" +"avgArgMin" +"avgArray" +"avgDistinct" +"avgForEach" +"avgIf" +"avgMap" +"avgMerge" +"avgNull" +"avgOrDefault" +"avgOrNull" +"avgResample" +"avgSimpleState" +"avgState" +"avgWeighted" +"avgWeightedArgMax" +"avgWeightedArgMin" +"avgWeightedArray" +"avgWeightedDistinct" +"avgWeightedForEach" +"avgWeightedIf" +"avgWeightedMap" +"avgWeightedMerge" +"avgWeightedNull" +"avgWeightedOrDefault" +"avgWeightedOrNull" +"avgWeightedResample" +"avgWeightedSimpleState" +"avgWeightedState" +"bar" +"base58Decode" +"base58Encode" +"base64Decode" +"base64Encode" +"base64URLDecode" +"base64URLEncode" +"basename" +"bin" +"bitAnd" +"bitCount" 
+"bitHammingDistance" +"bitNot" +"bitOr" +"bitPositionsToArray" +"bitRotateLeft" +"bitRotateRight" +"bitShiftLeft" +"bitShiftRight" +"bitSlice" +"bitTest" +"bitTestAll" +"bitTestAny" +"bitXor" +"bitmapAnd" +"bitmapAndCardinality" +"bitmapAndnot" +"bitmapAndnotCardinality" +"bitmapBuild" +"bitmapCardinality" +"bitmapContains" +"bitmapHasAll" +"bitmapHasAny" +"bitmapMax" +"bitmapMin" +"bitmapOr" +"bitmapOrCardinality" +"bitmapSubsetInRange" +"bitmapSubsetLimit" +"bitmapToArray" +"bitmapTransform" +"bitmapXor" +"bitmapXorCardinality" +"bitmaskToArray" +"bitmaskToList" +"blockNumber" +"blockSerializedSize" +"blockSize" +"boundingRatio" +"boundingRatioArgMax" +"boundingRatioArgMin" +"boundingRatioArray" +"boundingRatioDistinct" +"boundingRatioForEach" +"boundingRatioIf" +"boundingRatioMap" +"boundingRatioMerge" +"boundingRatioNull" +"boundingRatioOrDefault" +"boundingRatioOrNull" +"boundingRatioResample" +"boundingRatioSimpleState" +"boundingRatioState" +"buildId" +"byteHammingDistance" +"byteSize" +"byteSlice" +"byteSwap" +"caseWithExpr" +"caseWithExpression" +"caseWithoutExpr" +"caseWithoutExpression" +"catboostEvaluate" +"categoricalInformationValue" +"categoricalInformationValueArgMax" +"categoricalInformationValueArgMin" +"categoricalInformationValueArray" +"categoricalInformationValueDistinct" +"categoricalInformationValueForEach" +"categoricalInformationValueIf" +"categoricalInformationValueMap" +"categoricalInformationValueMerge" +"categoricalInformationValueNull" +"categoricalInformationValueOrDefault" +"categoricalInformationValueOrNull" +"categoricalInformationValueResample" +"categoricalInformationValueSimpleState" +"categoricalInformationValueState" +"cbrt" +"ceil" +"ceiling" +"changeDay" +"changeHour" +"changeMinute" +"changeMonth" +"changeSecond" +"changeYear" +"char" +"cityHash64" +"clamp" +"coalesce" +"concat" +"concatAssumeInjective" +"concatWithSeparator" +"concatWithSeparatorAssumeInjective" +"concat_ws" +"connectionId" +"connection_id" +"contingency" +"contingencyArgMax" +"contingencyArgMin" +"contingencyArray" +"contingencyDistinct" +"contingencyForEach" +"contingencyIf" +"contingencyMap" +"contingencyMerge" +"contingencyNull" +"contingencyOrDefault" +"contingencyOrNull" +"contingencyResample" +"contingencySimpleState" +"contingencyState" +"convertCharset" +"corr" +"corrArgMax" +"corrArgMin" +"corrArray" +"corrDistinct" +"corrForEach" +"corrIf" +"corrMap" +"corrMatrix" +"corrMatrixArgMax" +"corrMatrixArgMin" +"corrMatrixArray" +"corrMatrixDistinct" +"corrMatrixForEach" +"corrMatrixIf" +"corrMatrixMap" +"corrMatrixMerge" +"corrMatrixNull" +"corrMatrixOrDefault" +"corrMatrixOrNull" +"corrMatrixResample" +"corrMatrixSimpleState" +"corrMatrixState" +"corrMerge" +"corrNull" +"corrOrDefault" +"corrOrNull" +"corrResample" +"corrSimpleState" +"corrStable" +"corrStableArgMax" +"corrStableArgMin" +"corrStableArray" +"corrStableDistinct" +"corrStableForEach" +"corrStableIf" +"corrStableMap" +"corrStableMerge" +"corrStableNull" +"corrStableOrDefault" +"corrStableOrNull" +"corrStableResample" +"corrStableSimpleState" +"corrStableState" +"corrState" +"cos" +"cosh" +"cosineDistance" +"count" +"countArgMax" +"countArgMin" +"countArray" +"countDigits" +"countDistinct" +"countEqual" +"countForEach" +"countIf" +"countMap" +"countMatches" +"countMatchesCaseInsensitive" +"countMerge" +"countNull" +"countOrDefault" +"countOrNull" +"countResample" +"countSimpleState" +"countState" +"countSubstrings" +"countSubstringsCaseInsensitive" +"countSubstringsCaseInsensitiveUTF8" +"covarPop" 
+"covarPopArgMax" +"covarPopArgMin" +"covarPopArray" +"covarPopDistinct" +"covarPopForEach" +"covarPopIf" +"covarPopMap" +"covarPopMatrix" +"covarPopMatrixArgMax" +"covarPopMatrixArgMin" +"covarPopMatrixArray" +"covarPopMatrixDistinct" +"covarPopMatrixForEach" +"covarPopMatrixIf" +"covarPopMatrixMap" +"covarPopMatrixMerge" +"covarPopMatrixNull" +"covarPopMatrixOrDefault" +"covarPopMatrixOrNull" +"covarPopMatrixResample" +"covarPopMatrixSimpleState" +"covarPopMatrixState" +"covarPopMerge" +"covarPopNull" +"covarPopOrDefault" +"covarPopOrNull" +"covarPopResample" +"covarPopSimpleState" +"covarPopStable" +"covarPopStableArgMax" +"covarPopStableArgMin" +"covarPopStableArray" +"covarPopStableDistinct" +"covarPopStableForEach" +"covarPopStableIf" +"covarPopStableMap" +"covarPopStableMerge" +"covarPopStableNull" +"covarPopStableOrDefault" +"covarPopStableOrNull" +"covarPopStableResample" +"covarPopStableSimpleState" +"covarPopStableState" +"covarPopState" +"covarSamp" +"covarSampArgMax" +"covarSampArgMin" +"covarSampArray" +"covarSampDistinct" +"covarSampForEach" +"covarSampIf" +"covarSampMap" +"covarSampMatrix" +"covarSampMatrixArgMax" +"covarSampMatrixArgMin" +"covarSampMatrixArray" +"covarSampMatrixDistinct" +"covarSampMatrixForEach" +"covarSampMatrixIf" +"covarSampMatrixMap" +"covarSampMatrixMerge" +"covarSampMatrixNull" +"covarSampMatrixOrDefault" +"covarSampMatrixOrNull" +"covarSampMatrixResample" +"covarSampMatrixSimpleState" +"covarSampMatrixState" +"covarSampMerge" +"covarSampNull" +"covarSampOrDefault" +"covarSampOrNull" +"covarSampResample" +"covarSampSimpleState" +"covarSampStable" +"covarSampStableArgMax" +"covarSampStableArgMin" +"covarSampStableArray" +"covarSampStableDistinct" +"covarSampStableForEach" +"covarSampStableIf" +"covarSampStableMap" +"covarSampStableMerge" +"covarSampStableNull" +"covarSampStableOrDefault" +"covarSampStableOrNull" +"covarSampStableResample" +"covarSampStableSimpleState" +"covarSampStableState" +"covarSampState" +"cramersV" +"cramersVArgMax" +"cramersVArgMin" +"cramersVArray" +"cramersVBiasCorrected" +"cramersVBiasCorrectedArgMax" +"cramersVBiasCorrectedArgMin" +"cramersVBiasCorrectedArray" +"cramersVBiasCorrectedDistinct" +"cramersVBiasCorrectedForEach" +"cramersVBiasCorrectedIf" +"cramersVBiasCorrectedMap" +"cramersVBiasCorrectedMerge" +"cramersVBiasCorrectedNull" +"cramersVBiasCorrectedOrDefault" +"cramersVBiasCorrectedOrNull" +"cramersVBiasCorrectedResample" +"cramersVBiasCorrectedSimpleState" +"cramersVBiasCorrectedState" +"cramersVDistinct" +"cramersVForEach" +"cramersVIf" +"cramersVMap" +"cramersVMerge" +"cramersVNull" +"cramersVOrDefault" +"cramersVOrNull" +"cramersVResample" +"cramersVSimpleState" +"cramersVState" +"curdate" +"currentDatabase" +"currentProfiles" +"currentRoles" +"currentSchemas" +"currentUser" +"current_database" +"current_date" +"current_schemas" +"current_timestamp" +"current_user" +"cutFragment" +"cutIPv6" +"cutQueryString" +"cutQueryStringAndFragment" +"cutToFirstSignificantSubdomain" +"cutToFirstSignificantSubdomainCustom" +"cutToFirstSignificantSubdomainCustomRFC" +"cutToFirstSignificantSubdomainCustomWithWWW" +"cutToFirstSignificantSubdomainCustomWithWWWRFC" +"cutToFirstSignificantSubdomainRFC" +"cutToFirstSignificantSubdomainWithWWW" +"cutToFirstSignificantSubdomainWithWWWRFC" +"cutURLParameter" +"cutWWW" +"damerauLevenshteinDistance" +"dateDiff" +"dateName" +"dateTime64ToSnowflake" +"dateTime64ToSnowflakeID" +"dateTimeToSnowflake" +"dateTimeToSnowflakeID" +"dateTrunc" +"date_diff" +"decodeHTMLComponent" 
+"decodeURLComponent" +"decodeURLFormComponent" +"decodeXMLComponent" +"decrypt" +"defaultProfiles" +"defaultRoles" +"defaultValueOfArgumentType" +"defaultValueOfTypeName" +"degrees" +"deltaSum" +"deltaSumArgMax" +"deltaSumArgMin" +"deltaSumArray" +"deltaSumDistinct" +"deltaSumForEach" +"deltaSumIf" +"deltaSumMap" +"deltaSumMerge" +"deltaSumNull" +"deltaSumOrDefault" +"deltaSumOrNull" +"deltaSumResample" +"deltaSumSimpleState" +"deltaSumState" +"deltaSumTimestamp" +"deltaSumTimestampArgMax" +"deltaSumTimestampArgMin" +"deltaSumTimestampArray" +"deltaSumTimestampDistinct" +"deltaSumTimestampForEach" +"deltaSumTimestampIf" +"deltaSumTimestampMap" +"deltaSumTimestampMerge" +"deltaSumTimestampNull" +"deltaSumTimestampOrDefault" +"deltaSumTimestampOrNull" +"deltaSumTimestampResample" +"deltaSumTimestampSimpleState" +"deltaSumTimestampState" +"demangle" +"denseRank" +"denseRankArgMax" +"denseRankArgMin" +"denseRankArray" +"denseRankDistinct" +"denseRankForEach" +"denseRankIf" +"denseRankMap" +"denseRankMerge" +"denseRankNull" +"denseRankOrDefault" +"denseRankOrNull" +"denseRankResample" +"denseRankSimpleState" +"denseRankState" +"dense_rank" +"dense_rankArgMax" +"dense_rankArgMin" +"dense_rankArray" +"dense_rankDistinct" +"dense_rankForEach" +"dense_rankIf" +"dense_rankMap" +"dense_rankMerge" +"dense_rankNull" +"dense_rankOrDefault" +"dense_rankOrNull" +"dense_rankResample" +"dense_rankSimpleState" +"dense_rankState" +"detectCharset" +"detectLanguage" +"detectLanguageMixed" +"detectLanguageUnknown" +"detectProgrammingLanguage" +"detectTonality" +"dictGet" +"dictGetAll" +"dictGetChildren" +"dictGetDate" +"dictGetDateOrDefault" +"dictGetDateTime" +"dictGetDateTimeOrDefault" +"dictGetDescendants" +"dictGetFloat32" +"dictGetFloat32OrDefault" +"dictGetFloat64" +"dictGetFloat64OrDefault" +"dictGetHierarchy" +"dictGetIPv4" +"dictGetIPv4OrDefault" +"dictGetIPv6" +"dictGetIPv6OrDefault" +"dictGetInt16" +"dictGetInt16OrDefault" +"dictGetInt32" +"dictGetInt32OrDefault" +"dictGetInt64" +"dictGetInt64OrDefault" +"dictGetInt8" +"dictGetInt8OrDefault" +"dictGetOrDefault" +"dictGetOrNull" +"dictGetString" +"dictGetStringOrDefault" +"dictGetUInt16" +"dictGetUInt16OrDefault" +"dictGetUInt32" +"dictGetUInt32OrDefault" +"dictGetUInt64" +"dictGetUInt64OrDefault" +"dictGetUInt8" +"dictGetUInt8OrDefault" +"dictGetUUID" +"dictGetUUIDOrDefault" +"dictHas" +"dictIsIn" +"displayName" +"distanceL1" +"distanceL2" +"distanceL2Squared" +"distanceLinf" +"distanceLp" +"divide" +"divideDecimal" +"domain" +"domainRFC" +"domainWithoutWWW" +"domainWithoutWWWRFC" +"dotProduct" +"dumpColumnStructure" +"dynamicElement" +"dynamicType" +"e" +"editDistance" +"editDistanceUTF8" +"empty" +"emptyArrayDate" +"emptyArrayDateTime" +"emptyArrayFloat32" +"emptyArrayFloat64" +"emptyArrayInt16" +"emptyArrayInt32" +"emptyArrayInt64" +"emptyArrayInt8" +"emptyArrayString" +"emptyArrayToSingle" +"emptyArrayUInt16" +"emptyArrayUInt32" +"emptyArrayUInt64" +"emptyArrayUInt8" +"enabledProfiles" +"enabledRoles" +"encodeURLComponent" +"encodeURLFormComponent" +"encodeXMLComponent" +"encrypt" +"endsWith" +"endsWithUTF8" +"entropy" +"entropyArgMax" +"entropyArgMin" +"entropyArray" +"entropyDistinct" +"entropyForEach" +"entropyIf" +"entropyMap" +"entropyMerge" +"entropyNull" +"entropyOrDefault" +"entropyOrNull" +"entropyResample" +"entropySimpleState" +"entropyState" +"equals" +"erf" +"erfc" +"errorCodeToName" +"evalMLMethod" +"exp" +"exp10" +"exp2" +"exponentialMovingAverage" +"exponentialMovingAverageArgMax" +"exponentialMovingAverageArgMin" 
+"exponentialMovingAverageArray" +"exponentialMovingAverageDistinct" +"exponentialMovingAverageForEach" +"exponentialMovingAverageIf" +"exponentialMovingAverageMap" +"exponentialMovingAverageMerge" +"exponentialMovingAverageNull" +"exponentialMovingAverageOrDefault" +"exponentialMovingAverageOrNull" +"exponentialMovingAverageResample" +"exponentialMovingAverageSimpleState" +"exponentialMovingAverageState" +"exponentialTimeDecayedAvg" +"exponentialTimeDecayedAvgArgMax" +"exponentialTimeDecayedAvgArgMin" +"exponentialTimeDecayedAvgArray" +"exponentialTimeDecayedAvgDistinct" +"exponentialTimeDecayedAvgForEach" +"exponentialTimeDecayedAvgIf" +"exponentialTimeDecayedAvgMap" +"exponentialTimeDecayedAvgMerge" +"exponentialTimeDecayedAvgNull" +"exponentialTimeDecayedAvgOrDefault" +"exponentialTimeDecayedAvgOrNull" +"exponentialTimeDecayedAvgResample" +"exponentialTimeDecayedAvgSimpleState" +"exponentialTimeDecayedAvgState" +"exponentialTimeDecayedCount" +"exponentialTimeDecayedCountArgMax" +"exponentialTimeDecayedCountArgMin" +"exponentialTimeDecayedCountArray" +"exponentialTimeDecayedCountDistinct" +"exponentialTimeDecayedCountForEach" +"exponentialTimeDecayedCountIf" +"exponentialTimeDecayedCountMap" +"exponentialTimeDecayedCountMerge" +"exponentialTimeDecayedCountNull" +"exponentialTimeDecayedCountOrDefault" +"exponentialTimeDecayedCountOrNull" +"exponentialTimeDecayedCountResample" +"exponentialTimeDecayedCountSimpleState" +"exponentialTimeDecayedCountState" +"exponentialTimeDecayedMax" +"exponentialTimeDecayedMaxArgMax" +"exponentialTimeDecayedMaxArgMin" +"exponentialTimeDecayedMaxArray" +"exponentialTimeDecayedMaxDistinct" +"exponentialTimeDecayedMaxForEach" +"exponentialTimeDecayedMaxIf" +"exponentialTimeDecayedMaxMap" +"exponentialTimeDecayedMaxMerge" +"exponentialTimeDecayedMaxNull" +"exponentialTimeDecayedMaxOrDefault" +"exponentialTimeDecayedMaxOrNull" +"exponentialTimeDecayedMaxResample" +"exponentialTimeDecayedMaxSimpleState" +"exponentialTimeDecayedMaxState" +"exponentialTimeDecayedSum" +"exponentialTimeDecayedSumArgMax" +"exponentialTimeDecayedSumArgMin" +"exponentialTimeDecayedSumArray" +"exponentialTimeDecayedSumDistinct" +"exponentialTimeDecayedSumForEach" +"exponentialTimeDecayedSumIf" +"exponentialTimeDecayedSumMap" +"exponentialTimeDecayedSumMerge" +"exponentialTimeDecayedSumNull" +"exponentialTimeDecayedSumOrDefault" +"exponentialTimeDecayedSumOrNull" +"exponentialTimeDecayedSumResample" +"exponentialTimeDecayedSumSimpleState" +"exponentialTimeDecayedSumState" +"extract" +"extractAll" +"extractAllGroups" +"extractAllGroupsHorizontal" +"extractAllGroupsVertical" +"extractGroups" +"extractKeyValuePairs" +"extractKeyValuePairsWithEscaping" +"extractTextFromHTML" +"extractURLParameter" +"extractURLParameterNames" +"extractURLParameters" +"factorial" +"farmFingerprint64" +"farmHash64" +"file" +"filesystemAvailable" +"filesystemCapacity" +"filesystemUnreserved" +"finalizeAggregation" +"firstLine" +"firstSignificantSubdomain" +"firstSignificantSubdomainCustom" +"firstSignificantSubdomainCustomRFC" +"firstSignificantSubdomainRFC" +"first_value" +"first_valueArgMax" +"first_valueArgMin" +"first_valueArray" +"first_valueDistinct" +"first_valueForEach" +"first_valueIf" +"first_valueMap" +"first_valueMerge" +"first_valueNull" +"first_valueOrDefault" +"first_valueOrNull" +"first_valueResample" +"first_valueSimpleState" +"first_valueState" +"first_value_respect_nulls" +"first_value_respect_nullsArgMax" +"first_value_respect_nullsArgMin" +"first_value_respect_nullsArray" 
+"first_value_respect_nullsDistinct" +"first_value_respect_nullsForEach" +"first_value_respect_nullsIf" +"first_value_respect_nullsMap" +"first_value_respect_nullsMerge" +"first_value_respect_nullsNull" +"first_value_respect_nullsOrDefault" +"first_value_respect_nullsOrNull" +"first_value_respect_nullsResample" +"first_value_respect_nullsSimpleState" +"first_value_respect_nullsState" +"flameGraph" +"flameGraphArgMax" +"flameGraphArgMin" +"flameGraphArray" +"flameGraphDistinct" +"flameGraphForEach" +"flameGraphIf" +"flameGraphMap" +"flameGraphMerge" +"flameGraphNull" +"flameGraphOrDefault" +"flameGraphOrNull" +"flameGraphResample" +"flameGraphSimpleState" +"flameGraphState" +"flatten" +"flattenTuple" +"floor" +"format" +"formatDateTime" +"formatDateTimeInJodaSyntax" +"formatQuery" +"formatQueryOrNull" +"formatQuerySingleLine" +"formatQuerySingleLineOrNull" +"formatReadableDecimalSize" +"formatReadableQuantity" +"formatReadableSize" +"formatReadableTimeDelta" +"formatRow" +"formatRowNoNewline" +"fragment" +"fromDaysSinceYearZero" +"fromDaysSinceYearZero32" +"fromModifiedJulianDay" +"fromModifiedJulianDayOrNull" +"fromUTCTimestamp" +"fromUnixTimestamp" +"fromUnixTimestamp64Micro" +"fromUnixTimestamp64Milli" +"fromUnixTimestamp64Nano" +"fromUnixTimestampInJodaSyntax" +"from_utc_timestamp" +"fullHostName" +"fuzzBits" +"gccMurmurHash" +"gcd" +"generateRandomStructure" +"generateSnowflakeID" +"generateULID" +"generateUUIDv4" +"generateUUIDv7" +"geoDistance" +"geoToH3" +"geoToS2" +"geohashDecode" +"geohashEncode" +"geohashesInBox" +"getClientHTTPHeader" +"getMacro" +"getOSKernelVersion" +"getServerPort" +"getSetting" +"getSizeOfEnumType" +"getSubcolumn" +"getTypeSerializationStreams" +"globalIn" +"globalInIgnoreSet" +"globalNotIn" +"globalNotInIgnoreSet" +"globalNotNullIn" +"globalNotNullInIgnoreSet" +"globalNullIn" +"globalNullInIgnoreSet" +"globalVariable" +"greatCircleAngle" +"greatCircleDistance" +"greater" +"greaterOrEquals" +"greatest" +"groupArray" +"groupArrayArgMax" +"groupArrayArgMin" +"groupArrayArray" +"groupArrayDistinct" +"groupArrayForEach" +"groupArrayIf" +"groupArrayInsertAt" +"groupArrayInsertAtArgMax" +"groupArrayInsertAtArgMin" +"groupArrayInsertAtArray" +"groupArrayInsertAtDistinct" +"groupArrayInsertAtForEach" +"groupArrayInsertAtIf" +"groupArrayInsertAtMap" +"groupArrayInsertAtMerge" +"groupArrayInsertAtNull" +"groupArrayInsertAtOrDefault" +"groupArrayInsertAtOrNull" +"groupArrayInsertAtResample" +"groupArrayInsertAtSimpleState" +"groupArrayInsertAtState" +"groupArrayIntersect" +"groupArrayIntersectArgMax" +"groupArrayIntersectArgMin" +"groupArrayIntersectArray" +"groupArrayIntersectDistinct" +"groupArrayIntersectForEach" +"groupArrayIntersectIf" +"groupArrayIntersectMap" +"groupArrayIntersectMerge" +"groupArrayIntersectNull" +"groupArrayIntersectOrDefault" +"groupArrayIntersectOrNull" +"groupArrayIntersectResample" +"groupArrayIntersectSimpleState" +"groupArrayIntersectState" +"groupArrayLast" +"groupArrayLastArgMax" +"groupArrayLastArgMin" +"groupArrayLastArray" +"groupArrayLastDistinct" +"groupArrayLastForEach" +"groupArrayLastIf" +"groupArrayLastMap" +"groupArrayLastMerge" +"groupArrayLastNull" +"groupArrayLastOrDefault" +"groupArrayLastOrNull" +"groupArrayLastResample" +"groupArrayLastSimpleState" +"groupArrayLastState" +"groupArrayMap" +"groupArrayMerge" +"groupArrayMovingAvg" +"groupArrayMovingAvgArgMax" +"groupArrayMovingAvgArgMin" +"groupArrayMovingAvgArray" +"groupArrayMovingAvgDistinct" +"groupArrayMovingAvgForEach" +"groupArrayMovingAvgIf" 
+"groupArrayMovingAvgMap" +"groupArrayMovingAvgMerge" +"groupArrayMovingAvgNull" +"groupArrayMovingAvgOrDefault" +"groupArrayMovingAvgOrNull" +"groupArrayMovingAvgResample" +"groupArrayMovingAvgSimpleState" +"groupArrayMovingAvgState" +"groupArrayMovingSum" +"groupArrayMovingSumArgMax" +"groupArrayMovingSumArgMin" +"groupArrayMovingSumArray" +"groupArrayMovingSumDistinct" +"groupArrayMovingSumForEach" +"groupArrayMovingSumIf" +"groupArrayMovingSumMap" +"groupArrayMovingSumMerge" +"groupArrayMovingSumNull" +"groupArrayMovingSumOrDefault" +"groupArrayMovingSumOrNull" +"groupArrayMovingSumResample" +"groupArrayMovingSumSimpleState" +"groupArrayMovingSumState" +"groupArrayNull" +"groupArrayOrDefault" +"groupArrayOrNull" +"groupArrayResample" +"groupArraySample" +"groupArraySampleArgMax" +"groupArraySampleArgMin" +"groupArraySampleArray" +"groupArraySampleDistinct" +"groupArraySampleForEach" +"groupArraySampleIf" +"groupArraySampleMap" +"groupArraySampleMerge" +"groupArraySampleNull" +"groupArraySampleOrDefault" +"groupArraySampleOrNull" +"groupArraySampleResample" +"groupArraySampleSimpleState" +"groupArraySampleState" +"groupArraySimpleState" +"groupArraySorted" +"groupArraySortedArgMax" +"groupArraySortedArgMin" +"groupArraySortedArray" +"groupArraySortedDistinct" +"groupArraySortedForEach" +"groupArraySortedIf" +"groupArraySortedMap" +"groupArraySortedMerge" +"groupArraySortedNull" +"groupArraySortedOrDefault" +"groupArraySortedOrNull" +"groupArraySortedResample" +"groupArraySortedSimpleState" +"groupArraySortedState" +"groupArrayState" +"groupBitAnd" +"groupBitAndArgMax" +"groupBitAndArgMin" +"groupBitAndArray" +"groupBitAndDistinct" +"groupBitAndForEach" +"groupBitAndIf" +"groupBitAndMap" +"groupBitAndMerge" +"groupBitAndNull" +"groupBitAndOrDefault" +"groupBitAndOrNull" +"groupBitAndResample" +"groupBitAndSimpleState" +"groupBitAndState" +"groupBitOr" +"groupBitOrArgMax" +"groupBitOrArgMin" +"groupBitOrArray" +"groupBitOrDistinct" +"groupBitOrForEach" +"groupBitOrIf" +"groupBitOrMap" +"groupBitOrMerge" +"groupBitOrNull" +"groupBitOrOrDefault" +"groupBitOrOrNull" +"groupBitOrResample" +"groupBitOrSimpleState" +"groupBitOrState" +"groupBitXor" +"groupBitXorArgMax" +"groupBitXorArgMin" +"groupBitXorArray" +"groupBitXorDistinct" +"groupBitXorForEach" +"groupBitXorIf" +"groupBitXorMap" +"groupBitXorMerge" +"groupBitXorNull" +"groupBitXorOrDefault" +"groupBitXorOrNull" +"groupBitXorResample" +"groupBitXorSimpleState" +"groupBitXorState" +"groupBitmap" +"groupBitmapAnd" +"groupBitmapAndArgMax" +"groupBitmapAndArgMin" +"groupBitmapAndArray" +"groupBitmapAndDistinct" +"groupBitmapAndForEach" +"groupBitmapAndIf" +"groupBitmapAndMap" +"groupBitmapAndMerge" +"groupBitmapAndNull" +"groupBitmapAndOrDefault" +"groupBitmapAndOrNull" +"groupBitmapAndResample" +"groupBitmapAndSimpleState" +"groupBitmapAndState" +"groupBitmapArgMax" +"groupBitmapArgMin" +"groupBitmapArray" +"groupBitmapDistinct" +"groupBitmapForEach" +"groupBitmapIf" +"groupBitmapMap" +"groupBitmapMerge" +"groupBitmapNull" +"groupBitmapOr" +"groupBitmapOrArgMax" +"groupBitmapOrArgMin" +"groupBitmapOrArray" +"groupBitmapOrDefault" +"groupBitmapOrDistinct" +"groupBitmapOrForEach" +"groupBitmapOrIf" +"groupBitmapOrMap" +"groupBitmapOrMerge" +"groupBitmapOrNull" +"groupBitmapOrNull" +"groupBitmapOrOrDefault" +"groupBitmapOrOrNull" +"groupBitmapOrResample" +"groupBitmapOrSimpleState" +"groupBitmapOrState" +"groupBitmapResample" +"groupBitmapSimpleState" +"groupBitmapState" +"groupBitmapXor" +"groupBitmapXorArgMax" +"groupBitmapXorArgMin" 
+"groupBitmapXorArray" +"groupBitmapXorDistinct" +"groupBitmapXorForEach" +"groupBitmapXorIf" +"groupBitmapXorMap" +"groupBitmapXorMerge" +"groupBitmapXorNull" +"groupBitmapXorOrDefault" +"groupBitmapXorOrNull" +"groupBitmapXorResample" +"groupBitmapXorSimpleState" +"groupBitmapXorState" +"groupConcat" +"groupConcatArgMax" +"groupConcatArgMin" +"groupConcatArray" +"groupConcatDistinct" +"groupConcatForEach" +"groupConcatIf" +"groupConcatMap" +"groupConcatMerge" +"groupConcatNull" +"groupConcatOrDefault" +"groupConcatOrNull" +"groupConcatResample" +"groupConcatSimpleState" +"groupConcatState" +"groupUniqArray" +"groupUniqArrayArgMax" +"groupUniqArrayArgMin" +"groupUniqArrayArray" +"groupUniqArrayDistinct" +"groupUniqArrayForEach" +"groupUniqArrayIf" +"groupUniqArrayMap" +"groupUniqArrayMerge" +"groupUniqArrayNull" +"groupUniqArrayOrDefault" +"groupUniqArrayOrNull" +"groupUniqArrayResample" +"groupUniqArraySimpleState" +"groupUniqArrayState" +"group_concat" +"group_concatArgMax" +"group_concatArgMin" +"group_concatArray" +"group_concatDistinct" +"group_concatForEach" +"group_concatIf" +"group_concatMap" +"group_concatMerge" +"group_concatNull" +"group_concatOrDefault" +"group_concatOrNull" +"group_concatResample" +"group_concatSimpleState" +"group_concatState" +"h3CellAreaM2" +"h3CellAreaRads2" +"h3Distance" +"h3EdgeAngle" +"h3EdgeLengthKm" +"h3EdgeLengthM" +"h3ExactEdgeLengthKm" +"h3ExactEdgeLengthM" +"h3ExactEdgeLengthRads" +"h3GetBaseCell" +"h3GetDestinationIndexFromUnidirectionalEdge" +"h3GetFaces" +"h3GetIndexesFromUnidirectionalEdge" +"h3GetOriginIndexFromUnidirectionalEdge" +"h3GetPentagonIndexes" +"h3GetRes0Indexes" +"h3GetResolution" +"h3GetUnidirectionalEdge" +"h3GetUnidirectionalEdgeBoundary" +"h3GetUnidirectionalEdgesFromHexagon" +"h3HexAreaKm2" +"h3HexAreaM2" +"h3HexRing" +"h3IndexesAreNeighbors" +"h3IsPentagon" +"h3IsResClassIII" +"h3IsValid" +"h3Line" +"h3NumHexagons" +"h3PointDistKm" +"h3PointDistM" +"h3PointDistRads" +"h3ToCenterChild" +"h3ToChildren" +"h3ToGeo" +"h3ToGeoBoundary" +"h3ToParent" +"h3ToString" +"h3UnidirectionalEdgeIsValid" +"h3kRing" +"halfMD5" +"has" +"hasAll" +"hasAny" +"hasColumnInTable" +"hasSubsequence" +"hasSubsequenceCaseInsensitive" +"hasSubsequenceCaseInsensitiveUTF8" +"hasSubsequenceUTF8" +"hasSubstr" +"hasThreadFuzzer" +"hasToken" +"hasTokenCaseInsensitive" +"hasTokenCaseInsensitiveOrNull" +"hasTokenOrNull" +"hex" +"hilbertDecode" +"hilbertEncode" +"histogram" +"histogramArgMax" +"histogramArgMin" +"histogramArray" +"histogramDistinct" +"histogramForEach" +"histogramIf" +"histogramMap" +"histogramMerge" +"histogramNull" +"histogramOrDefault" +"histogramOrNull" +"histogramResample" +"histogramSimpleState" +"histogramState" +"hiveHash" +"hop" +"hopEnd" +"hopStart" +"hostName" +"hostname" +"hypot" +"identity" +"idnaDecode" +"idnaEncode" +"if" +"ifNotFinite" +"ifNull" +"ignore" +"ilike" +"in" +"inIgnoreSet" +"indexHint" +"indexOf" +"initcap" +"initcapUTF8" +"initialQueryID" +"initial_query_id" +"initializeAggregation" +"instr" +"intDiv" +"intDivOrZero" +"intExp10" +"intExp2" +"intHash32" +"intHash64" +"intervalLengthSum" +"intervalLengthSumArgMax" +"intervalLengthSumArgMin" +"intervalLengthSumArray" +"intervalLengthSumDistinct" +"intervalLengthSumForEach" +"intervalLengthSumIf" +"intervalLengthSumMap" +"intervalLengthSumMerge" +"intervalLengthSumNull" +"intervalLengthSumOrDefault" +"intervalLengthSumOrNull" +"intervalLengthSumResample" +"intervalLengthSumSimpleState" +"intervalLengthSumState" +"isConstant" +"isDecimalOverflow" +"isFinite" 
+"isIPAddressInRange" +"isIPv4String" +"isIPv6String" +"isInfinite" +"isNaN" +"isNotDistinctFrom" +"isNotNull" +"isNull" +"isNullable" +"isValidJSON" +"isValidUTF8" +"isZeroOrNull" +"jaroSimilarity" +"jaroWinklerSimilarity" +"javaHash" +"javaHashUTF16LE" +"joinGet" +"joinGetOrNull" +"jsonMergePatch" +"jumpConsistentHash" +"kafkaMurmurHash" +"kolmogorovSmirnovTest" +"kolmogorovSmirnovTestArgMax" +"kolmogorovSmirnovTestArgMin" +"kolmogorovSmirnovTestArray" +"kolmogorovSmirnovTestDistinct" +"kolmogorovSmirnovTestForEach" +"kolmogorovSmirnovTestIf" +"kolmogorovSmirnovTestMap" +"kolmogorovSmirnovTestMerge" +"kolmogorovSmirnovTestNull" +"kolmogorovSmirnovTestOrDefault" +"kolmogorovSmirnovTestOrNull" +"kolmogorovSmirnovTestResample" +"kolmogorovSmirnovTestSimpleState" +"kolmogorovSmirnovTestState" +"kostikConsistentHash" +"kql_array_sort_asc" +"kql_array_sort_desc" +"kurtPop" +"kurtPopArgMax" +"kurtPopArgMin" +"kurtPopArray" +"kurtPopDistinct" +"kurtPopForEach" +"kurtPopIf" +"kurtPopMap" +"kurtPopMerge" +"kurtPopNull" +"kurtPopOrDefault" +"kurtPopOrNull" +"kurtPopResample" +"kurtPopSimpleState" +"kurtPopState" +"kurtSamp" +"kurtSampArgMax" +"kurtSampArgMin" +"kurtSampArray" +"kurtSampDistinct" +"kurtSampForEach" +"kurtSampIf" +"kurtSampMap" +"kurtSampMerge" +"kurtSampNull" +"kurtSampOrDefault" +"kurtSampOrNull" +"kurtSampResample" +"kurtSampSimpleState" +"kurtSampState" +"lagInFrame" +"lagInFrameArgMax" +"lagInFrameArgMin" +"lagInFrameArray" +"lagInFrameDistinct" +"lagInFrameForEach" +"lagInFrameIf" +"lagInFrameMap" +"lagInFrameMerge" +"lagInFrameNull" +"lagInFrameOrDefault" +"lagInFrameOrNull" +"lagInFrameResample" +"lagInFrameSimpleState" +"lagInFrameState" +"largestTriangleThreeBuckets" +"largestTriangleThreeBucketsArgMax" +"largestTriangleThreeBucketsArgMin" +"largestTriangleThreeBucketsArray" +"largestTriangleThreeBucketsDistinct" +"largestTriangleThreeBucketsForEach" +"largestTriangleThreeBucketsIf" +"largestTriangleThreeBucketsMap" +"largestTriangleThreeBucketsMerge" +"largestTriangleThreeBucketsNull" +"largestTriangleThreeBucketsOrDefault" +"largestTriangleThreeBucketsOrNull" +"largestTriangleThreeBucketsResample" +"largestTriangleThreeBucketsSimpleState" +"largestTriangleThreeBucketsState" +"last_value" +"last_valueArgMax" +"last_valueArgMin" +"last_valueArray" +"last_valueDistinct" +"last_valueForEach" +"last_valueIf" +"last_valueMap" +"last_valueMerge" +"last_valueNull" +"last_valueOrDefault" +"last_valueOrNull" +"last_valueResample" +"last_valueSimpleState" +"last_valueState" +"last_value_respect_nulls" +"last_value_respect_nullsArgMax" +"last_value_respect_nullsArgMin" +"last_value_respect_nullsArray" +"last_value_respect_nullsDistinct" +"last_value_respect_nullsForEach" +"last_value_respect_nullsIf" +"last_value_respect_nullsMap" +"last_value_respect_nullsMerge" +"last_value_respect_nullsNull" +"last_value_respect_nullsOrDefault" +"last_value_respect_nullsOrNull" +"last_value_respect_nullsResample" +"last_value_respect_nullsSimpleState" +"last_value_respect_nullsState" +"lcase" +"lcm" +"leadInFrame" +"leadInFrameArgMax" +"leadInFrameArgMin" +"leadInFrameArray" +"leadInFrameDistinct" +"leadInFrameForEach" +"leadInFrameIf" +"leadInFrameMap" +"leadInFrameMerge" +"leadInFrameNull" +"leadInFrameOrDefault" +"leadInFrameOrNull" +"leadInFrameResample" +"leadInFrameSimpleState" +"leadInFrameState" +"least" +"left" +"leftPad" +"leftPadUTF8" +"leftUTF8" +"lemmatize" +"length" +"lengthUTF8" +"less" +"lessOrEquals" +"levenshteinDistance" +"levenshteinDistanceUTF8" +"lgamma" +"like" +"ln" 
+"locate" +"log" +"log10" +"log1p" +"log2" +"logTrace" +"lowCardinalityIndices" +"lowCardinalityKeys" +"lower" +"lowerUTF8" +"lpad" +"ltrim" +"lttb" +"lttbArgMax" +"lttbArgMin" +"lttbArray" +"lttbDistinct" +"lttbForEach" +"lttbIf" +"lttbMap" +"lttbMerge" +"lttbNull" +"lttbOrDefault" +"lttbOrNull" +"lttbResample" +"lttbSimpleState" +"lttbState" +"makeDate" +"makeDate32" +"makeDateTime" +"makeDateTime64" +"mannWhitneyUTest" +"mannWhitneyUTestArgMax" +"mannWhitneyUTestArgMin" +"mannWhitneyUTestArray" +"mannWhitneyUTestDistinct" +"mannWhitneyUTestForEach" +"mannWhitneyUTestIf" +"mannWhitneyUTestMap" +"mannWhitneyUTestMerge" +"mannWhitneyUTestNull" +"mannWhitneyUTestOrDefault" +"mannWhitneyUTestOrNull" +"mannWhitneyUTestResample" +"mannWhitneyUTestSimpleState" +"mannWhitneyUTestState" +"map" +"mapAdd" +"mapAll" +"mapApply" +"mapConcat" +"mapContains" +"mapContainsKeyLike" +"mapExists" +"mapExtractKeyLike" +"mapFilter" +"mapFromArrays" +"mapFromString" +"mapKeys" +"mapPartialReverseSort" +"mapPartialSort" +"mapPopulateSeries" +"mapReverseSort" +"mapSort" +"mapSubtract" +"mapUpdate" +"mapValues" +"match" +"materialize" +"max" +"max2" +"maxArgMax" +"maxArgMin" +"maxArray" +"maxDistinct" +"maxForEach" +"maxIf" +"maxIntersections" +"maxIntersectionsArgMax" +"maxIntersectionsArgMin" +"maxIntersectionsArray" +"maxIntersectionsDistinct" +"maxIntersectionsForEach" +"maxIntersectionsIf" +"maxIntersectionsMap" +"maxIntersectionsMerge" +"maxIntersectionsNull" +"maxIntersectionsOrDefault" +"maxIntersectionsOrNull" +"maxIntersectionsPosition" +"maxIntersectionsPositionArgMax" +"maxIntersectionsPositionArgMin" +"maxIntersectionsPositionArray" +"maxIntersectionsPositionDistinct" +"maxIntersectionsPositionForEach" +"maxIntersectionsPositionIf" +"maxIntersectionsPositionMap" +"maxIntersectionsPositionMerge" +"maxIntersectionsPositionNull" +"maxIntersectionsPositionOrDefault" +"maxIntersectionsPositionOrNull" +"maxIntersectionsPositionResample" +"maxIntersectionsPositionSimpleState" +"maxIntersectionsPositionState" +"maxIntersectionsResample" +"maxIntersectionsSimpleState" +"maxIntersectionsState" +"maxMap" +"maxMappedArrays" +"maxMappedArraysArgMax" +"maxMappedArraysArgMin" +"maxMappedArraysArray" +"maxMappedArraysDistinct" +"maxMappedArraysForEach" +"maxMappedArraysIf" +"maxMappedArraysMap" +"maxMappedArraysMerge" +"maxMappedArraysNull" +"maxMappedArraysOrDefault" +"maxMappedArraysOrNull" +"maxMappedArraysResample" +"maxMappedArraysSimpleState" +"maxMappedArraysState" +"maxMerge" +"maxNull" +"maxOrDefault" +"maxOrNull" +"maxResample" +"maxSimpleState" +"maxState" +"meanZTest" +"meanZTestArgMax" +"meanZTestArgMin" +"meanZTestArray" +"meanZTestDistinct" +"meanZTestForEach" +"meanZTestIf" +"meanZTestMap" +"meanZTestMerge" +"meanZTestNull" +"meanZTestOrDefault" +"meanZTestOrNull" +"meanZTestResample" +"meanZTestSimpleState" +"meanZTestState" +"median" +"medianArgMax" +"medianArgMin" +"medianArray" +"medianBFloat16" +"medianBFloat16ArgMax" +"medianBFloat16ArgMin" +"medianBFloat16Array" +"medianBFloat16Distinct" +"medianBFloat16ForEach" +"medianBFloat16If" +"medianBFloat16Map" +"medianBFloat16Merge" +"medianBFloat16Null" +"medianBFloat16OrDefault" +"medianBFloat16OrNull" +"medianBFloat16Resample" +"medianBFloat16SimpleState" +"medianBFloat16State" +"medianBFloat16Weighted" +"medianBFloat16WeightedArgMax" +"medianBFloat16WeightedArgMin" +"medianBFloat16WeightedArray" +"medianBFloat16WeightedDistinct" +"medianBFloat16WeightedForEach" +"medianBFloat16WeightedIf" +"medianBFloat16WeightedMap" 
+"medianBFloat16WeightedMerge" +"medianBFloat16WeightedNull" +"medianBFloat16WeightedOrDefault" +"medianBFloat16WeightedOrNull" +"medianBFloat16WeightedResample" +"medianBFloat16WeightedSimpleState" +"medianBFloat16WeightedState" +"medianDD" +"medianDDArgMax" +"medianDDArgMin" +"medianDDArray" +"medianDDDistinct" +"medianDDForEach" +"medianDDIf" +"medianDDMap" +"medianDDMerge" +"medianDDNull" +"medianDDOrDefault" +"medianDDOrNull" +"medianDDResample" +"medianDDSimpleState" +"medianDDState" +"medianDeterministic" +"medianDeterministicArgMax" +"medianDeterministicArgMin" +"medianDeterministicArray" +"medianDeterministicDistinct" +"medianDeterministicForEach" +"medianDeterministicIf" +"medianDeterministicMap" +"medianDeterministicMerge" +"medianDeterministicNull" +"medianDeterministicOrDefault" +"medianDeterministicOrNull" +"medianDeterministicResample" +"medianDeterministicSimpleState" +"medianDeterministicState" +"medianDistinct" +"medianExact" +"medianExactArgMax" +"medianExactArgMin" +"medianExactArray" +"medianExactDistinct" +"medianExactForEach" +"medianExactHigh" +"medianExactHighArgMax" +"medianExactHighArgMin" +"medianExactHighArray" +"medianExactHighDistinct" +"medianExactHighForEach" +"medianExactHighIf" +"medianExactHighMap" +"medianExactHighMerge" +"medianExactHighNull" +"medianExactHighOrDefault" +"medianExactHighOrNull" +"medianExactHighResample" +"medianExactHighSimpleState" +"medianExactHighState" +"medianExactIf" +"medianExactLow" +"medianExactLowArgMax" +"medianExactLowArgMin" +"medianExactLowArray" +"medianExactLowDistinct" +"medianExactLowForEach" +"medianExactLowIf" +"medianExactLowMap" +"medianExactLowMerge" +"medianExactLowNull" +"medianExactLowOrDefault" +"medianExactLowOrNull" +"medianExactLowResample" +"medianExactLowSimpleState" +"medianExactLowState" +"medianExactMap" +"medianExactMerge" +"medianExactNull" +"medianExactOrDefault" +"medianExactOrNull" +"medianExactResample" +"medianExactSimpleState" +"medianExactState" +"medianExactWeighted" +"medianExactWeightedArgMax" +"medianExactWeightedArgMin" +"medianExactWeightedArray" +"medianExactWeightedDistinct" +"medianExactWeightedForEach" +"medianExactWeightedIf" +"medianExactWeightedMap" +"medianExactWeightedMerge" +"medianExactWeightedNull" +"medianExactWeightedOrDefault" +"medianExactWeightedOrNull" +"medianExactWeightedResample" +"medianExactWeightedSimpleState" +"medianExactWeightedState" +"medianForEach" +"medianGK" +"medianGKArgMax" +"medianGKArgMin" +"medianGKArray" +"medianGKDistinct" +"medianGKForEach" +"medianGKIf" +"medianGKMap" +"medianGKMerge" +"medianGKNull" +"medianGKOrDefault" +"medianGKOrNull" +"medianGKResample" +"medianGKSimpleState" +"medianGKState" +"medianIf" +"medianInterpolatedWeighted" +"medianInterpolatedWeightedArgMax" +"medianInterpolatedWeightedArgMin" +"medianInterpolatedWeightedArray" +"medianInterpolatedWeightedDistinct" +"medianInterpolatedWeightedForEach" +"medianInterpolatedWeightedIf" +"medianInterpolatedWeightedMap" +"medianInterpolatedWeightedMerge" +"medianInterpolatedWeightedNull" +"medianInterpolatedWeightedOrDefault" +"medianInterpolatedWeightedOrNull" +"medianInterpolatedWeightedResample" +"medianInterpolatedWeightedSimpleState" +"medianInterpolatedWeightedState" +"medianMap" +"medianMerge" +"medianNull" +"medianOrDefault" +"medianOrNull" +"medianResample" +"medianSimpleState" +"medianState" +"medianTDigest" +"medianTDigestArgMax" +"medianTDigestArgMin" +"medianTDigestArray" +"medianTDigestDistinct" +"medianTDigestForEach" +"medianTDigestIf" +"medianTDigestMap" 
+"medianTDigestMerge" +"medianTDigestNull" +"medianTDigestOrDefault" +"medianTDigestOrNull" +"medianTDigestResample" +"medianTDigestSimpleState" +"medianTDigestState" +"medianTDigestWeighted" +"medianTDigestWeightedArgMax" +"medianTDigestWeightedArgMin" +"medianTDigestWeightedArray" +"medianTDigestWeightedDistinct" +"medianTDigestWeightedForEach" +"medianTDigestWeightedIf" +"medianTDigestWeightedMap" +"medianTDigestWeightedMerge" +"medianTDigestWeightedNull" +"medianTDigestWeightedOrDefault" +"medianTDigestWeightedOrNull" +"medianTDigestWeightedResample" +"medianTDigestWeightedSimpleState" +"medianTDigestWeightedState" +"medianTiming" +"medianTimingArgMax" +"medianTimingArgMin" +"medianTimingArray" +"medianTimingDistinct" +"medianTimingForEach" +"medianTimingIf" +"medianTimingMap" +"medianTimingMerge" +"medianTimingNull" +"medianTimingOrDefault" +"medianTimingOrNull" +"medianTimingResample" +"medianTimingSimpleState" +"medianTimingState" +"medianTimingWeighted" +"medianTimingWeightedArgMax" +"medianTimingWeightedArgMin" +"medianTimingWeightedArray" +"medianTimingWeightedDistinct" +"medianTimingWeightedForEach" +"medianTimingWeightedIf" +"medianTimingWeightedMap" +"medianTimingWeightedMerge" +"medianTimingWeightedNull" +"medianTimingWeightedOrDefault" +"medianTimingWeightedOrNull" +"medianTimingWeightedResample" +"medianTimingWeightedSimpleState" +"medianTimingWeightedState" +"metroHash64" +"mid" +"min" +"min2" +"minArgMax" +"minArgMin" +"minArray" +"minDistinct" +"minForEach" +"minIf" +"minMap" +"minMappedArrays" +"minMappedArraysArgMax" +"minMappedArraysArgMin" +"minMappedArraysArray" +"minMappedArraysDistinct" +"minMappedArraysForEach" +"minMappedArraysIf" +"minMappedArraysMap" +"minMappedArraysMerge" +"minMappedArraysNull" +"minMappedArraysOrDefault" +"minMappedArraysOrNull" +"minMappedArraysResample" +"minMappedArraysSimpleState" +"minMappedArraysState" +"minMerge" +"minNull" +"minOrDefault" +"minOrNull" +"minResample" +"minSampleSizeContinous" +"minSampleSizeContinuous" +"minSampleSizeConversion" +"minSimpleState" +"minState" +"minus" +"mismatches" +"mod" +"modulo" +"moduloLegacy" +"moduloOrZero" +"monthName" +"mortonDecode" +"mortonEncode" +"multiFuzzyMatchAllIndices" +"multiFuzzyMatchAny" +"multiFuzzyMatchAnyIndex" +"multiIf" +"multiMatchAllIndices" +"multiMatchAny" +"multiMatchAnyIndex" +"multiSearchAllPositions" +"multiSearchAllPositionsCaseInsensitive" +"multiSearchAllPositionsCaseInsensitiveUTF8" +"multiSearchAllPositionsUTF8" +"multiSearchAny" +"multiSearchAnyCaseInsensitive" +"multiSearchAnyCaseInsensitiveUTF8" +"multiSearchAnyUTF8" +"multiSearchFirstIndex" +"multiSearchFirstIndexCaseInsensitive" +"multiSearchFirstIndexCaseInsensitiveUTF8" +"multiSearchFirstIndexUTF8" +"multiSearchFirstPosition" +"multiSearchFirstPositionCaseInsensitive" +"multiSearchFirstPositionCaseInsensitiveUTF8" +"multiSearchFirstPositionUTF8" +"multiply" +"multiplyDecimal" +"murmurHash2_32" +"murmurHash2_64" +"murmurHash3_128" +"murmurHash3_32" +"murmurHash3_64" +"negate" +"neighbor" +"nested" +"netloc" +"ngramDistance" +"ngramDistanceCaseInsensitive" +"ngramDistanceCaseInsensitiveUTF8" +"ngramDistanceUTF8" +"ngramMinHash" +"ngramMinHashArg" +"ngramMinHashArgCaseInsensitive" +"ngramMinHashArgCaseInsensitiveUTF8" +"ngramMinHashArgUTF8" +"ngramMinHashCaseInsensitive" +"ngramMinHashCaseInsensitiveUTF8" +"ngramMinHashUTF8" +"ngramSearch" +"ngramSearchCaseInsensitive" +"ngramSearchCaseInsensitiveUTF8" +"ngramSearchUTF8" +"ngramSimHash" +"ngramSimHashCaseInsensitive" +"ngramSimHashCaseInsensitiveUTF8" 
+"ngramSimHashUTF8" +"ngrams" +"nonNegativeDerivative" +"nonNegativeDerivativeArgMax" +"nonNegativeDerivativeArgMin" +"nonNegativeDerivativeArray" +"nonNegativeDerivativeDistinct" +"nonNegativeDerivativeForEach" +"nonNegativeDerivativeIf" +"nonNegativeDerivativeMap" +"nonNegativeDerivativeMerge" +"nonNegativeDerivativeNull" +"nonNegativeDerivativeOrDefault" +"nonNegativeDerivativeOrNull" +"nonNegativeDerivativeResample" +"nonNegativeDerivativeSimpleState" +"nonNegativeDerivativeState" +"normL1" +"normL2" +"normL2Squared" +"normLinf" +"normLp" +"normalizeL1" +"normalizeL2" +"normalizeLinf" +"normalizeLp" +"normalizeQuery" +"normalizeQueryKeepNames" +"normalizeUTF8NFC" +"normalizeUTF8NFD" +"normalizeUTF8NFKC" +"normalizeUTF8NFKD" +"normalizedQueryHash" +"normalizedQueryHashKeepNames" +"not" +"notEmpty" +"notEquals" +"notILike" +"notIn" +"notInIgnoreSet" +"notLike" +"notNullIn" +"notNullInIgnoreSet" +"nothing" +"nothingArgMax" +"nothingArgMin" +"nothingArray" +"nothingDistinct" +"nothingForEach" +"nothingIf" +"nothingMap" +"nothingMerge" +"nothingNull" +"nothingNull" +"nothingNullArgMax" +"nothingNullArgMin" +"nothingNullArray" +"nothingNullDistinct" +"nothingNullForEach" +"nothingNullIf" +"nothingNullMap" +"nothingNullMerge" +"nothingNullNull" +"nothingNullOrDefault" +"nothingNullOrNull" +"nothingNullResample" +"nothingNullSimpleState" +"nothingNullState" +"nothingOrDefault" +"nothingOrNull" +"nothingResample" +"nothingSimpleState" +"nothingState" +"nothingUInt64" +"nothingUInt64ArgMax" +"nothingUInt64ArgMin" +"nothingUInt64Array" +"nothingUInt64Distinct" +"nothingUInt64ForEach" +"nothingUInt64If" +"nothingUInt64Map" +"nothingUInt64Merge" +"nothingUInt64Null" +"nothingUInt64OrDefault" +"nothingUInt64OrNull" +"nothingUInt64Resample" +"nothingUInt64SimpleState" +"nothingUInt64State" +"now" +"now64" +"nowInBlock" +"nth_value" +"nth_valueArgMax" +"nth_valueArgMin" +"nth_valueArray" +"nth_valueDistinct" +"nth_valueForEach" +"nth_valueIf" +"nth_valueMap" +"nth_valueMerge" +"nth_valueNull" +"nth_valueOrDefault" +"nth_valueOrNull" +"nth_valueResample" +"nth_valueSimpleState" +"nth_valueState" +"ntile" +"ntileArgMax" +"ntileArgMin" +"ntileArray" +"ntileDistinct" +"ntileForEach" +"ntileIf" +"ntileMap" +"ntileMerge" +"ntileNull" +"ntileOrDefault" +"ntileOrNull" +"ntileResample" +"ntileSimpleState" +"ntileState" +"nullIf" +"nullIn" +"nullInIgnoreSet" +"or" +"parseDateTime" +"parseDateTime32BestEffort" +"parseDateTime32BestEffortOrNull" +"parseDateTime32BestEffortOrZero" +"parseDateTime64BestEffort" +"parseDateTime64BestEffortOrNull" +"parseDateTime64BestEffortOrZero" +"parseDateTime64BestEffortUS" +"parseDateTime64BestEffortUSOrNull" +"parseDateTime64BestEffortUSOrZero" +"parseDateTimeBestEffort" +"parseDateTimeBestEffortOrNull" +"parseDateTimeBestEffortOrZero" +"parseDateTimeBestEffortUS" +"parseDateTimeBestEffortUSOrNull" +"parseDateTimeBestEffortUSOrZero" +"parseDateTimeInJodaSyntax" +"parseDateTimeInJodaSyntaxOrNull" +"parseDateTimeInJodaSyntaxOrZero" +"parseDateTimeOrNull" +"parseDateTimeOrZero" +"parseReadableSize" +"parseReadableSizeOrNull" +"parseReadableSizeOrZero" +"parseTimeDelta" +"partitionID" +"partitionId" +"path" +"pathFull" +"percentRank" +"percentRankArgMax" +"percentRankArgMin" +"percentRankArray" +"percentRankDistinct" +"percentRankForEach" +"percentRankIf" +"percentRankMap" +"percentRankMerge" +"percentRankNull" +"percentRankOrDefault" +"percentRankOrNull" +"percentRankResample" +"percentRankSimpleState" +"percentRankState" +"percent_rank" +"percent_rankArgMax" +"percent_rankArgMin" 
+"percent_rankArray" +"percent_rankDistinct" +"percent_rankForEach" +"percent_rankIf" +"percent_rankMap" +"percent_rankMerge" +"percent_rankNull" +"percent_rankOrDefault" +"percent_rankOrNull" +"percent_rankResample" +"percent_rankSimpleState" +"percent_rankState" +"pi" +"plus" +"pmod" +"pointInEllipses" +"pointInPolygon" +"polygonAreaCartesian" +"polygonAreaSpherical" +"polygonConvexHullCartesian" +"polygonPerimeterCartesian" +"polygonPerimeterSpherical" +"polygonsDistanceCartesian" +"polygonsDistanceSpherical" +"polygonsEqualsCartesian" +"polygonsIntersectionCartesian" +"polygonsIntersectionSpherical" +"polygonsSymDifferenceCartesian" +"polygonsSymDifferenceSpherical" +"polygonsUnionCartesian" +"polygonsUnionSpherical" +"polygonsWithinCartesian" +"polygonsWithinSpherical" +"port" +"portRFC" +"position" +"positionCaseInsensitive" +"positionCaseInsensitiveUTF8" +"positionUTF8" +"positiveModulo" +"positive_modulo" +"pow" +"power" +"printf" +"proportionsZTest" +"protocol" +"punycodeDecode" +"punycodeEncode" +"quantile" +"quantileArgMax" +"quantileArgMin" +"quantileArray" +"quantileBFloat16" +"quantileBFloat16ArgMax" +"quantileBFloat16ArgMin" +"quantileBFloat16Array" +"quantileBFloat16Distinct" +"quantileBFloat16ForEach" +"quantileBFloat16If" +"quantileBFloat16Map" +"quantileBFloat16Merge" +"quantileBFloat16Null" +"quantileBFloat16OrDefault" +"quantileBFloat16OrNull" +"quantileBFloat16Resample" +"quantileBFloat16SimpleState" +"quantileBFloat16State" +"quantileBFloat16Weighted" +"quantileBFloat16WeightedArgMax" +"quantileBFloat16WeightedArgMin" +"quantileBFloat16WeightedArray" +"quantileBFloat16WeightedDistinct" +"quantileBFloat16WeightedForEach" +"quantileBFloat16WeightedIf" +"quantileBFloat16WeightedMap" +"quantileBFloat16WeightedMerge" +"quantileBFloat16WeightedNull" +"quantileBFloat16WeightedOrDefault" +"quantileBFloat16WeightedOrNull" +"quantileBFloat16WeightedResample" +"quantileBFloat16WeightedSimpleState" +"quantileBFloat16WeightedState" +"quantileDD" +"quantileDDArgMax" +"quantileDDArgMin" +"quantileDDArray" +"quantileDDDistinct" +"quantileDDForEach" +"quantileDDIf" +"quantileDDMap" +"quantileDDMerge" +"quantileDDNull" +"quantileDDOrDefault" +"quantileDDOrNull" +"quantileDDResample" +"quantileDDSimpleState" +"quantileDDState" +"quantileDeterministic" +"quantileDeterministicArgMax" +"quantileDeterministicArgMin" +"quantileDeterministicArray" +"quantileDeterministicDistinct" +"quantileDeterministicForEach" +"quantileDeterministicIf" +"quantileDeterministicMap" +"quantileDeterministicMerge" +"quantileDeterministicNull" +"quantileDeterministicOrDefault" +"quantileDeterministicOrNull" +"quantileDeterministicResample" +"quantileDeterministicSimpleState" +"quantileDeterministicState" +"quantileDistinct" +"quantileExact" +"quantileExactArgMax" +"quantileExactArgMin" +"quantileExactArray" +"quantileExactDistinct" +"quantileExactExclusive" +"quantileExactExclusiveArgMax" +"quantileExactExclusiveArgMin" +"quantileExactExclusiveArray" +"quantileExactExclusiveDistinct" +"quantileExactExclusiveForEach" +"quantileExactExclusiveIf" +"quantileExactExclusiveMap" +"quantileExactExclusiveMerge" +"quantileExactExclusiveNull" +"quantileExactExclusiveOrDefault" +"quantileExactExclusiveOrNull" +"quantileExactExclusiveResample" +"quantileExactExclusiveSimpleState" +"quantileExactExclusiveState" +"quantileExactForEach" +"quantileExactHigh" +"quantileExactHighArgMax" +"quantileExactHighArgMin" +"quantileExactHighArray" +"quantileExactHighDistinct" +"quantileExactHighForEach" +"quantileExactHighIf" 
+"quantileExactHighMap" +"quantileExactHighMerge" +"quantileExactHighNull" +"quantileExactHighOrDefault" +"quantileExactHighOrNull" +"quantileExactHighResample" +"quantileExactHighSimpleState" +"quantileExactHighState" +"quantileExactIf" +"quantileExactInclusive" +"quantileExactInclusiveArgMax" +"quantileExactInclusiveArgMin" +"quantileExactInclusiveArray" +"quantileExactInclusiveDistinct" +"quantileExactInclusiveForEach" +"quantileExactInclusiveIf" +"quantileExactInclusiveMap" +"quantileExactInclusiveMerge" +"quantileExactInclusiveNull" +"quantileExactInclusiveOrDefault" +"quantileExactInclusiveOrNull" +"quantileExactInclusiveResample" +"quantileExactInclusiveSimpleState" +"quantileExactInclusiveState" +"quantileExactLow" +"quantileExactLowArgMax" +"quantileExactLowArgMin" +"quantileExactLowArray" +"quantileExactLowDistinct" +"quantileExactLowForEach" +"quantileExactLowIf" +"quantileExactLowMap" +"quantileExactLowMerge" +"quantileExactLowNull" +"quantileExactLowOrDefault" +"quantileExactLowOrNull" +"quantileExactLowResample" +"quantileExactLowSimpleState" +"quantileExactLowState" +"quantileExactMap" +"quantileExactMerge" +"quantileExactNull" +"quantileExactOrDefault" +"quantileExactOrNull" +"quantileExactResample" +"quantileExactSimpleState" +"quantileExactState" +"quantileExactWeighted" +"quantileExactWeightedArgMax" +"quantileExactWeightedArgMin" +"quantileExactWeightedArray" +"quantileExactWeightedDistinct" +"quantileExactWeightedForEach" +"quantileExactWeightedIf" +"quantileExactWeightedMap" +"quantileExactWeightedMerge" +"quantileExactWeightedNull" +"quantileExactWeightedOrDefault" +"quantileExactWeightedOrNull" +"quantileExactWeightedResample" +"quantileExactWeightedSimpleState" +"quantileExactWeightedState" +"quantileForEach" +"quantileGK" +"quantileGKArgMax" +"quantileGKArgMin" +"quantileGKArray" +"quantileGKDistinct" +"quantileGKForEach" +"quantileGKIf" +"quantileGKMap" +"quantileGKMerge" +"quantileGKNull" +"quantileGKOrDefault" +"quantileGKOrNull" +"quantileGKResample" +"quantileGKSimpleState" +"quantileGKState" +"quantileIf" +"quantileInterpolatedWeighted" +"quantileInterpolatedWeightedArgMax" +"quantileInterpolatedWeightedArgMin" +"quantileInterpolatedWeightedArray" +"quantileInterpolatedWeightedDistinct" +"quantileInterpolatedWeightedForEach" +"quantileInterpolatedWeightedIf" +"quantileInterpolatedWeightedMap" +"quantileInterpolatedWeightedMerge" +"quantileInterpolatedWeightedNull" +"quantileInterpolatedWeightedOrDefault" +"quantileInterpolatedWeightedOrNull" +"quantileInterpolatedWeightedResample" +"quantileInterpolatedWeightedSimpleState" +"quantileInterpolatedWeightedState" +"quantileMap" +"quantileMerge" +"quantileNull" +"quantileOrDefault" +"quantileOrNull" +"quantileResample" +"quantileSimpleState" +"quantileState" +"quantileTDigest" +"quantileTDigestArgMax" +"quantileTDigestArgMin" +"quantileTDigestArray" +"quantileTDigestDistinct" +"quantileTDigestForEach" +"quantileTDigestIf" +"quantileTDigestMap" +"quantileTDigestMerge" +"quantileTDigestNull" +"quantileTDigestOrDefault" +"quantileTDigestOrNull" +"quantileTDigestResample" +"quantileTDigestSimpleState" +"quantileTDigestState" +"quantileTDigestWeighted" +"quantileTDigestWeightedArgMax" +"quantileTDigestWeightedArgMin" +"quantileTDigestWeightedArray" +"quantileTDigestWeightedDistinct" +"quantileTDigestWeightedForEach" +"quantileTDigestWeightedIf" +"quantileTDigestWeightedMap" +"quantileTDigestWeightedMerge" +"quantileTDigestWeightedNull" +"quantileTDigestWeightedOrDefault" +"quantileTDigestWeightedOrNull" 
+"quantileTDigestWeightedResample" +"quantileTDigestWeightedSimpleState" +"quantileTDigestWeightedState" +"quantileTiming" +"quantileTimingArgMax" +"quantileTimingArgMin" +"quantileTimingArray" +"quantileTimingDistinct" +"quantileTimingForEach" +"quantileTimingIf" +"quantileTimingMap" +"quantileTimingMerge" +"quantileTimingNull" +"quantileTimingOrDefault" +"quantileTimingOrNull" +"quantileTimingResample" +"quantileTimingSimpleState" +"quantileTimingState" +"quantileTimingWeighted" +"quantileTimingWeightedArgMax" +"quantileTimingWeightedArgMin" +"quantileTimingWeightedArray" +"quantileTimingWeightedDistinct" +"quantileTimingWeightedForEach" +"quantileTimingWeightedIf" +"quantileTimingWeightedMap" +"quantileTimingWeightedMerge" +"quantileTimingWeightedNull" +"quantileTimingWeightedOrDefault" +"quantileTimingWeightedOrNull" +"quantileTimingWeightedResample" +"quantileTimingWeightedSimpleState" +"quantileTimingWeightedState" +"quantiles" +"quantilesArgMax" +"quantilesArgMin" +"quantilesArray" +"quantilesBFloat16" +"quantilesBFloat16ArgMax" +"quantilesBFloat16ArgMin" +"quantilesBFloat16Array" +"quantilesBFloat16Distinct" +"quantilesBFloat16ForEach" +"quantilesBFloat16If" +"quantilesBFloat16Map" +"quantilesBFloat16Merge" +"quantilesBFloat16Null" +"quantilesBFloat16OrDefault" +"quantilesBFloat16OrNull" +"quantilesBFloat16Resample" +"quantilesBFloat16SimpleState" +"quantilesBFloat16State" +"quantilesBFloat16Weighted" +"quantilesBFloat16WeightedArgMax" +"quantilesBFloat16WeightedArgMin" +"quantilesBFloat16WeightedArray" +"quantilesBFloat16WeightedDistinct" +"quantilesBFloat16WeightedForEach" +"quantilesBFloat16WeightedIf" +"quantilesBFloat16WeightedMap" +"quantilesBFloat16WeightedMerge" +"quantilesBFloat16WeightedNull" +"quantilesBFloat16WeightedOrDefault" +"quantilesBFloat16WeightedOrNull" +"quantilesBFloat16WeightedResample" +"quantilesBFloat16WeightedSimpleState" +"quantilesBFloat16WeightedState" +"quantilesDD" +"quantilesDDArgMax" +"quantilesDDArgMin" +"quantilesDDArray" +"quantilesDDDistinct" +"quantilesDDForEach" +"quantilesDDIf" +"quantilesDDMap" +"quantilesDDMerge" +"quantilesDDNull" +"quantilesDDOrDefault" +"quantilesDDOrNull" +"quantilesDDResample" +"quantilesDDSimpleState" +"quantilesDDState" +"quantilesDeterministic" +"quantilesDeterministicArgMax" +"quantilesDeterministicArgMin" +"quantilesDeterministicArray" +"quantilesDeterministicDistinct" +"quantilesDeterministicForEach" +"quantilesDeterministicIf" +"quantilesDeterministicMap" +"quantilesDeterministicMerge" +"quantilesDeterministicNull" +"quantilesDeterministicOrDefault" +"quantilesDeterministicOrNull" +"quantilesDeterministicResample" +"quantilesDeterministicSimpleState" +"quantilesDeterministicState" +"quantilesDistinct" +"quantilesExact" +"quantilesExactArgMax" +"quantilesExactArgMin" +"quantilesExactArray" +"quantilesExactDistinct" +"quantilesExactExclusive" +"quantilesExactExclusiveArgMax" +"quantilesExactExclusiveArgMin" +"quantilesExactExclusiveArray" +"quantilesExactExclusiveDistinct" +"quantilesExactExclusiveForEach" +"quantilesExactExclusiveIf" +"quantilesExactExclusiveMap" +"quantilesExactExclusiveMerge" +"quantilesExactExclusiveNull" +"quantilesExactExclusiveOrDefault" +"quantilesExactExclusiveOrNull" +"quantilesExactExclusiveResample" +"quantilesExactExclusiveSimpleState" +"quantilesExactExclusiveState" +"quantilesExactForEach" +"quantilesExactHigh" +"quantilesExactHighArgMax" +"quantilesExactHighArgMin" +"quantilesExactHighArray" +"quantilesExactHighDistinct" +"quantilesExactHighForEach" +"quantilesExactHighIf" 
+"quantilesExactHighMap" +"quantilesExactHighMerge" +"quantilesExactHighNull" +"quantilesExactHighOrDefault" +"quantilesExactHighOrNull" +"quantilesExactHighResample" +"quantilesExactHighSimpleState" +"quantilesExactHighState" +"quantilesExactIf" +"quantilesExactInclusive" +"quantilesExactInclusiveArgMax" +"quantilesExactInclusiveArgMin" +"quantilesExactInclusiveArray" +"quantilesExactInclusiveDistinct" +"quantilesExactInclusiveForEach" +"quantilesExactInclusiveIf" +"quantilesExactInclusiveMap" +"quantilesExactInclusiveMerge" +"quantilesExactInclusiveNull" +"quantilesExactInclusiveOrDefault" +"quantilesExactInclusiveOrNull" +"quantilesExactInclusiveResample" +"quantilesExactInclusiveSimpleState" +"quantilesExactInclusiveState" +"quantilesExactLow" +"quantilesExactLowArgMax" +"quantilesExactLowArgMin" +"quantilesExactLowArray" +"quantilesExactLowDistinct" +"quantilesExactLowForEach" +"quantilesExactLowIf" +"quantilesExactLowMap" +"quantilesExactLowMerge" +"quantilesExactLowNull" +"quantilesExactLowOrDefault" +"quantilesExactLowOrNull" +"quantilesExactLowResample" +"quantilesExactLowSimpleState" +"quantilesExactLowState" +"quantilesExactMap" +"quantilesExactMerge" +"quantilesExactNull" +"quantilesExactOrDefault" +"quantilesExactOrNull" +"quantilesExactResample" +"quantilesExactSimpleState" +"quantilesExactState" +"quantilesExactWeighted" +"quantilesExactWeightedArgMax" +"quantilesExactWeightedArgMin" +"quantilesExactWeightedArray" +"quantilesExactWeightedDistinct" +"quantilesExactWeightedForEach" +"quantilesExactWeightedIf" +"quantilesExactWeightedMap" +"quantilesExactWeightedMerge" +"quantilesExactWeightedNull" +"quantilesExactWeightedOrDefault" +"quantilesExactWeightedOrNull" +"quantilesExactWeightedResample" +"quantilesExactWeightedSimpleState" +"quantilesExactWeightedState" +"quantilesForEach" +"quantilesGK" +"quantilesGKArgMax" +"quantilesGKArgMin" +"quantilesGKArray" +"quantilesGKDistinct" +"quantilesGKForEach" +"quantilesGKIf" +"quantilesGKMap" +"quantilesGKMerge" +"quantilesGKNull" +"quantilesGKOrDefault" +"quantilesGKOrNull" +"quantilesGKResample" +"quantilesGKSimpleState" +"quantilesGKState" +"quantilesIf" +"quantilesInterpolatedWeighted" +"quantilesInterpolatedWeightedArgMax" +"quantilesInterpolatedWeightedArgMin" +"quantilesInterpolatedWeightedArray" +"quantilesInterpolatedWeightedDistinct" +"quantilesInterpolatedWeightedForEach" +"quantilesInterpolatedWeightedIf" +"quantilesInterpolatedWeightedMap" +"quantilesInterpolatedWeightedMerge" +"quantilesInterpolatedWeightedNull" +"quantilesInterpolatedWeightedOrDefault" +"quantilesInterpolatedWeightedOrNull" +"quantilesInterpolatedWeightedResample" +"quantilesInterpolatedWeightedSimpleState" +"quantilesInterpolatedWeightedState" +"quantilesMap" +"quantilesMerge" +"quantilesNull" +"quantilesOrDefault" +"quantilesOrNull" +"quantilesResample" +"quantilesSimpleState" +"quantilesState" +"quantilesTDigest" +"quantilesTDigestArgMax" +"quantilesTDigestArgMin" +"quantilesTDigestArray" +"quantilesTDigestDistinct" +"quantilesTDigestForEach" +"quantilesTDigestIf" +"quantilesTDigestMap" +"quantilesTDigestMerge" +"quantilesTDigestNull" +"quantilesTDigestOrDefault" +"quantilesTDigestOrNull" +"quantilesTDigestResample" +"quantilesTDigestSimpleState" +"quantilesTDigestState" +"quantilesTDigestWeighted" +"quantilesTDigestWeightedArgMax" +"quantilesTDigestWeightedArgMin" +"quantilesTDigestWeightedArray" +"quantilesTDigestWeightedDistinct" +"quantilesTDigestWeightedForEach" +"quantilesTDigestWeightedIf" +"quantilesTDigestWeightedMap" 
+"quantilesTDigestWeightedMerge" +"quantilesTDigestWeightedNull" +"quantilesTDigestWeightedOrDefault" +"quantilesTDigestWeightedOrNull" +"quantilesTDigestWeightedResample" +"quantilesTDigestWeightedSimpleState" +"quantilesTDigestWeightedState" +"quantilesTiming" +"quantilesTimingArgMax" +"quantilesTimingArgMin" +"quantilesTimingArray" +"quantilesTimingDistinct" +"quantilesTimingForEach" +"quantilesTimingIf" +"quantilesTimingMap" +"quantilesTimingMerge" +"quantilesTimingNull" +"quantilesTimingOrDefault" +"quantilesTimingOrNull" +"quantilesTimingResample" +"quantilesTimingSimpleState" +"quantilesTimingState" +"quantilesTimingWeighted" +"quantilesTimingWeightedArgMax" +"quantilesTimingWeightedArgMin" +"quantilesTimingWeightedArray" +"quantilesTimingWeightedDistinct" +"quantilesTimingWeightedForEach" +"quantilesTimingWeightedIf" +"quantilesTimingWeightedMap" +"quantilesTimingWeightedMerge" +"quantilesTimingWeightedNull" +"quantilesTimingWeightedOrDefault" +"quantilesTimingWeightedOrNull" +"quantilesTimingWeightedResample" +"quantilesTimingWeightedSimpleState" +"quantilesTimingWeightedState" +"queryID" +"queryString" +"queryStringAndFragment" +"query_id" +"radians" +"rand" +"rand32" +"rand64" +"randBernoulli" +"randBinomial" +"randCanonical" +"randChiSquared" +"randConstant" +"randExponential" +"randFisherF" +"randLogNormal" +"randNegativeBinomial" +"randNormal" +"randPoisson" +"randStudentT" +"randUniform" +"randomFixedString" +"randomPrintableASCII" +"randomString" +"randomStringUTF8" +"range" +"rank" +"rankArgMax" +"rankArgMin" +"rankArray" +"rankCorr" +"rankCorrArgMax" +"rankCorrArgMin" +"rankCorrArray" +"rankCorrDistinct" +"rankCorrForEach" +"rankCorrIf" +"rankCorrMap" +"rankCorrMerge" +"rankCorrNull" +"rankCorrOrDefault" +"rankCorrOrNull" +"rankCorrResample" +"rankCorrSimpleState" +"rankCorrState" +"rankDistinct" +"rankForEach" +"rankIf" +"rankMap" +"rankMerge" +"rankNull" +"rankOrDefault" +"rankOrNull" +"rankResample" +"rankSimpleState" +"rankState" +"readWKTLineString" +"readWKTMultiLineString" +"readWKTMultiPolygon" +"readWKTPoint" +"readWKTPolygon" +"readWKTRing" +"regexpExtract" +"regexpQuoteMeta" +"regionHierarchy" +"regionIn" +"regionToArea" +"regionToCity" +"regionToContinent" +"regionToCountry" +"regionToDistrict" +"regionToName" +"regionToPopulation" +"regionToTopContinent" +"reinterpret" +"reinterpretAsDate" +"reinterpretAsDateTime" +"reinterpretAsFixedString" +"reinterpretAsFloat32" +"reinterpretAsFloat64" +"reinterpretAsInt128" +"reinterpretAsInt16" +"reinterpretAsInt256" +"reinterpretAsInt32" +"reinterpretAsInt64" +"reinterpretAsInt8" +"reinterpretAsString" +"reinterpretAsUInt128" +"reinterpretAsUInt16" +"reinterpretAsUInt256" +"reinterpretAsUInt32" +"reinterpretAsUInt64" +"reinterpretAsUInt8" +"reinterpretAsUUID" +"repeat" +"replace" +"replaceAll" +"replaceOne" +"replaceRegexpAll" +"replaceRegexpOne" +"replicate" +"retention" +"retentionArgMax" +"retentionArgMin" +"retentionArray" +"retentionDistinct" +"retentionForEach" +"retentionIf" +"retentionMap" +"retentionMerge" +"retentionNull" +"retentionOrDefault" +"retentionOrNull" +"retentionResample" +"retentionSimpleState" +"retentionState" +"reverse" +"reverseUTF8" +"revision" +"right" +"rightPad" +"rightPadUTF8" +"rightUTF8" +"round" +"roundAge" +"roundBankers" +"roundDown" +"roundDuration" +"roundToExp2" +"rowNumberInAllBlocks" +"rowNumberInBlock" +"row_number" +"row_numberArgMax" +"row_numberArgMin" +"row_numberArray" +"row_numberDistinct" +"row_numberForEach" +"row_numberIf" +"row_numberMap" +"row_numberMerge" 
+"row_numberNull" +"row_numberOrDefault" +"row_numberOrNull" +"row_numberResample" +"row_numberSimpleState" +"row_numberState" +"rpad" +"rtrim" +"runningAccumulate" +"runningConcurrency" +"runningDifference" +"runningDifferenceStartingWithFirstValue" +"s2CapContains" +"s2CapUnion" +"s2CellsIntersect" +"s2GetNeighbors" +"s2RectAdd" +"s2RectContains" +"s2RectIntersection" +"s2RectUnion" +"s2ToGeo" +"scalarProduct" +"sequenceCount" +"sequenceCountArgMax" +"sequenceCountArgMin" +"sequenceCountArray" +"sequenceCountDistinct" +"sequenceCountForEach" +"sequenceCountIf" +"sequenceCountMap" +"sequenceCountMerge" +"sequenceCountNull" +"sequenceCountOrDefault" +"sequenceCountOrNull" +"sequenceCountResample" +"sequenceCountSimpleState" +"sequenceCountState" +"sequenceMatch" +"sequenceMatchArgMax" +"sequenceMatchArgMin" +"sequenceMatchArray" +"sequenceMatchDistinct" +"sequenceMatchForEach" +"sequenceMatchIf" +"sequenceMatchMap" +"sequenceMatchMerge" +"sequenceMatchNull" +"sequenceMatchOrDefault" +"sequenceMatchOrNull" +"sequenceMatchResample" +"sequenceMatchSimpleState" +"sequenceMatchState" +"sequenceNextNode" +"sequenceNextNodeArgMax" +"sequenceNextNodeArgMin" +"sequenceNextNodeArray" +"sequenceNextNodeDistinct" +"sequenceNextNodeForEach" +"sequenceNextNodeIf" +"sequenceNextNodeMap" +"sequenceNextNodeMerge" +"sequenceNextNodeNull" +"sequenceNextNodeOrDefault" +"sequenceNextNodeOrNull" +"sequenceNextNodeResample" +"sequenceNextNodeSimpleState" +"sequenceNextNodeState" +"seriesDecomposeSTL" +"seriesOutliersDetectTukey" +"seriesPeriodDetectFFT" +"serverTimeZone" +"serverTimezone" +"serverUUID" +"shardCount" +"shardNum" +"showCertificate" +"sigmoid" +"sign" +"simpleJSONExtractBool" +"simpleJSONExtractFloat" +"simpleJSONExtractInt" +"simpleJSONExtractRaw" +"simpleJSONExtractString" +"simpleJSONExtractUInt" +"simpleJSONHas" +"simpleLinearRegression" +"simpleLinearRegressionArgMax" +"simpleLinearRegressionArgMin" +"simpleLinearRegressionArray" +"simpleLinearRegressionDistinct" +"simpleLinearRegressionForEach" +"simpleLinearRegressionIf" +"simpleLinearRegressionMap" +"simpleLinearRegressionMerge" +"simpleLinearRegressionNull" +"simpleLinearRegressionOrDefault" +"simpleLinearRegressionOrNull" +"simpleLinearRegressionResample" +"simpleLinearRegressionSimpleState" +"simpleLinearRegressionState" +"sin" +"singleValueOrNull" +"singleValueOrNullArgMax" +"singleValueOrNullArgMin" +"singleValueOrNullArray" +"singleValueOrNullDistinct" +"singleValueOrNullForEach" +"singleValueOrNullIf" +"singleValueOrNullMap" +"singleValueOrNullMerge" +"singleValueOrNullNull" +"singleValueOrNullOrDefault" +"singleValueOrNullOrNull" +"singleValueOrNullResample" +"singleValueOrNullSimpleState" +"singleValueOrNullState" +"sinh" +"sipHash128" +"sipHash128Keyed" +"sipHash128Reference" +"sipHash128ReferenceKeyed" +"sipHash64" +"sipHash64Keyed" +"skewPop" +"skewPopArgMax" +"skewPopArgMin" +"skewPopArray" +"skewPopDistinct" +"skewPopForEach" +"skewPopIf" +"skewPopMap" +"skewPopMerge" +"skewPopNull" +"skewPopOrDefault" +"skewPopOrNull" +"skewPopResample" +"skewPopSimpleState" +"skewPopState" +"skewSamp" +"skewSampArgMax" +"skewSampArgMin" +"skewSampArray" +"skewSampDistinct" +"skewSampForEach" +"skewSampIf" +"skewSampMap" +"skewSampMerge" +"skewSampNull" +"skewSampOrDefault" +"skewSampOrNull" +"skewSampResample" +"skewSampSimpleState" +"skewSampState" +"sleep" +"sleepEachRow" +"snowflakeIDToDateTime" +"snowflakeIDToDateTime64" +"snowflakeToDateTime" +"snowflakeToDateTime64" +"soundex" +"space" +"sparkBar" +"sparkBarArgMax" +"sparkBarArgMin" 
+"sparkBarArray" +"sparkBarDistinct" +"sparkBarForEach" +"sparkBarIf" +"sparkBarMap" +"sparkBarMerge" +"sparkBarNull" +"sparkBarOrDefault" +"sparkBarOrNull" +"sparkBarResample" +"sparkBarSimpleState" +"sparkBarState" +"sparkbar" +"sparkbarArgMax" +"sparkbarArgMin" +"sparkbarArray" +"sparkbarDistinct" +"sparkbarForEach" +"sparkbarIf" +"sparkbarMap" +"sparkbarMerge" +"sparkbarNull" +"sparkbarOrDefault" +"sparkbarOrNull" +"sparkbarResample" +"sparkbarSimpleState" +"sparkbarState" +"splitByAlpha" +"splitByChar" +"splitByNonAlpha" +"splitByRegexp" +"splitByString" +"splitByWhitespace" +"sqid" +"sqidDecode" +"sqidEncode" +"sqrt" +"startsWith" +"startsWithUTF8" +"stddevPop" +"stddevPopArgMax" +"stddevPopArgMin" +"stddevPopArray" +"stddevPopDistinct" +"stddevPopForEach" +"stddevPopIf" +"stddevPopMap" +"stddevPopMerge" +"stddevPopNull" +"stddevPopOrDefault" +"stddevPopOrNull" +"stddevPopResample" +"stddevPopSimpleState" +"stddevPopStable" +"stddevPopStableArgMax" +"stddevPopStableArgMin" +"stddevPopStableArray" +"stddevPopStableDistinct" +"stddevPopStableForEach" +"stddevPopStableIf" +"stddevPopStableMap" +"stddevPopStableMerge" +"stddevPopStableNull" +"stddevPopStableOrDefault" +"stddevPopStableOrNull" +"stddevPopStableResample" +"stddevPopStableSimpleState" +"stddevPopStableState" +"stddevPopState" +"stddevSamp" +"stddevSampArgMax" +"stddevSampArgMin" +"stddevSampArray" +"stddevSampDistinct" +"stddevSampForEach" +"stddevSampIf" +"stddevSampMap" +"stddevSampMerge" +"stddevSampNull" +"stddevSampOrDefault" +"stddevSampOrNull" +"stddevSampResample" +"stddevSampSimpleState" +"stddevSampStable" +"stddevSampStableArgMax" +"stddevSampStableArgMin" +"stddevSampStableArray" +"stddevSampStableDistinct" +"stddevSampStableForEach" +"stddevSampStableIf" +"stddevSampStableMap" +"stddevSampStableMerge" +"stddevSampStableNull" +"stddevSampStableOrDefault" +"stddevSampStableOrNull" +"stddevSampStableResample" +"stddevSampStableSimpleState" +"stddevSampStableState" +"stddevSampState" +"stem" +"stochasticLinearRegression" +"stochasticLinearRegressionArgMax" +"stochasticLinearRegressionArgMin" +"stochasticLinearRegressionArray" +"stochasticLinearRegressionDistinct" +"stochasticLinearRegressionForEach" +"stochasticLinearRegressionIf" +"stochasticLinearRegressionMap" +"stochasticLinearRegressionMerge" +"stochasticLinearRegressionNull" +"stochasticLinearRegressionOrDefault" +"stochasticLinearRegressionOrNull" +"stochasticLinearRegressionResample" +"stochasticLinearRegressionSimpleState" +"stochasticLinearRegressionState" +"stochasticLogisticRegression" +"stochasticLogisticRegressionArgMax" +"stochasticLogisticRegressionArgMin" +"stochasticLogisticRegressionArray" +"stochasticLogisticRegressionDistinct" +"stochasticLogisticRegressionForEach" +"stochasticLogisticRegressionIf" +"stochasticLogisticRegressionMap" +"stochasticLogisticRegressionMerge" +"stochasticLogisticRegressionNull" +"stochasticLogisticRegressionOrDefault" +"stochasticLogisticRegressionOrNull" +"stochasticLogisticRegressionResample" +"stochasticLogisticRegressionSimpleState" +"stochasticLogisticRegressionState" +"str_to_date" +"str_to_map" +"stringJaccardIndex" +"stringJaccardIndexUTF8" +"stringToH3" +"structureToCapnProtoSchema" +"structureToProtobufSchema" +"studentTTest" +"studentTTestArgMax" +"studentTTestArgMin" +"studentTTestArray" +"studentTTestDistinct" +"studentTTestForEach" +"studentTTestIf" +"studentTTestMap" +"studentTTestMerge" +"studentTTestNull" +"studentTTestOrDefault" +"studentTTestOrNull" +"studentTTestResample" +"studentTTestSimpleState" 
+"studentTTestState" +"subBitmap" +"subDate" +"substr" +"substring" +"substringIndex" +"substringIndexUTF8" +"substringUTF8" +"subtractDays" +"subtractHours" +"subtractInterval" +"subtractMicroseconds" +"subtractMilliseconds" +"subtractMinutes" +"subtractMonths" +"subtractNanoseconds" +"subtractQuarters" +"subtractSeconds" +"subtractTupleOfIntervals" +"subtractWeeks" +"subtractYears" +"sum" +"sumArgMax" +"sumArgMin" +"sumArray" +"sumCount" +"sumCountArgMax" +"sumCountArgMin" +"sumCountArray" +"sumCountDistinct" +"sumCountForEach" +"sumCountIf" +"sumCountMap" +"sumCountMerge" +"sumCountNull" +"sumCountOrDefault" +"sumCountOrNull" +"sumCountResample" +"sumCountSimpleState" +"sumCountState" +"sumDistinct" +"sumForEach" +"sumIf" +"sumKahan" +"sumKahanArgMax" +"sumKahanArgMin" +"sumKahanArray" +"sumKahanDistinct" +"sumKahanForEach" +"sumKahanIf" +"sumKahanMap" +"sumKahanMerge" +"sumKahanNull" +"sumKahanOrDefault" +"sumKahanOrNull" +"sumKahanResample" +"sumKahanSimpleState" +"sumKahanState" +"sumMap" +"sumMapFiltered" +"sumMapFilteredArgMax" +"sumMapFilteredArgMin" +"sumMapFilteredArray" +"sumMapFilteredDistinct" +"sumMapFilteredForEach" +"sumMapFilteredIf" +"sumMapFilteredMap" +"sumMapFilteredMerge" +"sumMapFilteredNull" +"sumMapFilteredOrDefault" +"sumMapFilteredOrNull" +"sumMapFilteredResample" +"sumMapFilteredSimpleState" +"sumMapFilteredState" +"sumMapFilteredWithOverflow" +"sumMapFilteredWithOverflowArgMax" +"sumMapFilteredWithOverflowArgMin" +"sumMapFilteredWithOverflowArray" +"sumMapFilteredWithOverflowDistinct" +"sumMapFilteredWithOverflowForEach" +"sumMapFilteredWithOverflowIf" +"sumMapFilteredWithOverflowMap" +"sumMapFilteredWithOverflowMerge" +"sumMapFilteredWithOverflowNull" +"sumMapFilteredWithOverflowOrDefault" +"sumMapFilteredWithOverflowOrNull" +"sumMapFilteredWithOverflowResample" +"sumMapFilteredWithOverflowSimpleState" +"sumMapFilteredWithOverflowState" +"sumMapWithOverflow" +"sumMapWithOverflowArgMax" +"sumMapWithOverflowArgMin" +"sumMapWithOverflowArray" +"sumMapWithOverflowDistinct" +"sumMapWithOverflowForEach" +"sumMapWithOverflowIf" +"sumMapWithOverflowMap" +"sumMapWithOverflowMerge" +"sumMapWithOverflowNull" +"sumMapWithOverflowOrDefault" +"sumMapWithOverflowOrNull" +"sumMapWithOverflowResample" +"sumMapWithOverflowSimpleState" +"sumMapWithOverflowState" +"sumMappedArrays" +"sumMappedArraysArgMax" +"sumMappedArraysArgMin" +"sumMappedArraysArray" +"sumMappedArraysDistinct" +"sumMappedArraysForEach" +"sumMappedArraysIf" +"sumMappedArraysMap" +"sumMappedArraysMerge" +"sumMappedArraysNull" +"sumMappedArraysOrDefault" +"sumMappedArraysOrNull" +"sumMappedArraysResample" +"sumMappedArraysSimpleState" +"sumMappedArraysState" +"sumMerge" +"sumNull" +"sumOrDefault" +"sumOrNull" +"sumResample" +"sumSimpleState" +"sumState" +"sumWithOverflow" +"sumWithOverflowArgMax" +"sumWithOverflowArgMin" +"sumWithOverflowArray" +"sumWithOverflowDistinct" +"sumWithOverflowForEach" +"sumWithOverflowIf" +"sumWithOverflowMap" +"sumWithOverflowMerge" +"sumWithOverflowNull" +"sumWithOverflowOrDefault" +"sumWithOverflowOrNull" +"sumWithOverflowResample" +"sumWithOverflowSimpleState" +"sumWithOverflowState" +"svg" +"synonyms" +"tan" +"tanh" +"tcpPort" +"tgamma" +"theilsU" +"theilsUArgMax" +"theilsUArgMin" +"theilsUArray" +"theilsUDistinct" +"theilsUForEach" +"theilsUIf" +"theilsUMap" +"theilsUMerge" +"theilsUNull" +"theilsUOrDefault" +"theilsUOrNull" +"theilsUResample" +"theilsUSimpleState" +"theilsUState" +"throwIf" +"tid" +"timeDiff" +"timeSlot" +"timeSlots" +"timeZone" +"timeZoneOf" 
+"timeZoneOffset" +"timestamp" +"timestampDiff" +"timestamp_diff" +"timezone" +"timezoneOf" +"timezoneOffset" +"toBool" +"toColumnTypeName" +"toDate" +"toDate32" +"toDate32OrDefault" +"toDate32OrNull" +"toDate32OrZero" +"toDateOrDefault" +"toDateOrNull" +"toDateOrZero" +"toDateTime" +"toDateTime32" +"toDateTime64" +"toDateTime64OrDefault" +"toDateTime64OrNull" +"toDateTime64OrZero" +"toDateTimeOrDefault" +"toDateTimeOrNull" +"toDateTimeOrZero" +"toDayOfMonth" +"toDayOfWeek" +"toDayOfYear" +"toDaysSinceYearZero" +"toDecimal128" +"toDecimal128OrDefault" +"toDecimal128OrNull" +"toDecimal128OrZero" +"toDecimal256" +"toDecimal256OrDefault" +"toDecimal256OrNull" +"toDecimal256OrZero" +"toDecimal32" +"toDecimal32OrDefault" +"toDecimal32OrNull" +"toDecimal32OrZero" +"toDecimal64" +"toDecimal64OrDefault" +"toDecimal64OrNull" +"toDecimal64OrZero" +"toDecimalString" +"toFixedString" +"toFloat32" +"toFloat32OrDefault" +"toFloat32OrNull" +"toFloat32OrZero" +"toFloat64" +"toFloat64OrDefault" +"toFloat64OrNull" +"toFloat64OrZero" +"toHour" +"toIPv4" +"toIPv4OrDefault" +"toIPv4OrNull" +"toIPv4OrZero" +"toIPv6" +"toIPv6OrDefault" +"toIPv6OrNull" +"toIPv6OrZero" +"toISOWeek" +"toISOYear" +"toInt128" +"toInt128OrDefault" +"toInt128OrNull" +"toInt128OrZero" +"toInt16" +"toInt16OrDefault" +"toInt16OrNull" +"toInt16OrZero" +"toInt256" +"toInt256OrDefault" +"toInt256OrNull" +"toInt256OrZero" +"toInt32" +"toInt32OrDefault" +"toInt32OrNull" +"toInt32OrZero" +"toInt64" +"toInt64OrDefault" +"toInt64OrNull" +"toInt64OrZero" +"toInt8" +"toInt8OrDefault" +"toInt8OrNull" +"toInt8OrZero" +"toIntervalDay" +"toIntervalHour" +"toIntervalMicrosecond" +"toIntervalMillisecond" +"toIntervalMinute" +"toIntervalMonth" +"toIntervalNanosecond" +"toIntervalQuarter" +"toIntervalSecond" +"toIntervalWeek" +"toIntervalYear" +"toJSONString" +"toLastDayOfMonth" +"toLastDayOfWeek" +"toLowCardinality" +"toMillisecond" +"toMinute" +"toModifiedJulianDay" +"toModifiedJulianDayOrNull" +"toMonday" +"toMonth" +"toNullable" +"toQuarter" +"toRelativeDayNum" +"toRelativeHourNum" +"toRelativeMinuteNum" +"toRelativeMonthNum" +"toRelativeQuarterNum" +"toRelativeSecondNum" +"toRelativeWeekNum" +"toRelativeYearNum" +"toSecond" +"toStartOfDay" +"toStartOfFifteenMinutes" +"toStartOfFiveMinute" +"toStartOfFiveMinutes" +"toStartOfHour" +"toStartOfISOYear" +"toStartOfInterval" +"toStartOfMicrosecond" +"toStartOfMillisecond" +"toStartOfMinute" +"toStartOfMonth" +"toStartOfNanosecond" +"toStartOfQuarter" +"toStartOfSecond" +"toStartOfTenMinutes" +"toStartOfWeek" +"toStartOfYear" +"toString" +"toStringCutToZero" +"toTime" +"toTimeZone" +"toTimezone" +"toTypeName" +"toUInt128" +"toUInt128OrDefault" +"toUInt128OrNull" +"toUInt128OrZero" +"toUInt16" +"toUInt16OrDefault" +"toUInt16OrNull" +"toUInt16OrZero" +"toUInt256" +"toUInt256OrDefault" +"toUInt256OrNull" +"toUInt256OrZero" +"toUInt32" +"toUInt32OrDefault" +"toUInt32OrNull" +"toUInt32OrZero" +"toUInt64" +"toUInt64OrDefault" +"toUInt64OrNull" +"toUInt64OrZero" +"toUInt8" +"toUInt8OrDefault" +"toUInt8OrNull" +"toUInt8OrZero" +"toUTCTimestamp" +"toUUID" +"toUUIDOrDefault" +"toUUIDOrNull" +"toUUIDOrZero" +"toUnixTimestamp" +"toUnixTimestamp64Micro" +"toUnixTimestamp64Milli" +"toUnixTimestamp64Nano" +"toValidUTF8" +"toWeek" +"toYYYYMM" +"toYYYYMMDD" +"toYYYYMMDDhhmmss" +"toYear" +"toYearWeek" +"to_utc_timestamp" +"today" +"tokens" +"topK" +"topKArgMax" +"topKArgMin" +"topKArray" +"topKDistinct" +"topKForEach" +"topKIf" +"topKMap" +"topKMerge" +"topKNull" +"topKOrDefault" +"topKOrNull" +"topKResample" 
+"topKSimpleState" +"topKState" +"topKWeighted" +"topKWeightedArgMax" +"topKWeightedArgMin" +"topKWeightedArray" +"topKWeightedDistinct" +"topKWeightedForEach" +"topKWeightedIf" +"topKWeightedMap" +"topKWeightedMerge" +"topKWeightedNull" +"topKWeightedOrDefault" +"topKWeightedOrNull" +"topKWeightedResample" +"topKWeightedSimpleState" +"topKWeightedState" +"topLevelDomain" +"topLevelDomainRFC" +"transactionID" +"transactionLatestSnapshot" +"transactionOldestSnapshot" +"transform" +"translate" +"translateUTF8" +"trim" +"trimBoth" +"trimLeft" +"trimRight" +"trunc" +"truncate" +"tryBase58Decode" +"tryBase64Decode" +"tryBase64URLDecode" +"tryDecrypt" +"tryIdnaEncode" +"tryPunycodeDecode" +"tumble" +"tumbleEnd" +"tumbleStart" +"tuple" +"tupleConcat" +"tupleDivide" +"tupleDivideByNumber" +"tupleElement" +"tupleHammingDistance" +"tupleIntDiv" +"tupleIntDivByNumber" +"tupleIntDivOrZero" +"tupleIntDivOrZeroByNumber" +"tupleMinus" +"tupleModulo" +"tupleModuloByNumber" +"tupleMultiply" +"tupleMultiplyByNumber" +"tupleNames" +"tupleNegate" +"tuplePlus" +"tupleToNameValuePairs" +"ucase" +"unbin" +"unhex" +"uniq" +"uniqArgMax" +"uniqArgMin" +"uniqArray" +"uniqCombined" +"uniqCombined64" +"uniqCombined64ArgMax" +"uniqCombined64ArgMin" +"uniqCombined64Array" +"uniqCombined64Distinct" +"uniqCombined64ForEach" +"uniqCombined64If" +"uniqCombined64Map" +"uniqCombined64Merge" +"uniqCombined64Null" +"uniqCombined64OrDefault" +"uniqCombined64OrNull" +"uniqCombined64Resample" +"uniqCombined64SimpleState" +"uniqCombined64State" +"uniqCombinedArgMax" +"uniqCombinedArgMin" +"uniqCombinedArray" +"uniqCombinedDistinct" +"uniqCombinedForEach" +"uniqCombinedIf" +"uniqCombinedMap" +"uniqCombinedMerge" +"uniqCombinedNull" +"uniqCombinedOrDefault" +"uniqCombinedOrNull" +"uniqCombinedResample" +"uniqCombinedSimpleState" +"uniqCombinedState" +"uniqDistinct" +"uniqExact" +"uniqExactArgMax" +"uniqExactArgMin" +"uniqExactArray" +"uniqExactDistinct" +"uniqExactForEach" +"uniqExactIf" +"uniqExactMap" +"uniqExactMerge" +"uniqExactNull" +"uniqExactOrDefault" +"uniqExactOrNull" +"uniqExactResample" +"uniqExactSimpleState" +"uniqExactState" +"uniqForEach" +"uniqHLL12" +"uniqHLL12ArgMax" +"uniqHLL12ArgMin" +"uniqHLL12Array" +"uniqHLL12Distinct" +"uniqHLL12ForEach" +"uniqHLL12If" +"uniqHLL12Map" +"uniqHLL12Merge" +"uniqHLL12Null" +"uniqHLL12OrDefault" +"uniqHLL12OrNull" +"uniqHLL12Resample" +"uniqHLL12SimpleState" +"uniqHLL12State" +"uniqIf" +"uniqMap" +"uniqMerge" +"uniqNull" +"uniqOrDefault" +"uniqOrNull" +"uniqResample" +"uniqSimpleState" +"uniqState" +"uniqTheta" +"uniqThetaArgMax" +"uniqThetaArgMin" +"uniqThetaArray" +"uniqThetaDistinct" +"uniqThetaForEach" +"uniqThetaIf" +"uniqThetaIntersect" +"uniqThetaMap" +"uniqThetaMerge" +"uniqThetaNot" +"uniqThetaNull" +"uniqThetaOrDefault" +"uniqThetaOrNull" +"uniqThetaResample" +"uniqThetaSimpleState" +"uniqThetaState" +"uniqThetaUnion" +"uniqUpTo" +"uniqUpToArgMax" +"uniqUpToArgMin" +"uniqUpToArray" +"uniqUpToDistinct" +"uniqUpToForEach" +"uniqUpToIf" +"uniqUpToMap" +"uniqUpToMerge" +"uniqUpToNull" +"uniqUpToOrDefault" +"uniqUpToOrNull" +"uniqUpToResample" +"uniqUpToSimpleState" +"uniqUpToState" +"upper" +"upperUTF8" +"uptime" +"user" +"validateNestedArraySizes" +"varPop" +"varPopArgMax" +"varPopArgMin" +"varPopArray" +"varPopDistinct" +"varPopForEach" +"varPopIf" +"varPopMap" +"varPopMerge" +"varPopNull" +"varPopOrDefault" +"varPopOrNull" +"varPopResample" +"varPopSimpleState" +"varPopStable" +"varPopStableArgMax" +"varPopStableArgMin" +"varPopStableArray" +"varPopStableDistinct" 
+"varPopStableForEach" +"varPopStableIf" +"varPopStableMap" +"varPopStableMerge" +"varPopStableNull" +"varPopStableOrDefault" +"varPopStableOrNull" +"varPopStableResample" +"varPopStableSimpleState" +"varPopStableState" +"varPopState" +"varSamp" +"varSampArgMax" +"varSampArgMin" +"varSampArray" +"varSampDistinct" +"varSampForEach" +"varSampIf" +"varSampMap" +"varSampMerge" +"varSampNull" +"varSampOrDefault" +"varSampOrNull" +"varSampResample" +"varSampSimpleState" +"varSampStable" +"varSampStableArgMax" +"varSampStableArgMin" +"varSampStableArray" +"varSampStableDistinct" +"varSampStableForEach" +"varSampStableIf" +"varSampStableMap" +"varSampStableMerge" +"varSampStableNull" +"varSampStableOrDefault" +"varSampStableOrNull" +"varSampStableResample" +"varSampStableSimpleState" +"varSampStableState" +"varSampState" +"variantElement" +"variantType" +"vectorDifference" +"vectorSum" +"version" +"visibleWidth" +"visitParamExtractBool" +"visitParamExtractFloat" +"visitParamExtractInt" +"visitParamExtractRaw" +"visitParamExtractString" +"visitParamExtractUInt" +"visitParamHas" +"week" +"welchTTest" +"welchTTestArgMax" +"welchTTestArgMin" +"welchTTestArray" +"welchTTestDistinct" +"welchTTestForEach" +"welchTTestIf" +"welchTTestMap" +"welchTTestMerge" +"welchTTestNull" +"welchTTestOrDefault" +"welchTTestOrNull" +"welchTTestResample" +"welchTTestSimpleState" +"welchTTestState" +"widthBucket" +"width_bucket" +"windowFunnel" +"windowFunnelArgMax" +"windowFunnelArgMin" +"windowFunnelArray" +"windowFunnelDistinct" +"windowFunnelForEach" +"windowFunnelIf" +"windowFunnelMap" +"windowFunnelMerge" +"windowFunnelNull" +"windowFunnelOrDefault" +"windowFunnelOrNull" +"windowFunnelResample" +"windowFunnelSimpleState" +"windowFunnelState" +"windowID" +"wkt" +"wordShingleMinHash" +"wordShingleMinHashArg" +"wordShingleMinHashArgCaseInsensitive" +"wordShingleMinHashArgCaseInsensitiveUTF8" +"wordShingleMinHashArgUTF8" +"wordShingleMinHashCaseInsensitive" +"wordShingleMinHashCaseInsensitiveUTF8" +"wordShingleMinHashUTF8" +"wordShingleSimHash" +"wordShingleSimHashCaseInsensitive" +"wordShingleSimHashCaseInsensitiveUTF8" +"wordShingleSimHashUTF8" +"wyHash64" +"xor" +"xxHash32" +"xxHash64" +"xxh3" +"yandexConsistentHash" +"yearweek" +"yesterday" +"zookeeperSessionUptime" diff --git a/tests/fuzz/dictionaries/functions.dict b/tests/fuzz/dictionaries/functions.dict index 6f2a88c22fa..e562595fb67 100644 --- a/tests/fuzz/dictionaries/functions.dict +++ b/tests/fuzz/dictionaries/functions.dict @@ -126,6 +126,7 @@ "JSONHas" "JSONKey" "JSONLength" +"JSONMergePatch" "JSONType" "JSON_ARRAY_LENGTH" "JSON_EXISTS" @@ -227,6 +228,8 @@ "UTC_timestamp" "UUIDNumToString" "UUIDStringToNum" +"UUIDToNum" +"UUIDv7ToDateTime" "VAR_POP" "VAR_POPArgMax" "VAR_POPArgMin" @@ -263,6 +266,7 @@ "YYYYMMDDhhmmssToDateTime" "YYYYMMDDhhmmssToDateTime64" "_CAST" +"__actionName" "__bitBoolMaskAnd" "__bitBoolMaskOr" "__bitSwapLastTwo" @@ -660,6 +664,8 @@ "base58Encode" "base64Decode" "base64Encode" +"base64URLDecode" +"base64URLEncode" "basename" "bin" "bitAnd" @@ -744,8 +750,15 @@ "cbrt" "ceil" "ceiling" +"changeDay" +"changeHour" +"changeMinute" +"changeMonth" +"changeSecond" +"changeYear" "char" "cityHash64" +"clamp" "coalesce" "concat" "concatAssumeInjective" @@ -970,6 +983,7 @@ "current_date" "current_schemas" "current_timestamp" +"current_user" "cutFragment" "cutIPv6" "cutQueryString" @@ -988,7 +1002,9 @@ "dateDiff" "dateName" "dateTime64ToSnowflake" +"dateTime64ToSnowflakeID" "dateTimeToSnowflake" +"dateTimeToSnowflakeID" "dateTrunc" "date_diff" 
"decodeHTMLComponent" @@ -1032,6 +1048,21 @@ "deltaSumTimestampSimpleState" "deltaSumTimestampState" "demangle" +"denseRank" +"denseRankArgMax" +"denseRankArgMin" +"denseRankArray" +"denseRankDistinct" +"denseRankForEach" +"denseRankIf" +"denseRankMap" +"denseRankMerge" +"denseRankNull" +"denseRankOrDefault" +"denseRankOrNull" +"denseRankResample" +"denseRankSimpleState" +"denseRankState" "dense_rank" "dense_rankArgMax" "dense_rankArgMin" @@ -1108,8 +1139,11 @@ "domainWithoutWWWRFC" "dotProduct" "dumpColumnStructure" +"dynamicElement" +"dynamicType" "e" "editDistance" +"editDistanceUTF8" "empty" "emptyArrayDate" "emptyArrayDateTime" @@ -1334,14 +1368,17 @@ "gccMurmurHash" "gcd" "generateRandomStructure" +"generateSnowflakeID" "generateULID" "generateUUIDv4" +"generateUUIDv7" "geoDistance" "geoToH3" "geoToS2" "geohashDecode" "geohashEncode" "geohashesInBox" +"getClientHTTPHeader" "getMacro" "getOSKernelVersion" "getServerPort" @@ -1589,6 +1626,20 @@ "groupBitmapXorSimpleState" "groupBitmapXorState" "groupConcat" +"groupConcatArgMax" +"groupConcatArgMin" +"groupConcatArray" +"groupConcatDistinct" +"groupConcatForEach" +"groupConcatIf" +"groupConcatMap" +"groupConcatMerge" +"groupConcatNull" +"groupConcatOrDefault" +"groupConcatOrNull" +"groupConcatResample" +"groupConcatSimpleState" +"groupConcatState" "groupUniqArray" "groupUniqArrayArgMax" "groupUniqArrayArgMin" @@ -1604,6 +1655,21 @@ "groupUniqArrayResample" "groupUniqArraySimpleState" "groupUniqArrayState" +"group_concat" +"group_concatArgMax" +"group_concatArgMin" +"group_concatArray" +"group_concatDistinct" +"group_concatForEach" +"group_concatIf" +"group_concatMap" +"group_concatMerge" +"group_concatNull" +"group_concatOrDefault" +"group_concatOrNull" +"group_concatResample" +"group_concatSimpleState" +"group_concatState" "h3CellAreaM2" "h3CellAreaRads2" "h3Distance" @@ -1660,6 +1726,8 @@ "hasTokenCaseInsensitiveOrNull" "hasTokenOrNull" "hex" +"hilbertDecode" +"hilbertEncode" "histogram" "histogramArgMax" "histogramArgMin" @@ -1881,6 +1949,7 @@ "less" "lessOrEquals" "levenshteinDistance" +"levenshteinDistanceUTF8" "lgamma" "like" "ln" @@ -2498,10 +2567,44 @@ "parseDateTimeInJodaSyntaxOrZero" "parseDateTimeOrNull" "parseDateTimeOrZero" +"parseReadableSize" +"parseReadableSizeOrNull" +"parseReadableSizeOrZero" "parseTimeDelta" +"partitionID" "partitionId" "path" "pathFull" +"percentRank" +"percentRankArgMax" +"percentRankArgMin" +"percentRankArray" +"percentRankDistinct" +"percentRankForEach" +"percentRankIf" +"percentRankMap" +"percentRankMerge" +"percentRankNull" +"percentRankOrDefault" +"percentRankOrNull" +"percentRankResample" +"percentRankSimpleState" +"percentRankState" +"percent_rank" +"percent_rankArgMax" +"percent_rankArgMin" +"percent_rankArray" +"percent_rankDistinct" +"percent_rankForEach" +"percent_rankIf" +"percent_rankMap" +"percent_rankMerge" +"percent_rankNull" +"percent_rankOrDefault" +"percent_rankOrNull" +"percent_rankResample" +"percent_rankSimpleState" +"percent_rankState" "pi" "plus" "pmod" @@ -2533,6 +2636,7 @@ "positive_modulo" "pow" "power" +"printf" "proportionsZTest" "protocol" "punycodeDecode" @@ -3103,6 +3207,8 @@ "rankResample" "rankSimpleState" "rankState" +"readWKTLineString" +"readWKTMultiLineString" "readWKTMultiPolygon" "readWKTPoint" "readWKTPolygon" @@ -3340,6 +3446,8 @@ "skewSampState" "sleep" "sleepEachRow" +"snowflakeIDToDateTime" +"snowflakeIDToDateTime64" "snowflakeToDateTime" "snowflakeToDateTime64" "soundex" @@ -3902,6 +4010,7 @@ "truncate" "tryBase58Decode" "tryBase64Decode" 
+"tryBase64URLDecode" "tryDecrypt" "tryIdnaEncode" "tryPunycodeDecode" @@ -3923,6 +4032,7 @@ "tupleModuloByNumber" "tupleMultiply" "tupleMultiplyByNumber" +"tupleNames" "tupleNegate" "tuplePlus" "tupleToNameValuePairs" diff --git a/tests/fuzz/dictionaries/key_words.dict b/tests/fuzz/dictionaries/keywords.dict similarity index 95% rename from tests/fuzz/dictionaries/key_words.dict rename to tests/fuzz/dictionaries/keywords.dict index db517a2382c..abaaf9e53b5 100644 --- a/tests/fuzz/dictionaries/key_words.dict +++ b/tests/fuzz/dictionaries/keywords.dict @@ -3,7 +3,7 @@ "ADD CONSTRAINT" "ADD INDEX" "ADD PROJECTION" -"ADD STATISTIC" +"ADD STATISTICS" "ADMIN OPTION FOR" "AFTER" "ALGORITHM" @@ -76,7 +76,7 @@ "CLEAR COLUMN" "CLEAR INDEX" "CLEAR PROJECTION" -"CLEAR STATISTIC" +"CLEAR STATISTICS" "CLUSTER" "CLUSTERS" "CN" @@ -110,6 +110,8 @@ "CURRENTUSER" "CURRENT_USER" "D" +"DATA" +"DATA INNER UUID" "DATABASE" "DATABASES" "DATE" @@ -147,7 +149,7 @@ "DROP PART" "DROP PARTITION" "DROP PROJECTION" -"DROP STATISTIC" +"DROP STATISTICS" "DROP TABLE" "DROP TEMPORARY TABLE" "ELSE" @@ -247,6 +249,7 @@ "IS NULL" "IS_OBJECT_ID" "JOIN" +"JWT" "KERBEROS" "KEY" "KEY BY" @@ -277,13 +280,15 @@ "MATERIALIZE COLUMN" "MATERIALIZE INDEX" "MATERIALIZE PROJECTION" -"MATERIALIZE STATISTIC" +"MATERIALIZE STATISTICS" "MATERIALIZE TTL" "MATERIALIZED" "MAX" "MCS" "MEMORY" "MERGES" +"METRICS" +"METRICS INNER UUID" "MI" "MICROSECOND" "MICROSECONDS" @@ -297,12 +302,14 @@ "MODIFY" "MODIFY COLUMN" "MODIFY COMMENT" +"MODIFY DEFINER" "MODIFY ORDER BY" "MODIFY QUERY" "MODIFY REFRESH" "MODIFY SAMPLE BY" "MODIFY SETTING" "MODIFY SQL SECURITY" +"MODIFY STATISTICS" "MODIFY TTL" "MONTH" "MONTHS" @@ -373,6 +380,7 @@ "Protobuf" "Q" "QQ" +"QUALIFY" "QUARTER" "QUARTERS" "QUERY" @@ -384,6 +392,7 @@ "READONLY" "REALM" "RECOMPRESS" +"RECURSIVE" "REFERENCES" "REFRESH" "REGEXP" @@ -415,6 +424,7 @@ "SALT" "SAMPLE" "SAMPLE BY" +"SAN" "SCHEME" "SECOND" "SECONDS" @@ -460,7 +470,8 @@ "SS" "SSH_KEY" "SSL_CERTIFICATE" -"STATISTIC" +"START TRANSACTION" +"STATISTICS" "STEP" "STORAGE" "STRICT" @@ -475,6 +486,8 @@ "TABLE" "TABLE OVERRIDE" "TABLES" +"TAGS" +"TAGS INNER UUID" "TEMPORARY" "TEMPORARY TABLE" "TEST" @@ -529,6 +542,7 @@ "WITH NAME" "WITH REPLACE OPTION" "WITH TIES" +"WITH_ITEMINDEX" "WK" "WRITABLE" "WW" @@ -540,4 +554,3 @@ "bagexpansion" "base_backup" "cluster_host_ids" -"with_itemindex" From b5f7875f574b640dc00d42dea5e721058e9a10fc Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 13 Aug 2024 14:04:04 +0000 Subject: [PATCH 248/265] Remove odd new line --- tests/queries/0_stateless/00652_mergetree_mutations.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/00652_mergetree_mutations.sh b/tests/queries/0_stateless/00652_mergetree_mutations.sh index 3b0966dd2c3..6be0ebf882f 100755 --- a/tests/queries/0_stateless/00652_mergetree_mutations.sh +++ b/tests/queries/0_stateless/00652_mergetree_mutations.sh @@ -89,4 +89,3 @@ done ${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, command, is_done FROM system.mutations WHERE database = '$CLICKHOUSE_DATABASE' and table = 'mutations_cleaner' ORDER BY mutation_id" ${CLICKHOUSE_CLIENT} --query="DROP TABLE mutations_cleaner" - From 79e055783931bd544897ec23eb07b7c7c9b09a69 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Tue, 13 Aug 2024 14:18:48 +0000 Subject: [PATCH 249/265] Write a script to automatically update the dict --- tests/fuzz/README.md | 23 - tests/fuzz/all.dict | 2377 +++++++------ tests/fuzz/dictionaries/datatypes.dict | 4418 +----------------------- 
tests/fuzz/dictionaries/functions.dict | 4283 ----------------------- tests/fuzz/update_dict.sh | 20 + 5 files changed, 1389 insertions(+), 9732 deletions(-) delete mode 100644 tests/fuzz/README.md create mode 100755 tests/fuzz/update_dict.sh diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md deleted file mode 100644 index 576ad66ed93..00000000000 --- a/tests/fuzz/README.md +++ /dev/null @@ -1,23 +0,0 @@ -The list of functions generated via the following query - -``` - clickhouse client -q "SELECT * FROM (SELECT DISTINCT concat('\"', name, '\"') as res FROM system.functions ORDER BY name UNION ALL SELECT concat('\"', a.name, b.name, '\"') as res FROM system.functions as a CROSS JOIN system.aggregate_function_combinators as b WHERE a.is_aggregate = 1) ORDER BY res" > functions.dict -``` - -The list of datatypes generated via the following query: - -``` - clickhouse client -q "SELECT DISTINCT concat('\"', name, '\"') as res FROM system.data_type_families ORDER BY name" > datatypes.dict -``` - -The list of keywords generated via the following query: - -``` - clickhouse client -q "SELECT DISTINCT concat('\"', keyword, '\"') as res FROM system.keywords ORDER BY keyword" > keywords.dict -``` - -Then merge all dictionaries into one (all.dict) - -``` - cat ./dictionaries/* | LC_ALL=C sort | uniq > all.dict -``` \ No newline at end of file diff --git a/tests/fuzz/all.dict b/tests/fuzz/all.dict index 1c3c657d6b0..30af3746fca 100644 --- a/tests/fuzz/all.dict +++ b/tests/fuzz/all.dict @@ -1,15 +1,971 @@ -"abs" -"accurateCast" -"accurateCastOrDefault" -"accurateCastOrNull" -"acos" -"acosh" "ADD COLUMN" "ADD CONSTRAINT" "ADD INDEX" "ADD PROJECTION" "ADD STATISTICS" "ADD" +"ADMIN OPTION FOR" +"AFTER" +"ALGORITHM" +"ALIAS" +"ALL" +"ALLOWED_LATENESS" +"ALTER COLUMN" +"ALTER DATABASE" +"ALTER LIVE VIEW" +"ALTER POLICY" +"ALTER PROFILE" +"ALTER QUOTA" +"ALTER ROLE" +"ALTER ROW POLICY" +"ALTER SETTINGS PROFILE" +"ALTER TABLE" +"ALTER TEMPORARY TABLE" +"ALTER USER" +"ALTER" +"AND STDOUT" +"AND" +"ANTI" +"ANY" +"APPEND" +"APPLY DELETED MASK" +"APPLY" +"ARRAY JOIN" +"AS" +"ASC" +"ASCENDING" +"ASOF" +"ASSUME" +"AST" +"ASYNC" +"ATTACH PART" +"ATTACH PARTITION" +"ATTACH POLICY" +"ATTACH PROFILE" +"ATTACH QUOTA" +"ATTACH ROLE" +"ATTACH ROW POLICY" +"ATTACH SETTINGS PROFILE" +"ATTACH USER" +"ATTACH" +"AUTO_INCREMENT" +"AZURE" +"AggregateFunction" +"Array" +"BACKUP" +"BCRYPT_HASH" +"BCRYPT_PASSWORD" +"BEGIN TRANSACTION" +"BETWEEN" +"BIDIRECTIONAL" +"BIGINT SIGNED" +"BIGINT UNSIGNED" +"BIGINT" +"BINARY LARGE OBJECT" +"BINARY VARYING" +"BINARY" +"BIT" +"BIT_AND" +"BIT_ANDArgMax" +"BIT_ANDArgMin" +"BIT_ANDArray" +"BIT_ANDDistinct" +"BIT_ANDForEach" +"BIT_ANDIf" +"BIT_ANDMap" +"BIT_ANDMerge" +"BIT_ANDNull" +"BIT_ANDOrDefault" +"BIT_ANDOrNull" +"BIT_ANDResample" +"BIT_ANDSimpleState" +"BIT_ANDState" +"BIT_OR" +"BIT_ORArgMax" +"BIT_ORArgMin" +"BIT_ORArray" +"BIT_ORDistinct" +"BIT_ORForEach" +"BIT_ORIf" +"BIT_ORMap" +"BIT_ORMerge" +"BIT_ORNull" +"BIT_OROrDefault" +"BIT_OROrNull" +"BIT_ORResample" +"BIT_ORSimpleState" +"BIT_ORState" +"BIT_XOR" +"BIT_XORArgMax" +"BIT_XORArgMin" +"BIT_XORArray" +"BIT_XORDistinct" +"BIT_XORForEach" +"BIT_XORIf" +"BIT_XORMap" +"BIT_XORMerge" +"BIT_XORNull" +"BIT_XOROrDefault" +"BIT_XOROrNull" +"BIT_XORResample" +"BIT_XORSimpleState" +"BIT_XORState" +"BLAKE3" +"BLOB" +"BOTH" +"BY" +"BYTE" +"BYTEA" +"Bool" +"CASCADE" +"CASE" +"CAST" +"CHANGE" +"CHANGEABLE_IN_READONLY" +"CHANGED" +"CHAR LARGE OBJECT" +"CHAR VARYING" +"CHAR" +"CHARACTER LARGE OBJECT" +"CHARACTER VARYING" +"CHARACTER" 
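[Editor's sketch] Patch 249 replaces the deleted README's manual recipe with tests/fuzz/update_dict.sh, but the script's 20-line body does not appear in this excerpt. A minimal sketch that follows the deleted README's exact queries might look like the following; the `CLICKHOUSE_CLIENT` variable, the `set` flags, the `cd`, and the `dictionaries/*.dict` output paths are assumptions, not the committed script.

```
#!/bin/bash
# Sketch only: regenerates the fuzz dictionaries from a running server,
# following the recipe in the README that this patch deletes.
set -euo pipefail

cd "$(dirname "$0")"  # assumption: the script lives in and runs from tests/fuzz

CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:-clickhouse client}  # assumed variable name

# Function names, including aggregate-function/combinator products
# (note: UNION ALL without an outer DISTINCT can emit duplicates such as
# "nothingNull", which is both a registered function and nothing + Null).
$CLICKHOUSE_CLIENT -q "SELECT * FROM (SELECT DISTINCT concat('\"', name, '\"') as res FROM system.functions ORDER BY name UNION ALL SELECT concat('\"', a.name, b.name, '\"') as res FROM system.functions as a CROSS JOIN system.aggregate_function_combinators as b WHERE a.is_aggregate = 1) ORDER BY res" > dictionaries/functions.dict

# Data type names
$CLICKHOUSE_CLIENT -q "SELECT DISTINCT concat('\"', name, '\"') as res FROM system.data_type_families ORDER BY name" > dictionaries/datatypes.dict

# Keywords
$CLICKHOUSE_CLIENT -q "SELECT DISTINCT concat('\"', keyword, '\"') as res FROM system.keywords ORDER BY keyword" > dictionaries/keywords.dict

# Merge everything into all.dict with a byte-wise sort and deduplication,
# matching the ordering seen in the all.dict hunks below (LC_ALL=C sorts
# uppercase keywords before lowercase function names).
cat ./dictionaries/* | LC_ALL=C sort | uniq > all.dict
```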
+"CHARACTER_LENGTH" +"CHAR_LENGTH" +"CHECK ALL TABLES" +"CHECK TABLE" +"CHECK" +"CLEANUP" +"CLEAR COLUMN" +"CLEAR INDEX" +"CLEAR PROJECTION" +"CLEAR STATISTICS" +"CLOB" +"CLUSTER" +"CLUSTERS" +"CN" +"CODEC" +"COLLATE" +"COLUMN" +"COLUMNS" +"COMMENT COLUMN" +"COMMENT" +"COMMIT" +"COMPRESSION" +"CONST" +"CONSTRAINT" +"COVAR_POP" +"COVAR_POPArgMax" +"COVAR_POPArgMin" +"COVAR_POPArray" +"COVAR_POPDistinct" +"COVAR_POPForEach" +"COVAR_POPIf" +"COVAR_POPMap" +"COVAR_POPMerge" +"COVAR_POPNull" +"COVAR_POPOrDefault" +"COVAR_POPOrNull" +"COVAR_POPResample" +"COVAR_POPSimpleState" +"COVAR_POPState" +"COVAR_SAMP" +"COVAR_SAMPArgMax" +"COVAR_SAMPArgMin" +"COVAR_SAMPArray" +"COVAR_SAMPDistinct" +"COVAR_SAMPForEach" +"COVAR_SAMPIf" +"COVAR_SAMPMap" +"COVAR_SAMPMerge" +"COVAR_SAMPNull" +"COVAR_SAMPOrDefault" +"COVAR_SAMPOrNull" +"COVAR_SAMPResample" +"COVAR_SAMPSimpleState" +"COVAR_SAMPState" +"CRC32" +"CRC32IEEE" +"CRC64" +"CREATE POLICY" +"CREATE PROFILE" +"CREATE QUOTA" +"CREATE ROLE" +"CREATE ROW POLICY" +"CREATE SETTINGS PROFILE" +"CREATE TABLE" +"CREATE TEMPORARY TABLE" +"CREATE USER" +"CREATE" +"CROSS" +"CUBE" +"CURRENT GRANTS" +"CURRENT QUOTA" +"CURRENT ROLES" +"CURRENT ROW" +"CURRENT TRANSACTION" +"CURRENTUSER" +"CURRENT_USER" +"D" +"DATA INNER UUID" +"DATA" +"DATABASE" +"DATABASES" +"DATE" +"DATEADD" +"DATEDIFF" +"DATESUB" +"DATE_ADD" +"DATE_DIFF" +"DATE_FORMAT" +"DATE_SUB" +"DATE_TRUNC" +"DAY" +"DAYOFMONTH" +"DAYOFWEEK" +"DAYOFYEAR" +"DAYS" +"DD" +"DEC" +"DEDUPLICATE" +"DEFAULT DATABASE" +"DEFAULT ROLE" +"DEFAULT" +"DEFINER" +"DELETE WHERE" +"DELETE" +"DEPENDS ON" +"DESC" +"DESCENDING" +"DESCRIBE" +"DETACH PART" +"DETACH PARTITION" +"DETACH" +"DICTIONARIES" +"DICTIONARY" +"DISK" +"DISTINCT ON" +"DISTINCT" +"DIV" +"DOUBLE PRECISION" +"DOUBLE" +"DOUBLE_SHA1_HASH" +"DOUBLE_SHA1_PASSWORD" +"DROP COLUMN" +"DROP CONSTRAINT" +"DROP DEFAULT" +"DROP DETACHED PART" +"DROP DETACHED PARTITION" +"DROP INDEX" +"DROP PART" +"DROP PARTITION" +"DROP PROJECTION" +"DROP STATISTICS" +"DROP TABLE" +"DROP TEMPORARY TABLE" +"DROP" +"Date" +"Date32" +"DateTime" +"DateTime32" +"DateTime64" +"Decimal" +"Decimal128" +"Decimal256" +"Decimal32" +"Decimal64" +"Dynamic" +"ELSE" +"EMPTY AS" +"EMPTY" +"ENABLED ROLES" +"END" +"ENFORCED" +"ENGINE" +"ENUM" +"EPHEMERAL SEQUENTIAL" +"EPHEMERAL" +"ESTIMATE" +"EVENT" +"EVENTS" +"EVERY" +"EXCEPT DATABASE" +"EXCEPT DATABASES" +"EXCEPT TABLE" +"EXCEPT TABLES" +"EXCEPT" +"EXCHANGE DICTIONARIES" +"EXCHANGE TABLES" +"EXISTS" +"EXPLAIN" +"EXPRESSION" +"EXTENDED" +"EXTERNAL DDL FROM" +"EXTRACT" +"Enum" +"Enum16" +"Enum8" +"FALSE" +"FETCH PART" +"FETCH PARTITION" +"FETCH" +"FIELDS" +"FILE" +"FILESYSTEM CACHE" +"FILESYSTEM CACHES" +"FILTER" +"FINAL" +"FIRST" +"FIXED" +"FLOAT" +"FOLLOWING" +"FOR" +"FOREIGN KEY" +"FOREIGN" +"FORGET PARTITION" +"FORMAT" +"FORMAT_BYTES" +"FQDN" +"FREEZE" +"FROM INFILE" +"FROM SHARD" +"FROM" +"FROM_BASE64" +"FROM_DAYS" +"FROM_UNIXTIME" +"FULL" +"FULLTEXT" +"FUNCTION" +"FixedString" +"Float32" +"Float64" +"ForEach" +"GEOMETRY" +"GLOBAL IN" +"GLOBAL NOT IN" +"GLOBAL" +"GRANT OPTION FOR" +"GRANT" +"GRANTEES" +"GRANULARITY" +"GROUP BY" +"GROUPING SETS" +"GROUPS" +"H" +"HASH" +"HAVING" +"HDFS" +"HH" +"HIERARCHICAL" +"HOST" +"HOUR" +"HOURS" +"HTTP" +"ID" +"IDENTIFIED" +"IF EMPTY" +"IF EXISTS" +"IF NOT EXISTS" +"IGNORE NULLS" +"ILIKE" +"IN PARTITION" +"IN" +"INDEX" +"INDEXES" +"INDICES" +"INET4" +"INET6" +"INET6_ATON" +"INET6_NTOA" +"INET_ATON" +"INET_NTOA" +"INHERIT" +"INJECTIVE" +"INNER" +"INSERT INTO" +"INT SIGNED" +"INT UNSIGNED" +"INT" +"INT1 SIGNED" +"INT1 
UNSIGNED" +"INT1" +"INTEGER SIGNED" +"INTEGER UNSIGNED" +"INTEGER" +"INTERPOLATE" +"INTERSECT" +"INTERVAL" +"INTO OUTFILE" +"INVISIBLE" +"INVOKER" +"IP" +"IPv4" +"IPv4CIDRToRange" +"IPv4NumToString" +"IPv4NumToStringClassC" +"IPv4StringToNum" +"IPv4StringToNumOrDefault" +"IPv4StringToNumOrNull" +"IPv4ToIPv6" +"IPv6" +"IPv6CIDRToRange" +"IPv6NumToString" +"IPv6StringToNum" +"IPv6StringToNumOrDefault" +"IPv6StringToNumOrNull" +"IS NOT DISTINCT FROM" +"IS NOT NULL" +"IS NULL" +"IS_OBJECT_ID" +"Int128" +"Int16" +"Int256" +"Int32" +"Int64" +"Int8" +"IntervalDay" +"IntervalHour" +"IntervalMicrosecond" +"IntervalMillisecond" +"IntervalMinute" +"IntervalMonth" +"IntervalNanosecond" +"IntervalQuarter" +"IntervalSecond" +"IntervalWeek" +"IntervalYear" +"JOIN" +"JSON" +"JSONArrayLength" +"JSONExtract" +"JSONExtractArrayRaw" +"JSONExtractBool" +"JSONExtractFloat" +"JSONExtractInt" +"JSONExtractKeys" +"JSONExtractKeysAndValues" +"JSONExtractKeysAndValuesRaw" +"JSONExtractRaw" +"JSONExtractString" +"JSONExtractUInt" +"JSONHas" +"JSONKey" +"JSONLength" +"JSONMergePatch" +"JSONType" +"JSON_ARRAY_LENGTH" +"JSON_EXISTS" +"JSON_QUERY" +"JSON_VALUE" +"JWT" +"KERBEROS" +"KEY BY" +"KEY" +"KEYED BY" +"KEYS" +"KILL" +"KIND" +"L1Distance" +"L1Norm" +"L1Normalize" +"L2Distance" +"L2Norm" +"L2Normalize" +"L2SquaredDistance" +"L2SquaredNorm" +"LARGE OBJECT" +"LAST" +"LAST_DAY" +"LAYOUT" +"LDAP" +"LEADING" +"LEFT ARRAY JOIN" +"LEFT" +"LESS THAN" +"LEVEL" +"LIFETIME" +"LIGHTWEIGHT" +"LIKE" +"LIMIT" +"LINEAR" +"LIST" +"LIVE" +"LOCAL" +"LONGBLOB" +"LONGTEXT" +"LTRIM" +"LineString" +"LinfDistance" +"LinfNorm" +"LinfNormalize" +"LowCardinality" +"LpDistance" +"LpNorm" +"LpNormalize" +"M" +"MACNumToString" +"MACStringToNum" +"MACStringToOUI" +"MAP_FROM_ARRAYS" +"MATCH" +"MATERIALIZE COLUMN" +"MATERIALIZE INDEX" +"MATERIALIZE PROJECTION" +"MATERIALIZE STATISTICS" +"MATERIALIZE TTL" +"MATERIALIZE" +"MATERIALIZED" +"MAX" +"MCS" +"MD4" +"MD5" +"MEDIUMBLOB" +"MEDIUMINT SIGNED" +"MEDIUMINT UNSIGNED" +"MEDIUMINT" +"MEDIUMTEXT" +"MEMORY" +"MERGES" +"METRICS INNER UUID" +"METRICS" +"MI" +"MICROSECOND" +"MICROSECONDS" +"MILLISECOND" +"MILLISECONDS" +"MIN" +"MINUTE" +"MINUTES" +"MM" +"MOD" +"MODIFY COLUMN" +"MODIFY COMMENT" +"MODIFY DEFINER" +"MODIFY ORDER BY" +"MODIFY QUERY" +"MODIFY REFRESH" +"MODIFY SAMPLE BY" +"MODIFY SETTING" +"MODIFY SQL SECURITY" +"MODIFY STATISTICS" +"MODIFY TTL" +"MODIFY" +"MONTH" +"MONTHS" +"MOVE PART" +"MOVE PARTITION" +"MOVE" +"MS" +"MUTATION" +"Map" +"Merge" +"MultiLineString" +"MultiPolygon" +"N" +"NAME" +"NAMED COLLECTION" +"NANOSECOND" +"NANOSECONDS" +"NATIONAL CHAR VARYING" +"NATIONAL CHAR" +"NATIONAL CHARACTER LARGE OBJECT" +"NATIONAL CHARACTER VARYING" +"NATIONAL CHARACTER" +"NCHAR LARGE OBJECT" +"NCHAR VARYING" +"NCHAR" +"NEXT" +"NO ACTION" +"NO DELAY" +"NO LIMITS" +"NONE" +"NOT BETWEEN" +"NOT IDENTIFIED" +"NOT ILIKE" +"NOT IN" +"NOT KEYED" +"NOT LIKE" +"NOT OVERRIDABLE" +"NOT" +"NO_PASSWORD" +"NS" +"NULL" +"NULLS" +"NUMERIC" +"NVARCHAR" +"Nested" +"Nothing" +"Null" +"Nullable" +"OCTET_LENGTH" +"OFFSET" +"ON DELETE" +"ON UPDATE" +"ON VOLUME" +"ON" +"ONLY" +"OPTIMIZE TABLE" +"OR REPLACE" +"OR" +"ORDER BY" +"OUTER" +"OVER" +"OVERRIDABLE" +"Object" +"PART" +"PARTIAL" +"PARTITION BY" +"PARTITION" +"PARTITIONS" +"PART_MOVE_TO_SHARD" +"PASTE" +"PERIODIC REFRESH" +"PERMANENTLY" +"PERMISSIVE" +"PERSISTENT SEQUENTIAL" +"PERSISTENT" +"PIPELINE" +"PLAINTEXT_PASSWORD" +"PLAN" +"POPULATE" +"PRECEDING" +"PRECISION" +"PREWHERE" +"PRIMARY KEY" +"PRIMARY" +"PROFILE" +"PROJECTION" +"PULL" +"Point" +"Polygon" 
+"Protobuf" +"Q" +"QQ" +"QUALIFY" +"QUARTER" +"QUARTERS" +"QUERY TREE" +"QUERY" +"QUOTA" +"RANDOMIZE FOR" +"RANDOMIZED" +"RANGE" +"READONLY" +"REAL" +"REALM" +"RECOMPRESS" +"RECURSIVE" +"REFERENCES" +"REFRESH" +"REGEXP" +"REGEXP_EXTRACT" +"REGEXP_MATCHES" +"REGEXP_REPLACE" +"REMOVE SAMPLE BY" +"REMOVE TTL" +"REMOVE" +"RENAME COLUMN" +"RENAME DATABASE" +"RENAME DICTIONARY" +"RENAME TABLE" +"RENAME TO" +"RENAME" +"REPLACE PARTITION" +"REPLACE" +"RESET SETTING" +"RESPECT NULLS" +"RESTORE" +"RESTRICT" +"RESTRICTIVE" +"RESUME" +"REVOKE" +"RIGHT" +"ROLLBACK" +"ROLLUP" +"ROW" +"ROWS" +"RTRIM" +"Resample" +"Ring" +"S" +"S3" +"SALT" +"SAMPLE BY" +"SAMPLE" +"SAN" +"SCHEMA" +"SCHEME" +"SECOND" +"SECONDS" +"SELECT" +"SEMI" +"SERVER" +"SET DEFAULT ROLE" +"SET DEFAULT" +"SET FAKE TIME" +"SET NULL" +"SET ROLE DEFAULT" +"SET ROLE" +"SET TRANSACTION SNAPSHOT" +"SET" +"SETTINGS" +"SHA1" +"SHA224" +"SHA256" +"SHA256_HASH" +"SHA256_PASSWORD" +"SHA384" +"SHA512" +"SHA512_256" +"SHOW ACCESS" +"SHOW CREATE" +"SHOW ENGINES" +"SHOW FUNCTIONS" +"SHOW GRANTS" +"SHOW PRIVILEGES" +"SHOW PROCESSLIST" +"SHOW SETTING" +"SHOW" +"SIGNED" +"SIMPLE" +"SINGLE" +"SMALLINT SIGNED" +"SMALLINT UNSIGNED" +"SMALLINT" +"SOURCE" +"SPATIAL" +"SQL SECURITY" +"SQL_TSI_DAY" +"SQL_TSI_HOUR" +"SQL_TSI_MICROSECOND" +"SQL_TSI_MILLISECOND" +"SQL_TSI_MINUTE" +"SQL_TSI_MONTH" +"SQL_TSI_NANOSECOND" +"SQL_TSI_QUARTER" +"SQL_TSI_SECOND" +"SQL_TSI_WEEK" +"SQL_TSI_YEAR" +"SS" +"SSH_KEY" +"SSL_CERTIFICATE" +"START TRANSACTION" +"STATISTICS" +"STD" +"STDArgMax" +"STDArgMin" +"STDArray" +"STDDEV_POP" +"STDDEV_POPArgMax" +"STDDEV_POPArgMin" +"STDDEV_POPArray" +"STDDEV_POPDistinct" +"STDDEV_POPForEach" +"STDDEV_POPIf" +"STDDEV_POPMap" +"STDDEV_POPMerge" +"STDDEV_POPNull" +"STDDEV_POPOrDefault" +"STDDEV_POPOrNull" +"STDDEV_POPResample" +"STDDEV_POPSimpleState" +"STDDEV_POPState" +"STDDEV_SAMP" +"STDDEV_SAMPArgMax" +"STDDEV_SAMPArgMin" +"STDDEV_SAMPArray" +"STDDEV_SAMPDistinct" +"STDDEV_SAMPForEach" +"STDDEV_SAMPIf" +"STDDEV_SAMPMap" +"STDDEV_SAMPMerge" +"STDDEV_SAMPNull" +"STDDEV_SAMPOrDefault" +"STDDEV_SAMPOrNull" +"STDDEV_SAMPResample" +"STDDEV_SAMPSimpleState" +"STDDEV_SAMPState" +"STDDistinct" +"STDForEach" +"STDIf" +"STDMap" +"STDMerge" +"STDNull" +"STDOrDefault" +"STDOrNull" +"STDResample" +"STDSimpleState" +"STDState" +"STEP" +"STORAGE" +"STRICT" +"STRICTLY_ASCENDING" +"SUBPARTITION BY" +"SUBPARTITION" +"SUBPARTITIONS" +"SUBSTRING" +"SUBSTRING_INDEX" +"SUSPEND" +"SVG" +"SYNC" +"SYNTAX" +"SYSTEM" +"SimpleAggregateFunction" +"State" +"String" +"TABLE OVERRIDE" +"TABLE" +"TABLES" +"TAGS INNER UUID" +"TAGS" +"TEMPORARY TABLE" +"TEMPORARY" +"TEST" +"TEXT" +"THEN" +"TIME" +"TIMESTAMP" +"TIMESTAMPADD" +"TIMESTAMPDIFF" +"TIMESTAMPSUB" +"TIMESTAMP_ADD" +"TIMESTAMP_DIFF" +"TIMESTAMP_SUB" +"TINYBLOB" +"TINYINT SIGNED" +"TINYINT UNSIGNED" +"TINYINT" +"TINYTEXT" +"TO DISK" +"TO INNER UUID" +"TO SHARD" +"TO TABLE" +"TO VOLUME" +"TO" +"TOP" +"TOTALS" +"TO_BASE64" +"TO_DAYS" +"TO_UNIXTIME" +"TRACKING ONLY" +"TRAILING" +"TRANSACTION" +"TRIGGER" +"TRIM" +"TRUE" +"TRUNCATE" +"TTL" +"TYPE" +"TYPEOF" +"Tuple" +"UInt128" +"UInt16" +"UInt256" +"UInt32" +"UInt64" +"UInt8" +"ULIDStringToDateTime" +"UNBOUNDED" +"UNDROP" +"UNFREEZE" +"UNION" +"UNIQUE" +"UNSET FAKE TIME" +"UNSIGNED" +"UPDATE" +"URL" +"URLHash" +"URLHierarchy" +"URLPathHierarchy" +"USE" +"USING" +"UTCTimestamp" +"UTC_timestamp" +"UUID" +"UUIDNumToString" +"UUIDStringToNum" +"UUIDToNum" +"UUIDv7ToDateTime" +"VALID UNTIL" +"VALUES" +"VARBINARY" +"VARCHAR" +"VARCHAR2" +"VARYING" +"VAR_POP" +"VAR_POPArgMax" 
+"VAR_POPArgMin" +"VAR_POPArray" +"VAR_POPDistinct" +"VAR_POPForEach" +"VAR_POPIf" +"VAR_POPMap" +"VAR_POPMerge" +"VAR_POPNull" +"VAR_POPOrDefault" +"VAR_POPOrNull" +"VAR_POPResample" +"VAR_POPSimpleState" +"VAR_POPState" +"VAR_SAMP" +"VAR_SAMPArgMax" +"VAR_SAMPArgMin" +"VAR_SAMPArray" +"VAR_SAMPDistinct" +"VAR_SAMPForEach" +"VAR_SAMPIf" +"VAR_SAMPMap" +"VAR_SAMPMerge" +"VAR_SAMPNull" +"VAR_SAMPOrDefault" +"VAR_SAMPOrNull" +"VAR_SAMPResample" +"VAR_SAMPSimpleState" +"VAR_SAMPState" +"VIEW" +"VISIBLE" +"Variant" +"WATCH" +"WATERMARK" +"WEEK" +"WEEKS" +"WHEN" +"WHERE" +"WINDOW" +"WITH ADMIN OPTION" +"WITH CHECK" +"WITH FILL" +"WITH GRANT OPTION" +"WITH NAME" +"WITH REPLACE OPTION" +"WITH TIES" +"WITH" +"WITH_ITEMINDEX" +"WK" +"WRITABLE" +"WW" +"YEAR" +"YEARS" +"YY" +"YYYY" +"YYYYMMDDToDate" +"YYYYMMDDToDate32" +"YYYYMMDDhhmmssToDateTime" +"YYYYMMDDhhmmssToDateTime64" +"ZKPATH" +"_CAST" +"__actionName" +"__bitBoolMaskAnd" +"__bitBoolMaskOr" +"__bitSwapLastTwo" +"__bitWrapperFunc" +"__getScalar" +"__scalarSubqueryResult" +"abs" +"accurateCast" +"accurateCastOrDefault" +"accurateCastOrNull" +"acos" +"acosh" "addDate" "addDays" "addHours" @@ -20,19 +976,16 @@ "addMonths" "addNanoseconds" "addQuarters" -"addressToLine" -"addressToLineWithInlines" -"addressToSymbol" "addSeconds" "addTupleOfIntervals" "addWeeks" "addYears" -"ADMIN OPTION FOR" +"addressToLine" +"addressToLineWithInlines" +"addressToSymbol" "aes_decrypt_mysql" "aes_encrypt_mysql" -"AFTER" "age" -"AggregateFunction" "aggThrow" "aggThrowArgMax" "aggThrowArgMin" @@ -48,24 +1001,7 @@ "aggThrowResample" "aggThrowSimpleState" "aggThrowState" -"ALGORITHM" -"ALIAS" -"ALL" -"ALLOWED_LATENESS" "alphaTokens" -"ALTER COLUMN" -"ALTER DATABASE" -"ALTER LIVE VIEW" -"ALTER POLICY" -"ALTER PROFILE" -"ALTER QUOTA" -"ALTER ROLE" -"ALTER ROW POLICY" -"ALTER SETTINGS PROFILE" -"ALTER TABLE" -"ALTER TEMPORARY TABLE" -"ALTER USER" -"ALTER" "analysisOfVariance" "analysisOfVarianceArgMax" "analysisOfVarianceArgMin" @@ -81,8 +1017,6 @@ "analysisOfVarianceResample" "analysisOfVarianceSimpleState" "analysisOfVarianceState" -"AND STDOUT" -"AND" "and" "anova" "anovaArgMax" @@ -99,8 +1033,6 @@ "anovaResample" "anovaSimpleState" "anovaState" -"ANTI" -"ANY" "any" "anyArgMax" "anyArgMin" @@ -206,10 +1138,7 @@ "any_value_respect_nullsResample" "any_value_respect_nullsSimpleState" "any_value_respect_nullsState" -"APPEND" "appendTrailingCharIfAbsent" -"APPLY DELETED MASK" -"APPLY" "approx_top_count" "approx_top_countArgMax" "approx_top_countArgMin" @@ -285,11 +1214,9 @@ "argMinResample" "argMinSimpleState" "argMinState" -"ARRAY JOIN" -"Array" "array" -"arrayAll" "arrayAUC" +"arrayAll" "arrayAvg" "arrayCompact" "arrayConcat" @@ -382,31 +1309,13 @@ "array_concat_aggResample" "array_concat_aggSimpleState" "array_concat_aggState" -"AS" -"ASC" -"ASCENDING" "ascii" "asin" "asinh" -"ASOF" -"ASSUME" "assumeNotNull" -"AST" -"ASYNC" "atan" "atan2" "atanh" -"ATTACH PART" -"ATTACH PARTITION" -"ATTACH POLICY" -"ATTACH PROFILE" -"ATTACH QUOTA" -"ATTACH ROLE" -"ATTACH ROW POLICY" -"ATTACH SETTINGS PROFILE" -"ATTACH USER" -"ATTACH" -"AUTO_INCREMENT" "avg" "avgArgMax" "avgArgMin" @@ -437,8 +1346,6 @@ "avgWeightedResample" "avgWeightedSimpleState" "avgWeightedState" -"AZURE" -"BACKUP" "bagexpansion" "bar" "base58Decode" @@ -447,17 +1354,24 @@ "base64Encode" "base64URLDecode" "base64URLEncode" -"basename" "base_backup" -"BCRYPT_HASH" -"BCRYPT_PASSWORD" -"BEGIN TRANSACTION" -"BETWEEN" -"BIDIRECTIONAL" +"basename" "bin" "bitAnd" "bitCount" "bitHammingDistance" +"bitNot" +"bitOr" 
+"bitPositionsToArray" +"bitRotateLeft" +"bitRotateRight" +"bitShiftLeft" +"bitShiftRight" +"bitSlice" +"bitTest" +"bitTestAll" +"bitTestAny" +"bitXor" "bitmapAnd" "bitmapAndCardinality" "bitmapAndnot" @@ -479,68 +1393,11 @@ "bitmapXorCardinality" "bitmaskToArray" "bitmaskToList" -"bitNot" -"bitOr" -"bitPositionsToArray" -"bitRotateLeft" -"bitRotateRight" -"bitShiftLeft" -"bitShiftRight" -"bitSlice" -"bitTest" -"bitTestAll" -"bitTestAny" -"bitXor" -"BIT_AND" -"BIT_ANDArgMax" -"BIT_ANDArgMin" -"BIT_ANDArray" -"BIT_ANDDistinct" -"BIT_ANDForEach" -"BIT_ANDIf" -"BIT_ANDMap" -"BIT_ANDMerge" -"BIT_ANDNull" -"BIT_ANDOrDefault" -"BIT_ANDOrNull" -"BIT_ANDResample" -"BIT_ANDSimpleState" -"BIT_ANDState" -"BIT_OR" -"BIT_ORArgMax" -"BIT_ORArgMin" -"BIT_ORArray" -"BIT_ORDistinct" -"BIT_ORForEach" -"BIT_ORIf" -"BIT_ORMap" -"BIT_ORMerge" -"BIT_ORNull" -"BIT_OROrDefault" -"BIT_OROrNull" -"BIT_ORResample" -"BIT_ORSimpleState" -"BIT_ORState" -"BIT_XOR" -"BIT_XORArgMax" -"BIT_XORArgMin" -"BIT_XORArray" -"BIT_XORDistinct" -"BIT_XORForEach" -"BIT_XORIf" -"BIT_XORMap" -"BIT_XORMerge" -"BIT_XORNull" -"BIT_XOROrDefault" -"BIT_XOROrNull" -"BIT_XORResample" -"BIT_XORSimpleState" -"BIT_XORState" -"BLAKE3" "blockNumber" "blockSerializedSize" "blockSize" -"BOTH" +"bool" +"boolean" "boundingRatio" "boundingRatioArgMax" "boundingRatioArgMin" @@ -557,18 +1414,14 @@ "boundingRatioSimpleState" "boundingRatioState" "buildId" -"BY" "byteHammingDistance" "byteSize" "byteSlice" "byteSwap" -"CASCADE" -"CASE" "caseWithExpr" "caseWithExpression" "caseWithoutExpr" "caseWithoutExpression" -"CAST" "catboostEvaluate" "categoricalInformationValue" "categoricalInformationValueArgMax" @@ -588,46 +1441,17 @@ "cbrt" "ceil" "ceiling" -"CHANGE" -"CHANGEABLE_IN_READONLY" -"CHANGED" "changeDay" "changeHour" "changeMinute" "changeMonth" "changeSecond" "changeYear" -"CHAR VARYING" -"CHAR" "char" -"CHARACTER LARGE OBJECT" -"CHARACTER VARYING" -"CHARACTER" -"CHARACTER_LENGTH" -"CHAR_LENGTH" -"CHECK ALL TABLES" -"CHECK TABLE" -"CHECK" "cityHash64" "clamp" -"CLEANUP" -"CLEAR COLUMN" -"CLEAR INDEX" -"CLEAR PROJECTION" -"CLEAR STATISTICS" -"CLUSTER" -"CLUSTERS" "cluster_host_ids" -"CN" "coalesce" -"CODEC" -"COLLATE" -"COLUMN" -"COLUMNS" -"COMMENT COLUMN" -"COMMENT" -"COMMIT" -"COMPRESSION" "concat" "concatAssumeInjective" "concatWithSeparator" @@ -635,8 +1459,6 @@ "concat_ws" "connectionId" "connection_id" -"CONST" -"CONSTRAINT" "contingency" "contingencyArgMax" "contingencyArgMin" @@ -813,36 +1635,6 @@ "covarSampStableSimpleState" "covarSampStableState" "covarSampState" -"COVAR_POP" -"COVAR_POPArgMax" -"COVAR_POPArgMin" -"COVAR_POPArray" -"COVAR_POPDistinct" -"COVAR_POPForEach" -"COVAR_POPIf" -"COVAR_POPMap" -"COVAR_POPMerge" -"COVAR_POPNull" -"COVAR_POPOrDefault" -"COVAR_POPOrNull" -"COVAR_POPResample" -"COVAR_POPSimpleState" -"COVAR_POPState" -"COVAR_SAMP" -"COVAR_SAMPArgMax" -"COVAR_SAMPArgMin" -"COVAR_SAMPArray" -"COVAR_SAMPDistinct" -"COVAR_SAMPForEach" -"COVAR_SAMPIf" -"COVAR_SAMPMap" -"COVAR_SAMPMerge" -"COVAR_SAMPNull" -"COVAR_SAMPOrDefault" -"COVAR_SAMPOrNull" -"COVAR_SAMPResample" -"COVAR_SAMPSimpleState" -"COVAR_SAMPState" "cramersV" "cramersVArgMax" "cramersVArgMin" @@ -873,38 +1665,16 @@ "cramersVResample" "cramersVSimpleState" "cramersVState" -"CRC32" -"CRC32IEEE" -"CRC64" -"CREATE POLICY" -"CREATE PROFILE" -"CREATE QUOTA" -"CREATE ROLE" -"CREATE ROW POLICY" -"CREATE SETTINGS PROFILE" -"CREATE TABLE" -"CREATE TEMPORARY TABLE" -"CREATE USER" -"CREATE" -"CROSS" -"CUBE" "curdate" -"CURRENT GRANTS" -"CURRENT QUOTA" -"CURRENT ROLES" 
-"CURRENT ROW" -"CURRENT TRANSACTION" "currentDatabase" "currentProfiles" "currentRoles" "currentSchemas" -"CURRENTUSER" "currentUser" "current_database" "current_date" "current_schemas" "current_timestamp" -"CURRENT_USER" "current_user" "cutFragment" "cutIPv6" @@ -920,59 +1690,25 @@ "cutToFirstSignificantSubdomainWithWWWRFC" "cutURLParameter" "cutWWW" -"D" "damerauLevenshteinDistance" -"DATA INNER UUID" -"DATA" -"DATABASE" -"DATABASES" -"DATE" -"Date" -"DATEADD" -"DATEDIFF" "dateDiff" "dateName" -"DATESUB" -"DateTime" -"DateTime64" "dateTime64ToSnowflake" "dateTime64ToSnowflakeID" "dateTimeToSnowflake" "dateTimeToSnowflakeID" "dateTrunc" -"DATE_ADD" -"DATE_DIFF" "date_diff" -"DATE_FORMAT" -"DATE_SUB" -"DATE_TRUNC" -"DAY" -"DAYOFMONTH" -"DAYOFWEEK" -"DAYOFYEAR" -"DAYS" -"DD" -"Decimal" -"Decimal128" -"Decimal32" -"Decimal64" "decodeHTMLComponent" "decodeURLComponent" "decodeURLFormComponent" "decodeXMLComponent" "decrypt" -"DEDUPLICATE" -"DEFAULT DATABASE" -"DEFAULT ROLE" -"DEFAULT" "defaultProfiles" "defaultRoles" "defaultValueOfArgumentType" "defaultValueOfTypeName" -"DEFINER" "degrees" -"DELETE WHERE" -"DELETE" "deltaSum" "deltaSumArgMax" "deltaSumArgMin" @@ -1034,13 +1770,6 @@ "dense_rankResample" "dense_rankSimpleState" "dense_rankState" -"DEPENDS ON" -"DESC" -"DESCENDING" -"DESCRIBE" -"DETACH PART" -"DETACH PARTITION" -"DETACH" "detectCharset" "detectLanguage" "detectLanguageMixed" @@ -1060,6 +1789,10 @@ "dictGetFloat64" "dictGetFloat64OrDefault" "dictGetHierarchy" +"dictGetIPv4" +"dictGetIPv4OrDefault" +"dictGetIPv6" +"dictGetIPv6OrDefault" "dictGetInt16" "dictGetInt16OrDefault" "dictGetInt32" @@ -1068,10 +1801,6 @@ "dictGetInt64OrDefault" "dictGetInt8" "dictGetInt8OrDefault" -"dictGetIPv4" -"dictGetIPv4OrDefault" -"dictGetIPv6" -"dictGetIPv6OrDefault" "dictGetOrDefault" "dictGetOrNull" "dictGetString" @@ -1087,19 +1816,13 @@ "dictGetUUID" "dictGetUUIDOrDefault" "dictHas" -"DICTIONARIES" -"DICTIONARY" "dictIsIn" -"DISK" "displayName" "distanceL1" "distanceL2" "distanceL2Squared" "distanceLinf" "distanceLp" -"DISTINCT ON" -"DISTINCT" -"DIV" "divide" "divideDecimal" "domain" @@ -1107,30 +1830,12 @@ "domainWithoutWWW" "domainWithoutWWWRFC" "dotProduct" -"DOUBLE_SHA1_HASH" -"DOUBLE_SHA1_PASSWORD" -"DROP COLUMN" -"DROP CONSTRAINT" -"DROP DEFAULT" -"DROP DETACHED PART" -"DROP DETACHED PARTITION" -"DROP INDEX" -"DROP PART" -"DROP PARTITION" -"DROP PROJECTION" -"DROP STATISTICS" -"DROP TABLE" -"DROP TEMPORARY TABLE" -"DROP" "dumpColumnStructure" "dynamicElement" "dynamicType" "e" "editDistance" "editDistanceUTF8" -"ELSE" -"EMPTY AS" -"EMPTY" "empty" "emptyArrayDate" "emptyArrayDateTime" @@ -1146,18 +1851,14 @@ "emptyArrayUInt32" "emptyArrayUInt64" "emptyArrayUInt8" -"ENABLED ROLES" "enabledProfiles" "enabledRoles" "encodeURLComponent" "encodeURLFormComponent" "encodeXMLComponent" "encrypt" -"END" "endsWith" "endsWithUTF8" -"ENFORCED" -"ENGINE" "entropy" "entropyArgMax" "entropyArgMin" @@ -1173,32 +1874,14 @@ "entropyResample" "entropySimpleState" "entropyState" -"Enum" -"Enum16" -"Enum8" -"EPHEMERAL SEQUENTIAL" -"EPHEMERAL" "equals" "erf" "erfc" "errorCodeToName" -"ESTIMATE" "evalMLMethod" -"EVENT" -"EVENTS" -"EVERY" -"EXCEPT DATABASE" -"EXCEPT DATABASES" -"EXCEPT TABLE" -"EXCEPT TABLES" -"EXCEPT" -"EXCHANGE DICTIONARIES" -"EXCHANGE TABLES" -"EXISTS" "exp" "exp10" "exp2" -"EXPLAIN" "exponentialMovingAverage" "exponentialMovingAverageArgMax" "exponentialMovingAverageArgMin" @@ -1274,10 +1957,6 @@ "exponentialTimeDecayedSumResample" "exponentialTimeDecayedSumSimpleState" 
"exponentialTimeDecayedSumState" -"EXPRESSION" -"EXTENDED" -"EXTERNAL DDL FROM" -"EXTRACT" "extract" "extractAll" "extractAllGroups" @@ -1291,24 +1970,13 @@ "extractURLParameterNames" "extractURLParameters" "factorial" -"FALSE" "farmFingerprint64" "farmHash64" -"FETCH PART" -"FETCH PARTITION" -"FETCH" -"FIELDS" -"FILE" "file" -"FILESYSTEM CACHE" -"FILESYSTEM CACHES" "filesystemAvailable" "filesystemCapacity" "filesystemUnreserved" -"FILTER" -"FINAL" "finalizeAggregation" -"FIRST" "firstLine" "firstSignificantSubdomain" "firstSignificantSubdomainCustom" @@ -1344,7 +2012,6 @@ "first_value_respect_nullsResample" "first_value_respect_nullsSimpleState" "first_value_respect_nullsState" -"FixedString" "flameGraph" "flameGraphArgMax" "flameGraphArgMin" @@ -1362,16 +2029,7 @@ "flameGraphState" "flatten" "flattenTuple" -"Float32" -"Float64" "floor" -"FOLLOWING" -"FOR" -"ForEach" -"FOREIGN KEY" -"FOREIGN" -"FORGET PARTITION" -"FORMAT" "format" "formatDateTime" "formatDateTimeInJodaSyntax" @@ -1385,31 +2043,19 @@ "formatReadableTimeDelta" "formatRow" "formatRowNoNewline" -"FORMAT_BYTES" -"FQDN" "fragment" -"FREEZE" -"FROM INFILE" -"FROM SHARD" -"FROM" "fromDaysSinceYearZero" "fromDaysSinceYearZero32" "fromModifiedJulianDay" "fromModifiedJulianDayOrNull" +"fromUTCTimestamp" "fromUnixTimestamp" "fromUnixTimestamp64Micro" "fromUnixTimestamp64Milli" "fromUnixTimestamp64Nano" "fromUnixTimestampInJodaSyntax" -"fromUTCTimestamp" -"FROM_BASE64" -"FROM_DAYS" -"FROM_UNIXTIME" "from_utc_timestamp" -"FULL" "fullHostName" -"FULLTEXT" -"FUNCTION" "fuzzBits" "gccMurmurHash" "gcd" @@ -1419,11 +2065,11 @@ "generateUUIDv4" "generateUUIDv7" "geoDistance" +"geoToH3" +"geoToS2" "geohashDecode" "geohashEncode" "geohashesInBox" -"geoToH3" -"geoToS2" "getClientHTTPHeader" "getMacro" "getOSKernelVersion" @@ -1432,9 +2078,6 @@ "getSizeOfEnumType" "getSubcolumn" "getTypeSerializationStreams" -"GLOBAL IN" -"GLOBAL NOT IN" -"GLOBAL" "globalIn" "globalInIgnoreSet" "globalNotIn" @@ -1444,16 +2087,11 @@ "globalNullIn" "globalNullInIgnoreSet" "globalVariable" -"GRANT OPTION FOR" -"GRANT" -"GRANTEES" -"GRANULARITY" "greatCircleAngle" "greatCircleDistance" "greater" "greaterOrEquals" "greatest" -"GROUP BY" "groupArray" "groupArrayArgMax" "groupArrayArgMin" @@ -1589,6 +2227,36 @@ "groupBitAndResample" "groupBitAndSimpleState" "groupBitAndState" +"groupBitOr" +"groupBitOrArgMax" +"groupBitOrArgMin" +"groupBitOrArray" +"groupBitOrDistinct" +"groupBitOrForEach" +"groupBitOrIf" +"groupBitOrMap" +"groupBitOrMerge" +"groupBitOrNull" +"groupBitOrOrDefault" +"groupBitOrOrNull" +"groupBitOrResample" +"groupBitOrSimpleState" +"groupBitOrState" +"groupBitXor" +"groupBitXorArgMax" +"groupBitXorArgMin" +"groupBitXorArray" +"groupBitXorDistinct" +"groupBitXorForEach" +"groupBitXorIf" +"groupBitXorMap" +"groupBitXorMerge" +"groupBitXorNull" +"groupBitXorOrDefault" +"groupBitXorOrNull" +"groupBitXorResample" +"groupBitXorSimpleState" +"groupBitXorState" "groupBitmap" "groupBitmapAnd" "groupBitmapAndArgMax" @@ -1648,36 +2316,6 @@ "groupBitmapXorResample" "groupBitmapXorSimpleState" "groupBitmapXorState" -"groupBitOr" -"groupBitOrArgMax" -"groupBitOrArgMin" -"groupBitOrArray" -"groupBitOrDistinct" -"groupBitOrForEach" -"groupBitOrIf" -"groupBitOrMap" -"groupBitOrMerge" -"groupBitOrNull" -"groupBitOrOrDefault" -"groupBitOrOrNull" -"groupBitOrResample" -"groupBitOrSimpleState" -"groupBitOrState" -"groupBitXor" -"groupBitXorArgMax" -"groupBitXorArgMin" -"groupBitXorArray" -"groupBitXorDistinct" -"groupBitXorForEach" -"groupBitXorIf" -"groupBitXorMap" 
-"groupBitXorMerge" -"groupBitXorNull" -"groupBitXorOrDefault" -"groupBitXorOrNull" -"groupBitXorResample" -"groupBitXorSimpleState" -"groupBitXorState" "groupConcat" "groupConcatArgMax" "groupConcatArgMin" @@ -1693,8 +2331,6 @@ "groupConcatResample" "groupConcatSimpleState" "groupConcatState" -"GROUPING SETS" -"GROUPS" "groupUniqArray" "groupUniqArrayArgMax" "groupUniqArrayArgMin" @@ -1725,7 +2361,6 @@ "group_concatResample" "group_concatSimpleState" "group_concatState" -"H" "h3CellAreaM2" "h3CellAreaRads2" "h3Distance" @@ -1753,7 +2388,6 @@ "h3IsPentagon" "h3IsResClassIII" "h3IsValid" -"h3kRing" "h3Line" "h3NumHexagons" "h3PointDistKm" @@ -1766,12 +2400,12 @@ "h3ToParent" "h3ToString" "h3UnidirectionalEdgeIsValid" +"h3kRing" "halfMD5" "has" "hasAll" "hasAny" "hasColumnInTable" -"HASH" "hasSubsequence" "hasSubsequenceCaseInsensitive" "hasSubsequenceCaseInsensitiveUTF8" @@ -1782,11 +2416,7 @@ "hasTokenCaseInsensitive" "hasTokenCaseInsensitiveOrNull" "hasTokenOrNull" -"HAVING" -"HDFS" "hex" -"HH" -"HIERARCHICAL" "hilbertDecode" "hilbertEncode" "histogram" @@ -1808,62 +2438,33 @@ "hop" "hopEnd" "hopStart" -"HOST" "hostName" "hostname" -"HOUR" -"HOURS" -"HTTP" "hypot" -"ID" -"IDENTIFIED" "identity" "idnaDecode" "idnaEncode" -"IF EMPTY" -"IF EXISTS" -"IF NOT EXISTS" "if" "ifNotFinite" "ifNull" -"IGNORE NULLS" "ignore" -"ILIKE" "ilike" -"IN PARTITION" -"IN" "in" -"INDEX" -"INDEXES" +"inIgnoreSet" "indexHint" "indexOf" -"INDICES" -"INET6_ATON" -"INET6_NTOA" -"INET_ATON" -"INET_NTOA" -"INHERIT" -"inIgnoreSet" "initcap" "initcapUTF8" -"initializeAggregation" "initialQueryID" "initial_query_id" -"INJECTIVE" -"INNER" -"INSERT INTO" +"initializeAggregation" "instr" -"Int16" -"Int32" -"Int64" -"Int8" "intDiv" "intDivOrZero" -"INTERPOLATE" -"INTERSECT" -"INTERVAL" -"IntervalDay" -"IntervalHour" +"intExp10" +"intExp2" +"intHash32" +"intHash64" "intervalLengthSum" "intervalLengthSumArgMax" "intervalLengthSumArgMin" @@ -1879,42 +2480,13 @@ "intervalLengthSumResample" "intervalLengthSumSimpleState" "intervalLengthSumState" -"IntervalMinute" -"IntervalMonth" -"IntervalQuarter" -"IntervalSecond" -"IntervalWeek" -"IntervalYear" -"intExp10" -"intExp2" -"intHash32" -"intHash64" -"INTO OUTFILE" -"INVISIBLE" -"INVOKER" -"IP" -"IPv4CIDRToRange" -"IPv4NumToString" -"IPv4NumToStringClassC" -"IPv4StringToNum" -"IPv4StringToNumOrDefault" -"IPv4StringToNumOrNull" -"IPv4ToIPv6" -"IPv6CIDRToRange" -"IPv6NumToString" -"IPv6StringToNum" -"IPv6StringToNumOrDefault" -"IPv6StringToNumOrNull" -"IS NOT DISTINCT FROM" -"IS NOT NULL" -"IS NULL" "isConstant" "isDecimalOverflow" "isFinite" -"isInfinite" "isIPAddressInRange" "isIPv4String" "isIPv6String" +"isInfinite" "isNaN" "isNotDistinctFrom" "isNotNull" @@ -1923,46 +2495,15 @@ "isValidJSON" "isValidUTF8" "isZeroOrNull" -"IS_OBJECT_ID" "jaroSimilarity" "jaroWinklerSimilarity" "javaHash" "javaHashUTF16LE" -"JOIN" "joinGet" "joinGetOrNull" -"JSONArrayLength" -"JSONExtract" -"JSONExtractArrayRaw" -"JSONExtractBool" -"JSONExtractFloat" -"JSONExtractInt" -"JSONExtractKeys" -"JSONExtractKeysAndValues" -"JSONExtractKeysAndValuesRaw" -"JSONExtractRaw" -"JSONExtractString" -"JSONExtractUInt" -"JSONHas" -"JSONKey" -"JSONLength" -"JSONMergePatch" "jsonMergePatch" -"JSONType" -"JSON_ARRAY_LENGTH" -"JSON_EXISTS" -"JSON_QUERY" -"JSON_VALUE" "jumpConsistentHash" -"JWT" "kafkaMurmurHash" -"KERBEROS" -"KEY BY" -"KEY" -"KEYED BY" -"KEYS" -"KILL" -"KIND" "kolmogorovSmirnovTest" "kolmogorovSmirnovTestArgMax" "kolmogorovSmirnovTestArgMin" @@ -2011,14 +2552,6 @@ "kurtSampResample" 
"kurtSampSimpleState" "kurtSampState" -"L1Distance" -"L1Norm" -"L1Normalize" -"L2Distance" -"L2Norm" -"L2Normalize" -"L2SquaredDistance" -"L2SquaredNorm" "lagInFrame" "lagInFrameArgMax" "lagInFrameArgMin" @@ -2034,7 +2567,6 @@ "lagInFrameResample" "lagInFrameSimpleState" "lagInFrameState" -"LARGE OBJECT" "largestTriangleThreeBuckets" "largestTriangleThreeBucketsArgMax" "largestTriangleThreeBucketsArgMin" @@ -2050,8 +2582,6 @@ "largestTriangleThreeBucketsResample" "largestTriangleThreeBucketsSimpleState" "largestTriangleThreeBucketsState" -"LAST" -"LAST_DAY" "last_value" "last_valueArgMax" "last_valueArgMin" @@ -2082,10 +2612,8 @@ "last_value_respect_nullsResample" "last_value_respect_nullsSimpleState" "last_value_respect_nullsState" -"LAYOUT" "lcase" "lcm" -"LDAP" "leadInFrame" "leadInFrameArgMax" "leadInFrameArgMin" @@ -2101,10 +2629,7 @@ "leadInFrameResample" "leadInFrameSimpleState" "leadInFrameState" -"LEADING" "least" -"LEFT ARRAY JOIN" -"LEFT" "left" "leftPad" "leftPadUTF8" @@ -2112,42 +2637,24 @@ "lemmatize" "length" "lengthUTF8" -"LESS THAN" "less" "lessOrEquals" -"LEVEL" "levenshteinDistance" "levenshteinDistanceUTF8" "lgamma" -"LIFETIME" -"LIGHTWEIGHT" -"LIKE" "like" -"LIMIT" -"LINEAR" -"LinfDistance" -"LinfNorm" -"LinfNormalize" -"LIST" -"LIVE" "ln" -"LOCAL" "locate" "log" "log10" "log1p" "log2" "logTrace" -"LowCardinality" "lowCardinalityIndices" "lowCardinalityKeys" "lower" "lowerUTF8" "lpad" -"LpDistance" -"LpNorm" -"LpNormalize" -"LTRIM" "ltrim" "lttb" "lttbArgMax" @@ -2164,10 +2671,6 @@ "lttbResample" "lttbSimpleState" "lttbState" -"M" -"MACNumToString" -"MACStringToNum" -"MACStringToOUI" "makeDate" "makeDate32" "makeDateTime" @@ -2208,18 +2711,8 @@ "mapSubtract" "mapUpdate" "mapValues" -"MAP_FROM_ARRAYS" -"MATCH" "match" -"MATERIALIZE COLUMN" -"MATERIALIZE INDEX" -"MATERIALIZE PROJECTION" -"MATERIALIZE STATISTICS" -"MATERIALIZE TTL" -"MATERIALIZE" "materialize" -"MATERIALIZED" -"MAX" "max" "max2" "maxArgMax" @@ -2281,9 +2774,6 @@ "maxResample" "maxSimpleState" "maxState" -"MCS" -"MD4" -"MD5" "meanZTest" "meanZTestArgMax" "meanZTestArgMin" @@ -2524,19 +3014,8 @@ "medianTimingWeightedResample" "medianTimingWeightedSimpleState" "medianTimingWeightedState" -"MEMORY" -"Merge" -"MERGES" -"METRICS INNER UUID" -"METRICS" "metroHash64" -"MI" -"MICROSECOND" -"MICROSECONDS" "mid" -"MILLISECOND" -"MILLISECONDS" -"MIN" "min" "min2" "minArgMax" @@ -2572,37 +3051,15 @@ "minSimpleState" "minState" "minus" -"MINUTE" -"MINUTES" "mismatches" -"MM" -"MOD" "mod" -"MODIFY COLUMN" -"MODIFY COMMENT" -"MODIFY DEFINER" -"MODIFY ORDER BY" -"MODIFY QUERY" -"MODIFY REFRESH" -"MODIFY SAMPLE BY" -"MODIFY SETTING" -"MODIFY SQL SECURITY" -"MODIFY STATISTICS" -"MODIFY TTL" -"MODIFY" "modulo" "moduloLegacy" "moduloOrZero" -"MONTH" "monthName" -"MONTHS" "mortonDecode" "mortonEncode" -"MOVE PART" -"MOVE PARTITION" -"MOVE" "movingXXX" -"MS" "multiFuzzyMatchAllIndices" "multiFuzzyMatchAny" "multiFuzzyMatchAnyIndex" @@ -2610,8 +3067,6 @@ "multiMatchAllIndices" "multiMatchAny" "multiMatchAnyIndex" -"multiply" -"multiplyDecimal" "multiSearchAllPositions" "multiSearchAllPositionsCaseInsensitive" "multiSearchAllPositionsCaseInsensitiveUTF8" @@ -2628,23 +3083,17 @@ "multiSearchFirstPositionCaseInsensitive" "multiSearchFirstPositionCaseInsensitiveUTF8" "multiSearchFirstPositionUTF8" +"multiply" +"multiplyDecimal" "murmurHash2_32" "murmurHash2_64" "murmurHash3_128" "murmurHash3_32" "murmurHash3_64" -"MUTATION" -"N" -"NAME" -"NAMED COLLECTION" -"NANOSECOND" -"NANOSECONDS" "negate" "neighbor" -"Nested" "nested" "netloc" 
-"NEXT" "ngramDistance" "ngramDistanceCaseInsensitive" "ngramDistanceCaseInsensitiveUTF8" @@ -2657,7 +3106,6 @@ "ngramMinHashCaseInsensitive" "ngramMinHashCaseInsensitiveUTF8" "ngramMinHashUTF8" -"ngrams" "ngramSearch" "ngramSearchCaseInsensitive" "ngramSearchCaseInsensitiveUTF8" @@ -2666,10 +3114,7 @@ "ngramSimHashCaseInsensitive" "ngramSimHashCaseInsensitiveUTF8" "ngramSimHashUTF8" -"NO ACTION" -"NO DELAY" -"NO LIMITS" -"NONE" +"ngrams" "nonNegativeDerivative" "nonNegativeDerivativeArgMax" "nonNegativeDerivativeArgMin" @@ -2685,8 +3130,11 @@ "nonNegativeDerivativeResample" "nonNegativeDerivativeSimpleState" "nonNegativeDerivativeState" -"normalizedQueryHash" -"normalizedQueryHashKeepNames" +"normL1" +"normL2" +"normL2Squared" +"normLinf" +"normLp" "normalizeL1" "normalizeL2" "normalizeLinf" @@ -2697,23 +3145,17 @@ "normalizeUTF8NFD" "normalizeUTF8NFKC" "normalizeUTF8NFKD" -"normL1" -"normL2" -"normL2Squared" -"normLinf" -"normLp" -"NOT BETWEEN" -"NOT IDENTIFIED" -"NOT ILIKE" -"NOT IN" -"NOT KEYED" -"NOT LIKE" -"NOT OVERRIDABLE" -"NOT" +"normalizedQueryHash" +"normalizedQueryHashKeepNames" "not" "notEmpty" "notEquals" -"Nothing" +"notILike" +"notIn" +"notInIgnoreSet" +"notLike" +"notNullIn" +"notNullInIgnoreSet" "nothing" "nothingArgMax" "nothingArgMin" @@ -2758,17 +3200,9 @@ "nothingUInt64Resample" "nothingUInt64SimpleState" "nothingUInt64State" -"notILike" -"notIn" -"notInIgnoreSet" -"notLike" -"notNullIn" -"notNullInIgnoreSet" "now" "now64" "nowInBlock" -"NO_PASSWORD" -"NS" "nth_value" "nth_valueArgMax" "nth_valueArgMin" @@ -2799,28 +3233,10 @@ "ntileResample" "ntileSimpleState" "ntileState" -"NULL" -"Null" -"Nullable" "nullIf" "nullIn" "nullInIgnoreSet" -"NULLS" -"OCTET_LENGTH" -"OFFSET" -"ON DELETE" -"ON UPDATE" -"ON VOLUME" -"ON" -"ONLY" -"OPTIMIZE TABLE" -"OR REPLACE" -"OR" "or" -"ORDER BY" -"OUTER" -"OVER" -"OVERRIDABLE" "parseDateTime" "parseDateTime32BestEffort" "parseDateTime32BestEffortOrNull" @@ -2846,15 +3262,8 @@ "parseReadableSizeOrNull" "parseReadableSizeOrZero" "parseTimeDelta" -"PART" -"PARTIAL" -"PARTITION BY" -"PARTITION" "partitionID" "partitionId" -"PARTITIONS" -"PART_MOVE_TO_SHARD" -"PASTE" "path" "pathFull" "percentRank" @@ -2887,15 +3296,7 @@ "percent_rankResample" "percent_rankSimpleState" "percent_rankState" -"PERIODIC REFRESH" -"PERMANENTLY" -"PERMISSIVE" -"PERSISTENT SEQUENTIAL" -"PERSISTENT" "pi" -"PIPELINE" -"PLAINTEXT_PASSWORD" -"PLAN" "plus" "pmod" "pointInEllipses" @@ -2916,7 +3317,6 @@ "polygonsUnionSpherical" "polygonsWithinCartesian" "polygonsWithinSpherical" -"POPULATE" "port" "portRFC" "position" @@ -2927,23 +3327,11 @@ "positive_modulo" "pow" "power" -"PRECEDING" -"PRECISION" -"PREWHERE" -"PRIMARY KEY" -"PRIMARY" "printf" -"PROFILE" -"PROJECTION" "proportionsZTest" -"Protobuf" "protocol" -"PULL" "punycodeDecode" "punycodeEncode" -"Q" -"QQ" -"QUALIFY" "quantile" "quantileArgMax" "quantileArgMin" @@ -3137,6 +3525,68 @@ "quantileOrDefault" "quantileOrNull" "quantileResample" +"quantileSimpleState" +"quantileState" +"quantileTDigest" +"quantileTDigestArgMax" +"quantileTDigestArgMin" +"quantileTDigestArray" +"quantileTDigestDistinct" +"quantileTDigestForEach" +"quantileTDigestIf" +"quantileTDigestMap" +"quantileTDigestMerge" +"quantileTDigestNull" +"quantileTDigestOrDefault" +"quantileTDigestOrNull" +"quantileTDigestResample" +"quantileTDigestSimpleState" +"quantileTDigestState" +"quantileTDigestWeighted" +"quantileTDigestWeightedArgMax" +"quantileTDigestWeightedArgMin" +"quantileTDigestWeightedArray" +"quantileTDigestWeightedDistinct" 
+"quantileTDigestWeightedForEach" +"quantileTDigestWeightedIf" +"quantileTDigestWeightedMap" +"quantileTDigestWeightedMerge" +"quantileTDigestWeightedNull" +"quantileTDigestWeightedOrDefault" +"quantileTDigestWeightedOrNull" +"quantileTDigestWeightedResample" +"quantileTDigestWeightedSimpleState" +"quantileTDigestWeightedState" +"quantileTiming" +"quantileTimingArgMax" +"quantileTimingArgMin" +"quantileTimingArray" +"quantileTimingDistinct" +"quantileTimingForEach" +"quantileTimingIf" +"quantileTimingMap" +"quantileTimingMerge" +"quantileTimingNull" +"quantileTimingOrDefault" +"quantileTimingOrNull" +"quantileTimingResample" +"quantileTimingSimpleState" +"quantileTimingState" +"quantileTimingWeighted" +"quantileTimingWeightedArgMax" +"quantileTimingWeightedArgMin" +"quantileTimingWeightedArray" +"quantileTimingWeightedDistinct" +"quantileTimingWeightedForEach" +"quantileTimingWeightedIf" +"quantileTimingWeightedMap" +"quantileTimingWeightedMerge" +"quantileTimingWeightedNull" +"quantileTimingWeightedOrDefault" +"quantileTimingWeightedOrNull" +"quantileTimingWeightedResample" +"quantileTimingWeightedSimpleState" +"quantileTimingWeightedState" "quantiles" "quantilesArgMax" "quantilesArgMin" @@ -3309,7 +3759,6 @@ "quantilesGKSimpleState" "quantilesGKState" "quantilesIf" -"quantileSimpleState" "quantilesInterpolatedWeighted" "quantilesInterpolatedWeightedArgMax" "quantilesInterpolatedWeightedArgMin" @@ -3333,7 +3782,6 @@ "quantilesResample" "quantilesSimpleState" "quantilesState" -"quantileState" "quantilesTDigest" "quantilesTDigestArgMax" "quantilesTDigestArgMin" @@ -3394,75 +3842,10 @@ "quantilesTimingWeightedResample" "quantilesTimingWeightedSimpleState" "quantilesTimingWeightedState" -"quantileTDigest" -"quantileTDigestArgMax" -"quantileTDigestArgMin" -"quantileTDigestArray" -"quantileTDigestDistinct" -"quantileTDigestForEach" -"quantileTDigestIf" -"quantileTDigestMap" -"quantileTDigestMerge" -"quantileTDigestNull" -"quantileTDigestOrDefault" -"quantileTDigestOrNull" -"quantileTDigestResample" -"quantileTDigestSimpleState" -"quantileTDigestState" -"quantileTDigestWeighted" -"quantileTDigestWeightedArgMax" -"quantileTDigestWeightedArgMin" -"quantileTDigestWeightedArray" -"quantileTDigestWeightedDistinct" -"quantileTDigestWeightedForEach" -"quantileTDigestWeightedIf" -"quantileTDigestWeightedMap" -"quantileTDigestWeightedMerge" -"quantileTDigestWeightedNull" -"quantileTDigestWeightedOrDefault" -"quantileTDigestWeightedOrNull" -"quantileTDigestWeightedResample" -"quantileTDigestWeightedSimpleState" -"quantileTDigestWeightedState" -"quantileTiming" -"quantileTimingArgMax" -"quantileTimingArgMin" -"quantileTimingArray" -"quantileTimingDistinct" -"quantileTimingForEach" -"quantileTimingIf" -"quantileTimingMap" -"quantileTimingMerge" -"quantileTimingNull" -"quantileTimingOrDefault" -"quantileTimingOrNull" -"quantileTimingResample" -"quantileTimingSimpleState" -"quantileTimingState" -"quantileTimingWeighted" -"quantileTimingWeightedArgMax" -"quantileTimingWeightedArgMin" -"quantileTimingWeightedArray" -"quantileTimingWeightedDistinct" -"quantileTimingWeightedForEach" -"quantileTimingWeightedIf" -"quantileTimingWeightedMap" -"quantileTimingWeightedMerge" -"quantileTimingWeightedNull" -"quantileTimingWeightedOrDefault" -"quantileTimingWeightedOrNull" -"quantileTimingWeightedResample" -"quantileTimingWeightedSimpleState" -"quantileTimingWeightedState" -"QUARTER" -"QUARTERS" -"QUERY TREE" -"QUERY" "queryID" "queryString" "queryStringAndFragment" "query_id" -"QUOTA" "radians" "rand" "rand32" @@ 
-3477,16 +3860,13 @@ "randLogNormal" "randNegativeBinomial" "randNormal" -"randomFixedString" -"RANDOMIZE FOR" -"RANDOMIZED" -"randomPrintableASCII" -"randomString" -"randomStringUTF8" "randPoisson" "randStudentT" "randUniform" -"RANGE" +"randomFixedString" +"randomPrintableASCII" +"randomString" +"randomStringUTF8" "range" "rank" "rankArgMax" @@ -3518,24 +3898,14 @@ "rankResample" "rankSimpleState" "rankState" -"READONLY" "readWKTLineString" "readWKTMultiLineString" "readWKTMultiPolygon" "readWKTPoint" "readWKTPolygon" "readWKTRing" -"REALM" -"RECOMPRESS" -"RECURSIVE" -"REFERENCES" -"REFRESH" -"REGEXP" "regexpExtract" "regexpQuoteMeta" -"REGEXP_EXTRACT" -"REGEXP_MATCHES" -"REGEXP_REPLACE" "regionHierarchy" "regionIn" "regionToArea" @@ -3566,31 +3936,13 @@ "reinterpretAsUInt64" "reinterpretAsUInt8" "reinterpretAsUUID" -"REMOVE SAMPLE BY" -"REMOVE TTL" -"REMOVE" -"RENAME COLUMN" -"RENAME DATABASE" -"RENAME DICTIONARY" -"RENAME TABLE" -"RENAME TO" -"RENAME" "repeat" -"REPLACE PARTITION" -"REPLACE" "replace" "replaceAll" "replaceOne" "replaceRegexpAll" "replaceRegexpOne" "replicate" -"Resample" -"RESET SETTING" -"RESPECT NULLS" -"RESTORE" -"RESTRICT" -"RESTRICTIVE" -"RESUME" "retention" "retentionArgMax" "retentionArgMin" @@ -3609,24 +3961,18 @@ "reverse" "reverseUTF8" "revision" -"REVOKE" -"RIGHT" "right" "rightPad" "rightPadUTF8" "rightUTF8" -"ROLLBACK" -"ROLLUP" "round" "roundAge" "roundBankers" "roundDown" "roundDuration" "roundToExp2" -"ROW" "rowNumberInAllBlocks" "rowNumberInBlock" -"ROWS" "row_number" "row_numberArgMax" "row_numberArgMin" @@ -3643,13 +3989,11 @@ "row_numberSimpleState" "row_numberState" "rpad" -"RTRIM" "rtrim" "runningAccumulate" "runningConcurrency" "runningDifference" "runningDifferenceStartingWithFirstValue" -"S" "s2CapContains" "s2CapUnion" "s2CellsIntersect" @@ -3659,18 +4003,7 @@ "s2RectIntersection" "s2RectUnion" "s2ToGeo" -"S3" -"SALT" -"SAMPLE BY" -"SAMPLE" -"SAN" "scalarProduct" -"SCHEMA" -"SCHEME" -"SECOND" -"SECONDS" -"SELECT" -"SEMI" "sequenceCount" "sequenceCountArgMax" "sequenceCountArgMin" @@ -3719,43 +4052,14 @@ "seriesDecomposeSTL" "seriesOutliersDetectTukey" "seriesPeriodDetectFFT" -"SERVER" "serverTimeZone" "serverTimezone" "serverUUID" -"SET DEFAULT ROLE" -"SET DEFAULT" -"SET FAKE TIME" -"SET NULL" -"SET ROLE DEFAULT" -"SET ROLE" -"SET TRANSACTION SNAPSHOT" -"SET" -"SETTINGS" -"SHA1" -"SHA224" -"SHA256" -"SHA256_HASH" -"SHA256_PASSWORD" -"SHA384" -"SHA512" -"SHA512_256" "shardCount" "shardNum" -"SHOW ACCESS" -"SHOW CREATE" -"SHOW ENGINES" -"SHOW FUNCTIONS" -"SHOW GRANTS" -"SHOW PRIVILEGES" -"SHOW PROCESSLIST" -"SHOW SETTING" -"SHOW" "showCertificate" "sigmoid" "sign" -"SIGNED" -"SIMPLE" "simpleJSONExtractBool" "simpleJSONExtractFloat" "simpleJSONExtractInt" @@ -3838,39 +4142,37 @@ "snowflakeToDateTime" "snowflakeToDateTime64" "soundex" -"SOURCE" "space" "sparkBar" -"sparkbar" "sparkBarArgMax" -"sparkbarArgMax" "sparkBarArgMin" -"sparkbarArgMin" "sparkBarArray" -"sparkbarArray" "sparkBarDistinct" -"sparkbarDistinct" "sparkBarForEach" -"sparkbarForEach" "sparkBarIf" -"sparkbarIf" "sparkBarMap" -"sparkbarMap" "sparkBarMerge" -"sparkbarMerge" "sparkBarNull" -"sparkbarNull" "sparkBarOrDefault" -"sparkbarOrDefault" "sparkBarOrNull" -"sparkbarOrNull" "sparkBarResample" -"sparkbarResample" "sparkBarSimpleState" -"sparkbarSimpleState" "sparkBarState" +"sparkbar" +"sparkbarArgMax" +"sparkbarArgMin" +"sparkbarArray" +"sparkbarDistinct" +"sparkbarForEach" +"sparkbarIf" +"sparkbarMap" +"sparkbarMerge" +"sparkbarNull" +"sparkbarOrDefault" +"sparkbarOrNull" 
+"sparkbarResample" +"sparkbarSimpleState" "sparkbarState" -"SPATIAL" "splitByAlpha" "splitByChar" "splitByNonAlpha" @@ -3880,31 +4182,9 @@ "sqid" "sqidDecode" "sqidEncode" -"SQL SECURITY" -"SQL_TSI_DAY" -"SQL_TSI_HOUR" -"SQL_TSI_MICROSECOND" -"SQL_TSI_MILLISECOND" -"SQL_TSI_MINUTE" -"SQL_TSI_MONTH" -"SQL_TSI_NANOSECOND" -"SQL_TSI_QUARTER" -"SQL_TSI_SECOND" -"SQL_TSI_WEEK" -"SQL_TSI_YEAR" "sqrt" -"SS" -"SSH_KEY" -"SSL_CERTIFICATE" -"START TRANSACTION" "startsWith" "startsWithUTF8" -"State" -"STATISTICS" -"STD" -"STDArgMax" -"STDArgMin" -"STDArray" "stddevPop" "stddevPopArgMax" "stddevPopArgMin" @@ -3965,49 +4245,7 @@ "stddevSampStableSimpleState" "stddevSampStableState" "stddevSampState" -"STDDEV_POP" -"STDDEV_POPArgMax" -"STDDEV_POPArgMin" -"STDDEV_POPArray" -"STDDEV_POPDistinct" -"STDDEV_POPForEach" -"STDDEV_POPIf" -"STDDEV_POPMap" -"STDDEV_POPMerge" -"STDDEV_POPNull" -"STDDEV_POPOrDefault" -"STDDEV_POPOrNull" -"STDDEV_POPResample" -"STDDEV_POPSimpleState" -"STDDEV_POPState" -"STDDEV_SAMP" -"STDDEV_SAMPArgMax" -"STDDEV_SAMPArgMin" -"STDDEV_SAMPArray" -"STDDEV_SAMPDistinct" -"STDDEV_SAMPForEach" -"STDDEV_SAMPIf" -"STDDEV_SAMPMap" -"STDDEV_SAMPMerge" -"STDDEV_SAMPNull" -"STDDEV_SAMPOrDefault" -"STDDEV_SAMPOrNull" -"STDDEV_SAMPResample" -"STDDEV_SAMPSimpleState" -"STDDEV_SAMPState" -"STDDistinct" -"STDForEach" -"STDIf" -"STDMap" -"STDMerge" -"STDNull" -"STDOrDefault" -"STDOrNull" -"STDResample" -"STDSimpleState" -"STDState" "stem" -"STEP" "stochasticLinearRegression" "stochasticLinearRegressionArgMax" "stochasticLinearRegressionArgMin" @@ -4038,17 +4276,13 @@ "stochasticLogisticRegressionResample" "stochasticLogisticRegressionSimpleState" "stochasticLogisticRegressionState" -"STORAGE" -"STRICT" -"STRICTLY_ASCENDING" -"String" +"str_to_date" +"str_to_map" "stringJaccardIndex" "stringJaccardIndexUTF8" "stringToH3" "structureToCapnProtoSchema" "structureToProtobufSchema" -"str_to_date" -"str_to_map" "studentTTest" "studentTTestArgMax" "studentTTestArgMin" @@ -4066,16 +4300,11 @@ "studentTTestState" "subBitmap" "subDate" -"SUBPARTITION BY" -"SUBPARTITION" -"SUBPARTITIONS" "substr" -"SUBSTRING" "substring" "substringIndex" "substringIndexUTF8" "substringUTF8" -"SUBSTRING_INDEX" "subtractDays" "subtractHours" "subtractInterval" @@ -4157,21 +4386,6 @@ "sumMapFilteredWithOverflowResample" "sumMapFilteredWithOverflowSimpleState" "sumMapFilteredWithOverflowState" -"sumMappedArrays" -"sumMappedArraysArgMax" -"sumMappedArraysArgMin" -"sumMappedArraysArray" -"sumMappedArraysDistinct" -"sumMappedArraysForEach" -"sumMappedArraysIf" -"sumMappedArraysMap" -"sumMappedArraysMerge" -"sumMappedArraysNull" -"sumMappedArraysOrDefault" -"sumMappedArraysOrNull" -"sumMappedArraysResample" -"sumMappedArraysSimpleState" -"sumMappedArraysState" "sumMapWithOverflow" "sumMapWithOverflowArgMax" "sumMapWithOverflowArgMin" @@ -4187,6 +4401,21 @@ "sumMapWithOverflowResample" "sumMapWithOverflowSimpleState" "sumMapWithOverflowState" +"sumMappedArrays" +"sumMappedArraysArgMax" +"sumMappedArraysArgMin" +"sumMappedArraysArray" +"sumMappedArraysDistinct" +"sumMappedArraysForEach" +"sumMappedArraysIf" +"sumMappedArraysMap" +"sumMappedArraysMerge" +"sumMappedArraysNull" +"sumMappedArraysOrDefault" +"sumMappedArraysOrNull" +"sumMappedArraysResample" +"sumMappedArraysSimpleState" +"sumMappedArraysState" "sumMerge" "sumNull" "sumOrDefault" @@ -4209,24 +4438,11 @@ "sumWithOverflowResample" "sumWithOverflowSimpleState" "sumWithOverflowState" -"SUSPEND" -"SVG" "svg" -"SYNC" "synonyms" -"SYNTAX" -"SYSTEM" -"TABLE OVERRIDE" -"TABLE" 
-"TABLES" -"TAGS INNER UUID" -"TAGS" "tan" "tanh" "tcpPort" -"TEMPORARY TABLE" -"TEMPORARY" -"TEST" "tgamma" "theilsU" "theilsUArgMax" @@ -4243,34 +4459,20 @@ "theilsUResample" "theilsUSimpleState" "theilsUState" -"THEN" "throwIf" "tid" "timeDiff" "timeSlot" "timeSlots" -"TIMESTAMP" -"timestamp" -"TIMESTAMPADD" -"TIMESTAMPDIFF" -"timestampDiff" -"TIMESTAMPSUB" -"TIMESTAMP_ADD" -"TIMESTAMP_DIFF" -"timestamp_diff" -"TIMESTAMP_SUB" "timeZone" -"timezone" "timeZoneOf" -"timezoneOf" "timeZoneOffset" +"timestamp" +"timestampDiff" +"timestamp_diff" +"timezone" +"timezoneOf" "timezoneOffset" -"TO DISK" -"TO INNER UUID" -"TO SHARD" -"TO TABLE" -"TO VOLUME" -"TO" "toBool" "toColumnTypeName" "toDate" @@ -4290,7 +4492,6 @@ "toDateTimeOrDefault" "toDateTimeOrNull" "toDateTimeOrZero" -"today" "toDayOfMonth" "toDayOfWeek" "toDayOfYear" @@ -4322,6 +4523,16 @@ "toFloat64OrNull" "toFloat64OrZero" "toHour" +"toIPv4" +"toIPv4OrDefault" +"toIPv4OrNull" +"toIPv4OrZero" +"toIPv6" +"toIPv6OrDefault" +"toIPv6OrNull" +"toIPv6OrZero" +"toISOWeek" +"toISOYear" "toInt128" "toInt128OrDefault" "toInt128OrNull" @@ -4357,18 +4568,7 @@ "toIntervalSecond" "toIntervalWeek" "toIntervalYear" -"toIPv4" -"toIPv4OrDefault" -"toIPv4OrNull" -"toIPv4OrZero" -"toIPv6" -"toIPv6OrDefault" -"toIPv6OrNull" -"toIPv6OrZero" -"toISOWeek" -"toISOYear" "toJSONString" -"tokens" "toLastDayOfMonth" "toLastDayOfWeek" "toLowCardinality" @@ -4379,7 +4579,82 @@ "toMonday" "toMonth" "toNullable" -"TOP" +"toQuarter" +"toRelativeDayNum" +"toRelativeHourNum" +"toRelativeMinuteNum" +"toRelativeMonthNum" +"toRelativeQuarterNum" +"toRelativeSecondNum" +"toRelativeWeekNum" +"toRelativeYearNum" +"toSecond" +"toStartOfDay" +"toStartOfFifteenMinutes" +"toStartOfFiveMinute" +"toStartOfFiveMinutes" +"toStartOfHour" +"toStartOfISOYear" +"toStartOfInterval" +"toStartOfMicrosecond" +"toStartOfMillisecond" +"toStartOfMinute" +"toStartOfMonth" +"toStartOfNanosecond" +"toStartOfQuarter" +"toStartOfSecond" +"toStartOfTenMinutes" +"toStartOfWeek" +"toStartOfYear" +"toString" +"toStringCutToZero" +"toTime" +"toTimeZone" +"toTimezone" +"toTypeName" +"toUInt128" +"toUInt128OrDefault" +"toUInt128OrNull" +"toUInt128OrZero" +"toUInt16" +"toUInt16OrDefault" +"toUInt16OrNull" +"toUInt16OrZero" +"toUInt256" +"toUInt256OrDefault" +"toUInt256OrNull" +"toUInt256OrZero" +"toUInt32" +"toUInt32OrDefault" +"toUInt32OrNull" +"toUInt32OrZero" +"toUInt64" +"toUInt64OrDefault" +"toUInt64OrNull" +"toUInt64OrZero" +"toUInt8" +"toUInt8OrDefault" +"toUInt8OrNull" +"toUInt8OrZero" +"toUTCTimestamp" +"toUUID" +"toUUIDOrDefault" +"toUUIDOrNull" +"toUUIDOrZero" +"toUnixTimestamp" +"toUnixTimestamp64Micro" +"toUnixTimestamp64Milli" +"toUnixTimestamp64Nano" +"toValidUTF8" +"toWeek" +"toYYYYMM" +"toYYYYMMDD" +"toYYYYMMDDhhmmss" +"toYear" +"toYearWeek" +"to_utc_timestamp" +"today" +"tokens" "topK" "topKArgMax" "topKArgMin" @@ -4412,102 +4687,17 @@ "topKWeightedState" "topLevelDomain" "topLevelDomainRFC" -"toQuarter" -"toRelativeDayNum" -"toRelativeHourNum" -"toRelativeMinuteNum" -"toRelativeMonthNum" -"toRelativeQuarterNum" -"toRelativeSecondNum" -"toRelativeWeekNum" -"toRelativeYearNum" -"toSecond" -"toStartOfDay" -"toStartOfFifteenMinutes" -"toStartOfFiveMinute" -"toStartOfFiveMinutes" -"toStartOfHour" -"toStartOfInterval" -"toStartOfISOYear" -"toStartOfMicrosecond" -"toStartOfMillisecond" -"toStartOfMinute" -"toStartOfMonth" -"toStartOfNanosecond" -"toStartOfQuarter" -"toStartOfSecond" -"toStartOfTenMinutes" -"toStartOfWeek" -"toStartOfYear" -"toString" -"toStringCutToZero" -"TOTALS" -"toTime" 
-"toTimeZone" -"toTimezone" -"toTypeName" -"toUInt128" -"toUInt128OrDefault" -"toUInt128OrNull" -"toUInt128OrZero" -"toUInt16" -"toUInt16OrDefault" -"toUInt16OrNull" -"toUInt16OrZero" -"toUInt256" -"toUInt256OrDefault" -"toUInt256OrNull" -"toUInt256OrZero" -"toUInt32" -"toUInt32OrDefault" -"toUInt32OrNull" -"toUInt32OrZero" -"toUInt64" -"toUInt64OrDefault" -"toUInt64OrNull" -"toUInt64OrZero" -"toUInt8" -"toUInt8OrDefault" -"toUInt8OrNull" -"toUInt8OrZero" -"toUnixTimestamp" -"toUnixTimestamp64Micro" -"toUnixTimestamp64Milli" -"toUnixTimestamp64Nano" -"toUTCTimestamp" -"toUUID" -"toUUIDOrDefault" -"toUUIDOrNull" -"toUUIDOrZero" -"toValidUTF8" -"toWeek" -"toYear" -"toYearWeek" -"toYYYYMM" -"toYYYYMMDD" -"toYYYYMMDDhhmmss" -"TO_BASE64" -"TO_DAYS" -"TO_UNIXTIME" -"to_utc_timestamp" -"TRACKING ONLY" -"TRAILING" -"TRANSACTION" "transactionID" "transactionLatestSnapshot" "transactionOldestSnapshot" "transform" "translate" "translateUTF8" -"TRIGGER" -"TRIM" "trim" "trimBoth" "trimLeft" "trimRight" -"TRUE" "trunc" -"TRUNCATE" "truncate" "tryBase58Decode" "tryBase64Decode" @@ -4515,11 +4705,9 @@ "tryDecrypt" "tryIdnaEncode" "tryPunycodeDecode" -"TTL" "tumble" "tumbleEnd" "tumbleStart" -"Tuple" "tuple" "tupleConcat" "tupleDivide" @@ -4539,20 +4727,9 @@ "tupleNegate" "tuplePlus" "tupleToNameValuePairs" -"TYPE" -"TYPEOF" "ucase" -"UInt16" -"UInt32" -"UInt64" -"UInt8" -"ULIDStringToDateTime" "unbin" -"UNBOUNDED" -"UNDROP" -"UNFREEZE" "unhex" -"UNION" "uniq" "uniqArgMax" "uniqArgMin" @@ -4646,7 +4823,6 @@ "uniqThetaSimpleState" "uniqThetaState" "uniqThetaUnion" -"UNIQUE" "uniqUpTo" "uniqUpToArgMax" "uniqUpToArgMin" @@ -4662,31 +4838,11 @@ "uniqUpToResample" "uniqUpToSimpleState" "uniqUpToState" -"UNSET FAKE TIME" -"UNSIGNED" -"UPDATE" "upper" "upperUTF8" "uptime" -"URL" -"URLHash" -"URLHierarchy" -"URLPathHierarchy" -"USE" "user" -"USING" -"UTCTimestamp" -"UTC_timestamp" -"UUID" -"UUIDNumToString" -"UUIDStringToNum" -"UUIDToNum" -"UUIDv7ToDateTime" -"VALID UNTIL" "validateNestedArraySizes" -"VALUES" -"variantElement" -"variantType" "varPop" "varPopArgMax" "varPopArgMin" @@ -4747,42 +4903,11 @@ "varSampStableSimpleState" "varSampStableState" "varSampState" -"VARYING" -"VAR_POP" -"VAR_POPArgMax" -"VAR_POPArgMin" -"VAR_POPArray" -"VAR_POPDistinct" -"VAR_POPForEach" -"VAR_POPIf" -"VAR_POPMap" -"VAR_POPMerge" -"VAR_POPNull" -"VAR_POPOrDefault" -"VAR_POPOrNull" -"VAR_POPResample" -"VAR_POPSimpleState" -"VAR_POPState" -"VAR_SAMP" -"VAR_SAMPArgMax" -"VAR_SAMPArgMin" -"VAR_SAMPArray" -"VAR_SAMPDistinct" -"VAR_SAMPForEach" -"VAR_SAMPIf" -"VAR_SAMPMap" -"VAR_SAMPMerge" -"VAR_SAMPNull" -"VAR_SAMPOrDefault" -"VAR_SAMPOrNull" -"VAR_SAMPResample" -"VAR_SAMPSimpleState" -"VAR_SAMPState" +"variantElement" +"variantType" "vectorDifference" "vectorSum" "version" -"VIEW" -"VISIBLE" "visibleWidth" "visitParamExtractBool" "visitParamExtractFloat" @@ -4791,11 +4916,7 @@ "visitParamExtractString" "visitParamExtractUInt" "visitParamHas" -"WATCH" -"WATERMARK" -"WEEK" "week" -"WEEKS" "welchTTest" "welchTTestArgMax" "welchTTestArgMin" @@ -4811,11 +4932,8 @@ "welchTTestResample" "welchTTestSimpleState" "welchTTestState" -"WHEN" -"WHERE" "widthBucket" "width_bucket" -"WINDOW" "windowFunnel" "windowFunnelArgMax" "windowFunnelArgMin" @@ -4832,16 +4950,6 @@ "windowFunnelSimpleState" "windowFunnelState" "windowID" -"WITH ADMIN OPTION" -"WITH CHECK" -"WITH FILL" -"WITH GRANT OPTION" -"WITH NAME" -"WITH REPLACE OPTION" -"WITH TIES" -"WITH" -"WITH_ITEMINDEX" -"WK" "wkt" "wordShingleMinHash" "wordShingleMinHashArg" @@ -4855,31 +4963,12 @@ 
"wordShingleSimHashCaseInsensitive" "wordShingleSimHashCaseInsensitiveUTF8" "wordShingleSimHashUTF8" -"WRITABLE" -"WW" "wyHash64" "xor" -"xxh3" "xxHash32" "xxHash64" +"xxh3" "yandexConsistentHash" -"YEAR" -"YEARS" "yearweek" "yesterday" -"YY" -"YYYY" -"YYYYMMDDhhmmssToDateTime" -"YYYYMMDDhhmmssToDateTime64" -"YYYYMMDDToDate" -"YYYYMMDDToDate32" -"ZKPATH" "zookeeperSessionUptime" -"_CAST" -"__actionName" -"__bitBoolMaskAnd" -"__bitBoolMaskOr" -"__bitSwapLastTwo" -"__bitWrapperFunc" -"__getScalar" -"__scalarSubqueryResult" diff --git a/tests/fuzz/dictionaries/datatypes.dict b/tests/fuzz/dictionaries/datatypes.dict index e562595fb67..797905203b2 100644 --- a/tests/fuzz/dictionaries/datatypes.dict +++ b/tests/fuzz/dictionaries/datatypes.dict @@ -1,4283 +1,137 @@ -"BIT_AND" -"BIT_ANDArgMax" -"BIT_ANDArgMin" -"BIT_ANDArray" -"BIT_ANDDistinct" -"BIT_ANDForEach" -"BIT_ANDIf" -"BIT_ANDMap" -"BIT_ANDMerge" -"BIT_ANDNull" -"BIT_ANDOrDefault" -"BIT_ANDOrNull" -"BIT_ANDResample" -"BIT_ANDSimpleState" -"BIT_ANDState" -"BIT_OR" -"BIT_ORArgMax" -"BIT_ORArgMin" -"BIT_ORArray" -"BIT_ORDistinct" -"BIT_ORForEach" -"BIT_ORIf" -"BIT_ORMap" -"BIT_ORMerge" -"BIT_ORNull" -"BIT_OROrDefault" -"BIT_OROrNull" -"BIT_ORResample" -"BIT_ORSimpleState" -"BIT_ORState" -"BIT_XOR" -"BIT_XORArgMax" -"BIT_XORArgMin" -"BIT_XORArray" -"BIT_XORDistinct" -"BIT_XORForEach" -"BIT_XORIf" -"BIT_XORMap" -"BIT_XORMerge" -"BIT_XORNull" -"BIT_XOROrDefault" -"BIT_XOROrNull" -"BIT_XORResample" -"BIT_XORSimpleState" -"BIT_XORState" -"BLAKE3" -"CAST" -"CHARACTER_LENGTH" -"CHAR_LENGTH" -"COVAR_POP" -"COVAR_POPArgMax" -"COVAR_POPArgMin" -"COVAR_POPArray" -"COVAR_POPDistinct" -"COVAR_POPForEach" -"COVAR_POPIf" -"COVAR_POPMap" -"COVAR_POPMerge" -"COVAR_POPNull" -"COVAR_POPOrDefault" -"COVAR_POPOrNull" -"COVAR_POPResample" -"COVAR_POPSimpleState" -"COVAR_POPState" -"COVAR_SAMP" -"COVAR_SAMPArgMax" -"COVAR_SAMPArgMin" -"COVAR_SAMPArray" -"COVAR_SAMPDistinct" -"COVAR_SAMPForEach" -"COVAR_SAMPIf" -"COVAR_SAMPMap" -"COVAR_SAMPMerge" -"COVAR_SAMPNull" -"COVAR_SAMPOrDefault" -"COVAR_SAMPOrNull" -"COVAR_SAMPResample" -"COVAR_SAMPSimpleState" -"COVAR_SAMPState" -"CRC32" -"CRC32IEEE" -"CRC64" -"DATABASE" -"DATE" -"DATE_DIFF" -"DATE_FORMAT" -"DATE_TRUNC" -"DAY" -"DAYOFMONTH" -"DAYOFWEEK" -"DAYOFYEAR" -"FORMAT_BYTES" -"FQDN" -"FROM_BASE64" -"FROM_DAYS" -"FROM_UNIXTIME" -"HOUR" -"INET6_ATON" -"INET6_NTOA" -"INET_ATON" -"INET_NTOA" -"IPv4CIDRToRange" -"IPv4NumToString" -"IPv4NumToStringClassC" -"IPv4StringToNum" -"IPv4StringToNumOrDefault" -"IPv4StringToNumOrNull" -"IPv4ToIPv6" -"IPv6CIDRToRange" -"IPv6NumToString" -"IPv6StringToNum" -"IPv6StringToNumOrDefault" -"IPv6StringToNumOrNull" -"JSONArrayLength" -"JSONExtract" -"JSONExtractArrayRaw" -"JSONExtractBool" -"JSONExtractFloat" -"JSONExtractInt" -"JSONExtractKeys" -"JSONExtractKeysAndValues" -"JSONExtractKeysAndValuesRaw" -"JSONExtractRaw" -"JSONExtractString" -"JSONExtractUInt" -"JSONHas" -"JSONKey" -"JSONLength" -"JSONMergePatch" -"JSONType" -"JSON_ARRAY_LENGTH" -"JSON_EXISTS" -"JSON_QUERY" -"JSON_VALUE" -"L1Distance" -"L1Norm" -"L1Normalize" -"L2Distance" -"L2Norm" -"L2Normalize" -"L2SquaredDistance" -"L2SquaredNorm" -"LAST_DAY" -"LinfDistance" -"LinfNorm" -"LinfNormalize" -"LpDistance" -"LpNorm" -"LpNormalize" -"MACNumToString" -"MACStringToNum" -"MACStringToOUI" -"MAP_FROM_ARRAYS" -"MD4" -"MD5" -"MILLISECOND" -"MINUTE" -"MONTH" -"OCTET_LENGTH" -"QUARTER" -"REGEXP_EXTRACT" -"REGEXP_MATCHES" -"REGEXP_REPLACE" -"SCHEMA" -"SECOND" -"SHA1" -"SHA224" -"SHA256" -"SHA384" -"SHA512" -"SHA512_256" -"STD" 
-"STDArgMax" -"STDArgMin" -"STDArray" -"STDDEV_POP" -"STDDEV_POPArgMax" -"STDDEV_POPArgMin" -"STDDEV_POPArray" -"STDDEV_POPDistinct" -"STDDEV_POPForEach" -"STDDEV_POPIf" -"STDDEV_POPMap" -"STDDEV_POPMerge" -"STDDEV_POPNull" -"STDDEV_POPOrDefault" -"STDDEV_POPOrNull" -"STDDEV_POPResample" -"STDDEV_POPSimpleState" -"STDDEV_POPState" -"STDDEV_SAMP" -"STDDEV_SAMPArgMax" -"STDDEV_SAMPArgMin" -"STDDEV_SAMPArray" -"STDDEV_SAMPDistinct" -"STDDEV_SAMPForEach" -"STDDEV_SAMPIf" -"STDDEV_SAMPMap" -"STDDEV_SAMPMerge" -"STDDEV_SAMPNull" -"STDDEV_SAMPOrDefault" -"STDDEV_SAMPOrNull" -"STDDEV_SAMPResample" -"STDDEV_SAMPSimpleState" -"STDDEV_SAMPState" -"STDDistinct" -"STDForEach" -"STDIf" -"STDMap" -"STDMerge" -"STDNull" -"STDOrDefault" -"STDOrNull" -"STDResample" -"STDSimpleState" -"STDState" -"SUBSTRING_INDEX" -"SVG" -"TIMESTAMP_DIFF" -"TO_BASE64" -"TO_DAYS" -"TO_UNIXTIME" -"ULIDStringToDateTime" -"URLHash" -"URLHierarchy" -"URLPathHierarchy" -"UTCTimestamp" -"UTC_timestamp" -"UUIDNumToString" -"UUIDStringToNum" -"UUIDToNum" -"UUIDv7ToDateTime" -"VAR_POP" -"VAR_POPArgMax" -"VAR_POPArgMin" -"VAR_POPArray" -"VAR_POPDistinct" -"VAR_POPForEach" -"VAR_POPIf" -"VAR_POPMap" -"VAR_POPMerge" -"VAR_POPNull" -"VAR_POPOrDefault" -"VAR_POPOrNull" -"VAR_POPResample" -"VAR_POPSimpleState" -"VAR_POPState" -"VAR_SAMP" -"VAR_SAMPArgMax" -"VAR_SAMPArgMin" -"VAR_SAMPArray" -"VAR_SAMPDistinct" -"VAR_SAMPForEach" -"VAR_SAMPIf" -"VAR_SAMPMap" -"VAR_SAMPMerge" -"VAR_SAMPNull" -"VAR_SAMPOrDefault" -"VAR_SAMPOrNull" -"VAR_SAMPResample" -"VAR_SAMPSimpleState" -"VAR_SAMPState" +"AggregateFunction" +"Array" +"BIGINT" +"BIGINT SIGNED" +"BIGINT UNSIGNED" +"BINARY" +"BINARY LARGE OBJECT" +"BINARY VARYING" +"BIT" +"BLOB" +"BYTE" +"BYTEA" +"Bool" +"CHAR" +"CHAR LARGE OBJECT" +"CHAR VARYING" +"CHARACTER" +"CHARACTER LARGE OBJECT" +"CHARACTER VARYING" +"CLOB" +"DEC" +"DOUBLE" +"DOUBLE PRECISION" +"Date" +"Date32" +"DateTime" +"DateTime32" +"DateTime64" +"Decimal" +"Decimal128" +"Decimal256" +"Decimal32" +"Decimal64" +"Dynamic" +"ENUM" +"Enum" +"Enum16" +"Enum8" +"FIXED" +"FLOAT" +"FixedString" +"Float32" +"Float64" +"GEOMETRY" +"INET4" +"INET6" +"INT" +"INT SIGNED" +"INT UNSIGNED" +"INT1" +"INT1 SIGNED" +"INT1 UNSIGNED" +"INTEGER" +"INTEGER SIGNED" +"INTEGER UNSIGNED" +"IPv4" +"IPv6" +"Int128" +"Int16" +"Int256" +"Int32" +"Int64" +"Int8" +"IntervalDay" +"IntervalHour" +"IntervalMicrosecond" +"IntervalMillisecond" +"IntervalMinute" +"IntervalMonth" +"IntervalNanosecond" +"IntervalQuarter" +"IntervalSecond" +"IntervalWeek" +"IntervalYear" +"JSON" +"LONGBLOB" +"LONGTEXT" +"LineString" +"LowCardinality" +"MEDIUMBLOB" +"MEDIUMINT" +"MEDIUMINT SIGNED" +"MEDIUMINT UNSIGNED" +"MEDIUMTEXT" +"Map" +"MultiLineString" +"MultiPolygon" +"NATIONAL CHAR" +"NATIONAL CHAR VARYING" +"NATIONAL CHARACTER" +"NATIONAL CHARACTER LARGE OBJECT" +"NATIONAL CHARACTER VARYING" +"NCHAR" +"NCHAR LARGE OBJECT" +"NCHAR VARYING" +"NUMERIC" +"NVARCHAR" +"Nested" +"Nothing" +"Nullable" +"Object" +"Point" +"Polygon" +"REAL" +"Ring" +"SET" +"SIGNED" +"SINGLE" +"SMALLINT" +"SMALLINT SIGNED" +"SMALLINT UNSIGNED" +"SimpleAggregateFunction" +"String" +"TEXT" +"TIME" +"TIMESTAMP" +"TINYBLOB" +"TINYINT" +"TINYINT SIGNED" +"TINYINT UNSIGNED" +"TINYTEXT" +"Tuple" +"UInt128" +"UInt16" +"UInt256" +"UInt32" +"UInt64" +"UInt8" +"UNSIGNED" +"UUID" +"VARBINARY" +"VARCHAR" +"VARCHAR2" +"Variant" "YEAR" -"YYYYMMDDToDate" -"YYYYMMDDToDate32" -"YYYYMMDDhhmmssToDateTime" -"YYYYMMDDhhmmssToDateTime64" -"_CAST" -"__actionName" -"__bitBoolMaskAnd" -"__bitBoolMaskOr" -"__bitSwapLastTwo" 
-"__bitWrapperFunc" -"__getScalar" -"__scalarSubqueryResult" -"abs" -"accurateCast" -"accurateCastOrDefault" -"accurateCastOrNull" -"acos" -"acosh" -"addDate" -"addDays" -"addHours" -"addInterval" -"addMicroseconds" -"addMilliseconds" -"addMinutes" -"addMonths" -"addNanoseconds" -"addQuarters" -"addSeconds" -"addTupleOfIntervals" -"addWeeks" -"addYears" -"addressToLine" -"addressToLineWithInlines" -"addressToSymbol" -"aes_decrypt_mysql" -"aes_encrypt_mysql" -"age" -"aggThrow" -"aggThrowArgMax" -"aggThrowArgMin" -"aggThrowArray" -"aggThrowDistinct" -"aggThrowForEach" -"aggThrowIf" -"aggThrowMap" -"aggThrowMerge" -"aggThrowNull" -"aggThrowOrDefault" -"aggThrowOrNull" -"aggThrowResample" -"aggThrowSimpleState" -"aggThrowState" -"alphaTokens" -"analysisOfVariance" -"analysisOfVarianceArgMax" -"analysisOfVarianceArgMin" -"analysisOfVarianceArray" -"analysisOfVarianceDistinct" -"analysisOfVarianceForEach" -"analysisOfVarianceIf" -"analysisOfVarianceMap" -"analysisOfVarianceMerge" -"analysisOfVarianceNull" -"analysisOfVarianceOrDefault" -"analysisOfVarianceOrNull" -"analysisOfVarianceResample" -"analysisOfVarianceSimpleState" -"analysisOfVarianceState" -"and" -"anova" -"anovaArgMax" -"anovaArgMin" -"anovaArray" -"anovaDistinct" -"anovaForEach" -"anovaIf" -"anovaMap" -"anovaMerge" -"anovaNull" -"anovaOrDefault" -"anovaOrNull" -"anovaResample" -"anovaSimpleState" -"anovaState" -"any" -"anyArgMax" -"anyArgMin" -"anyArray" -"anyDistinct" -"anyForEach" -"anyHeavy" -"anyHeavyArgMax" -"anyHeavyArgMin" -"anyHeavyArray" -"anyHeavyDistinct" -"anyHeavyForEach" -"anyHeavyIf" -"anyHeavyMap" -"anyHeavyMerge" -"anyHeavyNull" -"anyHeavyOrDefault" -"anyHeavyOrNull" -"anyHeavyResample" -"anyHeavySimpleState" -"anyHeavyState" -"anyIf" -"anyLast" -"anyLastArgMax" -"anyLastArgMin" -"anyLastArray" -"anyLastDistinct" -"anyLastForEach" -"anyLastIf" -"anyLastMap" -"anyLastMerge" -"anyLastNull" -"anyLastOrDefault" -"anyLastOrNull" -"anyLastResample" -"anyLastSimpleState" -"anyLastState" -"anyLast_respect_nulls" -"anyLast_respect_nullsArgMax" -"anyLast_respect_nullsArgMin" -"anyLast_respect_nullsArray" -"anyLast_respect_nullsDistinct" -"anyLast_respect_nullsForEach" -"anyLast_respect_nullsIf" -"anyLast_respect_nullsMap" -"anyLast_respect_nullsMerge" -"anyLast_respect_nullsNull" -"anyLast_respect_nullsOrDefault" -"anyLast_respect_nullsOrNull" -"anyLast_respect_nullsResample" -"anyLast_respect_nullsSimpleState" -"anyLast_respect_nullsState" -"anyMap" -"anyMerge" -"anyNull" -"anyOrDefault" -"anyOrNull" -"anyResample" -"anySimpleState" -"anyState" -"any_respect_nulls" -"any_respect_nullsArgMax" -"any_respect_nullsArgMin" -"any_respect_nullsArray" -"any_respect_nullsDistinct" -"any_respect_nullsForEach" -"any_respect_nullsIf" -"any_respect_nullsMap" -"any_respect_nullsMerge" -"any_respect_nullsNull" -"any_respect_nullsOrDefault" -"any_respect_nullsOrNull" -"any_respect_nullsResample" -"any_respect_nullsSimpleState" -"any_respect_nullsState" -"any_value" -"any_valueArgMax" -"any_valueArgMin" -"any_valueArray" -"any_valueDistinct" -"any_valueForEach" -"any_valueIf" -"any_valueMap" -"any_valueMerge" -"any_valueNull" -"any_valueOrDefault" -"any_valueOrNull" -"any_valueResample" -"any_valueSimpleState" -"any_valueState" -"any_value_respect_nulls" -"any_value_respect_nullsArgMax" -"any_value_respect_nullsArgMin" -"any_value_respect_nullsArray" -"any_value_respect_nullsDistinct" -"any_value_respect_nullsForEach" -"any_value_respect_nullsIf" -"any_value_respect_nullsMap" -"any_value_respect_nullsMerge" -"any_value_respect_nullsNull" 
-"any_value_respect_nullsOrDefault" -"any_value_respect_nullsOrNull" -"any_value_respect_nullsResample" -"any_value_respect_nullsSimpleState" -"any_value_respect_nullsState" -"appendTrailingCharIfAbsent" -"approx_top_count" -"approx_top_countArgMax" -"approx_top_countArgMin" -"approx_top_countArray" -"approx_top_countDistinct" -"approx_top_countForEach" -"approx_top_countIf" -"approx_top_countMap" -"approx_top_countMerge" -"approx_top_countNull" -"approx_top_countOrDefault" -"approx_top_countOrNull" -"approx_top_countResample" -"approx_top_countSimpleState" -"approx_top_countState" -"approx_top_k" -"approx_top_kArgMax" -"approx_top_kArgMin" -"approx_top_kArray" -"approx_top_kDistinct" -"approx_top_kForEach" -"approx_top_kIf" -"approx_top_kMap" -"approx_top_kMerge" -"approx_top_kNull" -"approx_top_kOrDefault" -"approx_top_kOrNull" -"approx_top_kResample" -"approx_top_kSimpleState" -"approx_top_kState" -"approx_top_sum" -"approx_top_sumArgMax" -"approx_top_sumArgMin" -"approx_top_sumArray" -"approx_top_sumDistinct" -"approx_top_sumForEach" -"approx_top_sumIf" -"approx_top_sumMap" -"approx_top_sumMerge" -"approx_top_sumNull" -"approx_top_sumOrDefault" -"approx_top_sumOrNull" -"approx_top_sumResample" -"approx_top_sumSimpleState" -"approx_top_sumState" -"argMax" -"argMaxArgMax" -"argMaxArgMin" -"argMaxArray" -"argMaxDistinct" -"argMaxForEach" -"argMaxIf" -"argMaxMap" -"argMaxMerge" -"argMaxNull" -"argMaxOrDefault" -"argMaxOrNull" -"argMaxResample" -"argMaxSimpleState" -"argMaxState" -"argMin" -"argMinArgMax" -"argMinArgMin" -"argMinArray" -"argMinDistinct" -"argMinForEach" -"argMinIf" -"argMinMap" -"argMinMerge" -"argMinNull" -"argMinOrDefault" -"argMinOrNull" -"argMinResample" -"argMinSimpleState" -"argMinState" -"array" -"arrayAUC" -"arrayAll" -"arrayAvg" -"arrayCompact" -"arrayConcat" -"arrayCount" -"arrayCumSum" -"arrayCumSumNonNegative" -"arrayDifference" -"arrayDistinct" -"arrayDotProduct" -"arrayElement" -"arrayEnumerate" -"arrayEnumerateDense" -"arrayEnumerateDenseRanked" -"arrayEnumerateUniq" -"arrayEnumerateUniqRanked" -"arrayExists" -"arrayFill" -"arrayFilter" -"arrayFirst" -"arrayFirstIndex" -"arrayFirstOrNull" -"arrayFlatten" -"arrayFold" -"arrayIntersect" -"arrayJaccardIndex" -"arrayJoin" -"arrayLast" -"arrayLastIndex" -"arrayLastOrNull" -"arrayMap" -"arrayMax" -"arrayMin" -"arrayPartialReverseSort" -"arrayPartialShuffle" -"arrayPartialSort" -"arrayPopBack" -"arrayPopFront" -"arrayProduct" -"arrayPushBack" -"arrayPushFront" -"arrayRandomSample" -"arrayReduce" -"arrayReduceInRanges" -"arrayResize" -"arrayReverse" -"arrayReverseFill" -"arrayReverseSort" -"arrayReverseSplit" -"arrayRotateLeft" -"arrayRotateRight" -"arrayShiftLeft" -"arrayShiftRight" -"arrayShingles" -"arrayShuffle" -"arraySlice" -"arraySort" -"arraySplit" -"arrayStringConcat" -"arraySum" -"arrayUniq" -"arrayWithConstant" -"arrayZip" -"array_agg" -"array_aggArgMax" -"array_aggArgMin" -"array_aggArray" -"array_aggDistinct" -"array_aggForEach" -"array_aggIf" -"array_aggMap" -"array_aggMerge" -"array_aggNull" -"array_aggOrDefault" -"array_aggOrNull" -"array_aggResample" -"array_aggSimpleState" -"array_aggState" -"array_concat_agg" -"array_concat_aggArgMax" -"array_concat_aggArgMin" -"array_concat_aggArray" -"array_concat_aggDistinct" -"array_concat_aggForEach" -"array_concat_aggIf" -"array_concat_aggMap" -"array_concat_aggMerge" -"array_concat_aggNull" -"array_concat_aggOrDefault" -"array_concat_aggOrNull" -"array_concat_aggResample" -"array_concat_aggSimpleState" -"array_concat_aggState" -"ascii" -"asin" -"asinh" 
-"assumeNotNull" -"atan" -"atan2" -"atanh" -"avg" -"avgArgMax" -"avgArgMin" -"avgArray" -"avgDistinct" -"avgForEach" -"avgIf" -"avgMap" -"avgMerge" -"avgNull" -"avgOrDefault" -"avgOrNull" -"avgResample" -"avgSimpleState" -"avgState" -"avgWeighted" -"avgWeightedArgMax" -"avgWeightedArgMin" -"avgWeightedArray" -"avgWeightedDistinct" -"avgWeightedForEach" -"avgWeightedIf" -"avgWeightedMap" -"avgWeightedMerge" -"avgWeightedNull" -"avgWeightedOrDefault" -"avgWeightedOrNull" -"avgWeightedResample" -"avgWeightedSimpleState" -"avgWeightedState" -"bar" -"base58Decode" -"base58Encode" -"base64Decode" -"base64Encode" -"base64URLDecode" -"base64URLEncode" -"basename" -"bin" -"bitAnd" -"bitCount" -"bitHammingDistance" -"bitNot" -"bitOr" -"bitPositionsToArray" -"bitRotateLeft" -"bitRotateRight" -"bitShiftLeft" -"bitShiftRight" -"bitSlice" -"bitTest" -"bitTestAll" -"bitTestAny" -"bitXor" -"bitmapAnd" -"bitmapAndCardinality" -"bitmapAndnot" -"bitmapAndnotCardinality" -"bitmapBuild" -"bitmapCardinality" -"bitmapContains" -"bitmapHasAll" -"bitmapHasAny" -"bitmapMax" -"bitmapMin" -"bitmapOr" -"bitmapOrCardinality" -"bitmapSubsetInRange" -"bitmapSubsetLimit" -"bitmapToArray" -"bitmapTransform" -"bitmapXor" -"bitmapXorCardinality" -"bitmaskToArray" -"bitmaskToList" -"blockNumber" -"blockSerializedSize" -"blockSize" -"boundingRatio" -"boundingRatioArgMax" -"boundingRatioArgMin" -"boundingRatioArray" -"boundingRatioDistinct" -"boundingRatioForEach" -"boundingRatioIf" -"boundingRatioMap" -"boundingRatioMerge" -"boundingRatioNull" -"boundingRatioOrDefault" -"boundingRatioOrNull" -"boundingRatioResample" -"boundingRatioSimpleState" -"boundingRatioState" -"buildId" -"byteHammingDistance" -"byteSize" -"byteSlice" -"byteSwap" -"caseWithExpr" -"caseWithExpression" -"caseWithoutExpr" -"caseWithoutExpression" -"catboostEvaluate" -"categoricalInformationValue" -"categoricalInformationValueArgMax" -"categoricalInformationValueArgMin" -"categoricalInformationValueArray" -"categoricalInformationValueDistinct" -"categoricalInformationValueForEach" -"categoricalInformationValueIf" -"categoricalInformationValueMap" -"categoricalInformationValueMerge" -"categoricalInformationValueNull" -"categoricalInformationValueOrDefault" -"categoricalInformationValueOrNull" -"categoricalInformationValueResample" -"categoricalInformationValueSimpleState" -"categoricalInformationValueState" -"cbrt" -"ceil" -"ceiling" -"changeDay" -"changeHour" -"changeMinute" -"changeMonth" -"changeSecond" -"changeYear" -"char" -"cityHash64" -"clamp" -"coalesce" -"concat" -"concatAssumeInjective" -"concatWithSeparator" -"concatWithSeparatorAssumeInjective" -"concat_ws" -"connectionId" -"connection_id" -"contingency" -"contingencyArgMax" -"contingencyArgMin" -"contingencyArray" -"contingencyDistinct" -"contingencyForEach" -"contingencyIf" -"contingencyMap" -"contingencyMerge" -"contingencyNull" -"contingencyOrDefault" -"contingencyOrNull" -"contingencyResample" -"contingencySimpleState" -"contingencyState" -"convertCharset" -"corr" -"corrArgMax" -"corrArgMin" -"corrArray" -"corrDistinct" -"corrForEach" -"corrIf" -"corrMap" -"corrMatrix" -"corrMatrixArgMax" -"corrMatrixArgMin" -"corrMatrixArray" -"corrMatrixDistinct" -"corrMatrixForEach" -"corrMatrixIf" -"corrMatrixMap" -"corrMatrixMerge" -"corrMatrixNull" -"corrMatrixOrDefault" -"corrMatrixOrNull" -"corrMatrixResample" -"corrMatrixSimpleState" -"corrMatrixState" -"corrMerge" -"corrNull" -"corrOrDefault" -"corrOrNull" -"corrResample" -"corrSimpleState" -"corrStable" -"corrStableArgMax" -"corrStableArgMin" 
-"corrStableArray" -"corrStableDistinct" -"corrStableForEach" -"corrStableIf" -"corrStableMap" -"corrStableMerge" -"corrStableNull" -"corrStableOrDefault" -"corrStableOrNull" -"corrStableResample" -"corrStableSimpleState" -"corrStableState" -"corrState" -"cos" -"cosh" -"cosineDistance" -"count" -"countArgMax" -"countArgMin" -"countArray" -"countDigits" -"countDistinct" -"countEqual" -"countForEach" -"countIf" -"countMap" -"countMatches" -"countMatchesCaseInsensitive" -"countMerge" -"countNull" -"countOrDefault" -"countOrNull" -"countResample" -"countSimpleState" -"countState" -"countSubstrings" -"countSubstringsCaseInsensitive" -"countSubstringsCaseInsensitiveUTF8" -"covarPop" -"covarPopArgMax" -"covarPopArgMin" -"covarPopArray" -"covarPopDistinct" -"covarPopForEach" -"covarPopIf" -"covarPopMap" -"covarPopMatrix" -"covarPopMatrixArgMax" -"covarPopMatrixArgMin" -"covarPopMatrixArray" -"covarPopMatrixDistinct" -"covarPopMatrixForEach" -"covarPopMatrixIf" -"covarPopMatrixMap" -"covarPopMatrixMerge" -"covarPopMatrixNull" -"covarPopMatrixOrDefault" -"covarPopMatrixOrNull" -"covarPopMatrixResample" -"covarPopMatrixSimpleState" -"covarPopMatrixState" -"covarPopMerge" -"covarPopNull" -"covarPopOrDefault" -"covarPopOrNull" -"covarPopResample" -"covarPopSimpleState" -"covarPopStable" -"covarPopStableArgMax" -"covarPopStableArgMin" -"covarPopStableArray" -"covarPopStableDistinct" -"covarPopStableForEach" -"covarPopStableIf" -"covarPopStableMap" -"covarPopStableMerge" -"covarPopStableNull" -"covarPopStableOrDefault" -"covarPopStableOrNull" -"covarPopStableResample" -"covarPopStableSimpleState" -"covarPopStableState" -"covarPopState" -"covarSamp" -"covarSampArgMax" -"covarSampArgMin" -"covarSampArray" -"covarSampDistinct" -"covarSampForEach" -"covarSampIf" -"covarSampMap" -"covarSampMatrix" -"covarSampMatrixArgMax" -"covarSampMatrixArgMin" -"covarSampMatrixArray" -"covarSampMatrixDistinct" -"covarSampMatrixForEach" -"covarSampMatrixIf" -"covarSampMatrixMap" -"covarSampMatrixMerge" -"covarSampMatrixNull" -"covarSampMatrixOrDefault" -"covarSampMatrixOrNull" -"covarSampMatrixResample" -"covarSampMatrixSimpleState" -"covarSampMatrixState" -"covarSampMerge" -"covarSampNull" -"covarSampOrDefault" -"covarSampOrNull" -"covarSampResample" -"covarSampSimpleState" -"covarSampStable" -"covarSampStableArgMax" -"covarSampStableArgMin" -"covarSampStableArray" -"covarSampStableDistinct" -"covarSampStableForEach" -"covarSampStableIf" -"covarSampStableMap" -"covarSampStableMerge" -"covarSampStableNull" -"covarSampStableOrDefault" -"covarSampStableOrNull" -"covarSampStableResample" -"covarSampStableSimpleState" -"covarSampStableState" -"covarSampState" -"cramersV" -"cramersVArgMax" -"cramersVArgMin" -"cramersVArray" -"cramersVBiasCorrected" -"cramersVBiasCorrectedArgMax" -"cramersVBiasCorrectedArgMin" -"cramersVBiasCorrectedArray" -"cramersVBiasCorrectedDistinct" -"cramersVBiasCorrectedForEach" -"cramersVBiasCorrectedIf" -"cramersVBiasCorrectedMap" -"cramersVBiasCorrectedMerge" -"cramersVBiasCorrectedNull" -"cramersVBiasCorrectedOrDefault" -"cramersVBiasCorrectedOrNull" -"cramersVBiasCorrectedResample" -"cramersVBiasCorrectedSimpleState" -"cramersVBiasCorrectedState" -"cramersVDistinct" -"cramersVForEach" -"cramersVIf" -"cramersVMap" -"cramersVMerge" -"cramersVNull" -"cramersVOrDefault" -"cramersVOrNull" -"cramersVResample" -"cramersVSimpleState" -"cramersVState" -"curdate" -"currentDatabase" -"currentProfiles" -"currentRoles" -"currentSchemas" -"currentUser" -"current_database" -"current_date" -"current_schemas" 
-"current_timestamp" -"current_user" -"cutFragment" -"cutIPv6" -"cutQueryString" -"cutQueryStringAndFragment" -"cutToFirstSignificantSubdomain" -"cutToFirstSignificantSubdomainCustom" -"cutToFirstSignificantSubdomainCustomRFC" -"cutToFirstSignificantSubdomainCustomWithWWW" -"cutToFirstSignificantSubdomainCustomWithWWWRFC" -"cutToFirstSignificantSubdomainRFC" -"cutToFirstSignificantSubdomainWithWWW" -"cutToFirstSignificantSubdomainWithWWWRFC" -"cutURLParameter" -"cutWWW" -"damerauLevenshteinDistance" -"dateDiff" -"dateName" -"dateTime64ToSnowflake" -"dateTime64ToSnowflakeID" -"dateTimeToSnowflake" -"dateTimeToSnowflakeID" -"dateTrunc" -"date_diff" -"decodeHTMLComponent" -"decodeURLComponent" -"decodeURLFormComponent" -"decodeXMLComponent" -"decrypt" -"defaultProfiles" -"defaultRoles" -"defaultValueOfArgumentType" -"defaultValueOfTypeName" -"degrees" -"deltaSum" -"deltaSumArgMax" -"deltaSumArgMin" -"deltaSumArray" -"deltaSumDistinct" -"deltaSumForEach" -"deltaSumIf" -"deltaSumMap" -"deltaSumMerge" -"deltaSumNull" -"deltaSumOrDefault" -"deltaSumOrNull" -"deltaSumResample" -"deltaSumSimpleState" -"deltaSumState" -"deltaSumTimestamp" -"deltaSumTimestampArgMax" -"deltaSumTimestampArgMin" -"deltaSumTimestampArray" -"deltaSumTimestampDistinct" -"deltaSumTimestampForEach" -"deltaSumTimestampIf" -"deltaSumTimestampMap" -"deltaSumTimestampMerge" -"deltaSumTimestampNull" -"deltaSumTimestampOrDefault" -"deltaSumTimestampOrNull" -"deltaSumTimestampResample" -"deltaSumTimestampSimpleState" -"deltaSumTimestampState" -"demangle" -"denseRank" -"denseRankArgMax" -"denseRankArgMin" -"denseRankArray" -"denseRankDistinct" -"denseRankForEach" -"denseRankIf" -"denseRankMap" -"denseRankMerge" -"denseRankNull" -"denseRankOrDefault" -"denseRankOrNull" -"denseRankResample" -"denseRankSimpleState" -"denseRankState" -"dense_rank" -"dense_rankArgMax" -"dense_rankArgMin" -"dense_rankArray" -"dense_rankDistinct" -"dense_rankForEach" -"dense_rankIf" -"dense_rankMap" -"dense_rankMerge" -"dense_rankNull" -"dense_rankOrDefault" -"dense_rankOrNull" -"dense_rankResample" -"dense_rankSimpleState" -"dense_rankState" -"detectCharset" -"detectLanguage" -"detectLanguageMixed" -"detectLanguageUnknown" -"detectProgrammingLanguage" -"detectTonality" -"dictGet" -"dictGetAll" -"dictGetChildren" -"dictGetDate" -"dictGetDateOrDefault" -"dictGetDateTime" -"dictGetDateTimeOrDefault" -"dictGetDescendants" -"dictGetFloat32" -"dictGetFloat32OrDefault" -"dictGetFloat64" -"dictGetFloat64OrDefault" -"dictGetHierarchy" -"dictGetIPv4" -"dictGetIPv4OrDefault" -"dictGetIPv6" -"dictGetIPv6OrDefault" -"dictGetInt16" -"dictGetInt16OrDefault" -"dictGetInt32" -"dictGetInt32OrDefault" -"dictGetInt64" -"dictGetInt64OrDefault" -"dictGetInt8" -"dictGetInt8OrDefault" -"dictGetOrDefault" -"dictGetOrNull" -"dictGetString" -"dictGetStringOrDefault" -"dictGetUInt16" -"dictGetUInt16OrDefault" -"dictGetUInt32" -"dictGetUInt32OrDefault" -"dictGetUInt64" -"dictGetUInt64OrDefault" -"dictGetUInt8" -"dictGetUInt8OrDefault" -"dictGetUUID" -"dictGetUUIDOrDefault" -"dictHas" -"dictIsIn" -"displayName" -"distanceL1" -"distanceL2" -"distanceL2Squared" -"distanceLinf" -"distanceLp" -"divide" -"divideDecimal" -"domain" -"domainRFC" -"domainWithoutWWW" -"domainWithoutWWWRFC" -"dotProduct" -"dumpColumnStructure" -"dynamicElement" -"dynamicType" -"e" -"editDistance" -"editDistanceUTF8" -"empty" -"emptyArrayDate" -"emptyArrayDateTime" -"emptyArrayFloat32" -"emptyArrayFloat64" -"emptyArrayInt16" -"emptyArrayInt32" -"emptyArrayInt64" -"emptyArrayInt8" -"emptyArrayString" 
-"emptyArrayToSingle" -"emptyArrayUInt16" -"emptyArrayUInt32" -"emptyArrayUInt64" -"emptyArrayUInt8" -"enabledProfiles" -"enabledRoles" -"encodeURLComponent" -"encodeURLFormComponent" -"encodeXMLComponent" -"encrypt" -"endsWith" -"endsWithUTF8" -"entropy" -"entropyArgMax" -"entropyArgMin" -"entropyArray" -"entropyDistinct" -"entropyForEach" -"entropyIf" -"entropyMap" -"entropyMerge" -"entropyNull" -"entropyOrDefault" -"entropyOrNull" -"entropyResample" -"entropySimpleState" -"entropyState" -"equals" -"erf" -"erfc" -"errorCodeToName" -"evalMLMethod" -"exp" -"exp10" -"exp2" -"exponentialMovingAverage" -"exponentialMovingAverageArgMax" -"exponentialMovingAverageArgMin" -"exponentialMovingAverageArray" -"exponentialMovingAverageDistinct" -"exponentialMovingAverageForEach" -"exponentialMovingAverageIf" -"exponentialMovingAverageMap" -"exponentialMovingAverageMerge" -"exponentialMovingAverageNull" -"exponentialMovingAverageOrDefault" -"exponentialMovingAverageOrNull" -"exponentialMovingAverageResample" -"exponentialMovingAverageSimpleState" -"exponentialMovingAverageState" -"exponentialTimeDecayedAvg" -"exponentialTimeDecayedAvgArgMax" -"exponentialTimeDecayedAvgArgMin" -"exponentialTimeDecayedAvgArray" -"exponentialTimeDecayedAvgDistinct" -"exponentialTimeDecayedAvgForEach" -"exponentialTimeDecayedAvgIf" -"exponentialTimeDecayedAvgMap" -"exponentialTimeDecayedAvgMerge" -"exponentialTimeDecayedAvgNull" -"exponentialTimeDecayedAvgOrDefault" -"exponentialTimeDecayedAvgOrNull" -"exponentialTimeDecayedAvgResample" -"exponentialTimeDecayedAvgSimpleState" -"exponentialTimeDecayedAvgState" -"exponentialTimeDecayedCount" -"exponentialTimeDecayedCountArgMax" -"exponentialTimeDecayedCountArgMin" -"exponentialTimeDecayedCountArray" -"exponentialTimeDecayedCountDistinct" -"exponentialTimeDecayedCountForEach" -"exponentialTimeDecayedCountIf" -"exponentialTimeDecayedCountMap" -"exponentialTimeDecayedCountMerge" -"exponentialTimeDecayedCountNull" -"exponentialTimeDecayedCountOrDefault" -"exponentialTimeDecayedCountOrNull" -"exponentialTimeDecayedCountResample" -"exponentialTimeDecayedCountSimpleState" -"exponentialTimeDecayedCountState" -"exponentialTimeDecayedMax" -"exponentialTimeDecayedMaxArgMax" -"exponentialTimeDecayedMaxArgMin" -"exponentialTimeDecayedMaxArray" -"exponentialTimeDecayedMaxDistinct" -"exponentialTimeDecayedMaxForEach" -"exponentialTimeDecayedMaxIf" -"exponentialTimeDecayedMaxMap" -"exponentialTimeDecayedMaxMerge" -"exponentialTimeDecayedMaxNull" -"exponentialTimeDecayedMaxOrDefault" -"exponentialTimeDecayedMaxOrNull" -"exponentialTimeDecayedMaxResample" -"exponentialTimeDecayedMaxSimpleState" -"exponentialTimeDecayedMaxState" -"exponentialTimeDecayedSum" -"exponentialTimeDecayedSumArgMax" -"exponentialTimeDecayedSumArgMin" -"exponentialTimeDecayedSumArray" -"exponentialTimeDecayedSumDistinct" -"exponentialTimeDecayedSumForEach" -"exponentialTimeDecayedSumIf" -"exponentialTimeDecayedSumMap" -"exponentialTimeDecayedSumMerge" -"exponentialTimeDecayedSumNull" -"exponentialTimeDecayedSumOrDefault" -"exponentialTimeDecayedSumOrNull" -"exponentialTimeDecayedSumResample" -"exponentialTimeDecayedSumSimpleState" -"exponentialTimeDecayedSumState" -"extract" -"extractAll" -"extractAllGroups" -"extractAllGroupsHorizontal" -"extractAllGroupsVertical" -"extractGroups" -"extractKeyValuePairs" -"extractKeyValuePairsWithEscaping" -"extractTextFromHTML" -"extractURLParameter" -"extractURLParameterNames" -"extractURLParameters" -"factorial" -"farmFingerprint64" -"farmHash64" -"file" -"filesystemAvailable" 
-"filesystemCapacity" -"filesystemUnreserved" -"finalizeAggregation" -"firstLine" -"firstSignificantSubdomain" -"firstSignificantSubdomainCustom" -"firstSignificantSubdomainCustomRFC" -"firstSignificantSubdomainRFC" -"first_value" -"first_valueArgMax" -"first_valueArgMin" -"first_valueArray" -"first_valueDistinct" -"first_valueForEach" -"first_valueIf" -"first_valueMap" -"first_valueMerge" -"first_valueNull" -"first_valueOrDefault" -"first_valueOrNull" -"first_valueResample" -"first_valueSimpleState" -"first_valueState" -"first_value_respect_nulls" -"first_value_respect_nullsArgMax" -"first_value_respect_nullsArgMin" -"first_value_respect_nullsArray" -"first_value_respect_nullsDistinct" -"first_value_respect_nullsForEach" -"first_value_respect_nullsIf" -"first_value_respect_nullsMap" -"first_value_respect_nullsMerge" -"first_value_respect_nullsNull" -"first_value_respect_nullsOrDefault" -"first_value_respect_nullsOrNull" -"first_value_respect_nullsResample" -"first_value_respect_nullsSimpleState" -"first_value_respect_nullsState" -"flameGraph" -"flameGraphArgMax" -"flameGraphArgMin" -"flameGraphArray" -"flameGraphDistinct" -"flameGraphForEach" -"flameGraphIf" -"flameGraphMap" -"flameGraphMerge" -"flameGraphNull" -"flameGraphOrDefault" -"flameGraphOrNull" -"flameGraphResample" -"flameGraphSimpleState" -"flameGraphState" -"flatten" -"flattenTuple" -"floor" -"format" -"formatDateTime" -"formatDateTimeInJodaSyntax" -"formatQuery" -"formatQueryOrNull" -"formatQuerySingleLine" -"formatQuerySingleLineOrNull" -"formatReadableDecimalSize" -"formatReadableQuantity" -"formatReadableSize" -"formatReadableTimeDelta" -"formatRow" -"formatRowNoNewline" -"fragment" -"fromDaysSinceYearZero" -"fromDaysSinceYearZero32" -"fromModifiedJulianDay" -"fromModifiedJulianDayOrNull" -"fromUTCTimestamp" -"fromUnixTimestamp" -"fromUnixTimestamp64Micro" -"fromUnixTimestamp64Milli" -"fromUnixTimestamp64Nano" -"fromUnixTimestampInJodaSyntax" -"from_utc_timestamp" -"fullHostName" -"fuzzBits" -"gccMurmurHash" -"gcd" -"generateRandomStructure" -"generateSnowflakeID" -"generateULID" -"generateUUIDv4" -"generateUUIDv7" -"geoDistance" -"geoToH3" -"geoToS2" -"geohashDecode" -"geohashEncode" -"geohashesInBox" -"getClientHTTPHeader" -"getMacro" -"getOSKernelVersion" -"getServerPort" -"getSetting" -"getSizeOfEnumType" -"getSubcolumn" -"getTypeSerializationStreams" -"globalIn" -"globalInIgnoreSet" -"globalNotIn" -"globalNotInIgnoreSet" -"globalNotNullIn" -"globalNotNullInIgnoreSet" -"globalNullIn" -"globalNullInIgnoreSet" -"globalVariable" -"greatCircleAngle" -"greatCircleDistance" -"greater" -"greaterOrEquals" -"greatest" -"groupArray" -"groupArrayArgMax" -"groupArrayArgMin" -"groupArrayArray" -"groupArrayDistinct" -"groupArrayForEach" -"groupArrayIf" -"groupArrayInsertAt" -"groupArrayInsertAtArgMax" -"groupArrayInsertAtArgMin" -"groupArrayInsertAtArray" -"groupArrayInsertAtDistinct" -"groupArrayInsertAtForEach" -"groupArrayInsertAtIf" -"groupArrayInsertAtMap" -"groupArrayInsertAtMerge" -"groupArrayInsertAtNull" -"groupArrayInsertAtOrDefault" -"groupArrayInsertAtOrNull" -"groupArrayInsertAtResample" -"groupArrayInsertAtSimpleState" -"groupArrayInsertAtState" -"groupArrayIntersect" -"groupArrayIntersectArgMax" -"groupArrayIntersectArgMin" -"groupArrayIntersectArray" -"groupArrayIntersectDistinct" -"groupArrayIntersectForEach" -"groupArrayIntersectIf" -"groupArrayIntersectMap" -"groupArrayIntersectMerge" -"groupArrayIntersectNull" -"groupArrayIntersectOrDefault" -"groupArrayIntersectOrNull" -"groupArrayIntersectResample" 
-"groupArrayIntersectSimpleState" -"groupArrayIntersectState" -"groupArrayLast" -"groupArrayLastArgMax" -"groupArrayLastArgMin" -"groupArrayLastArray" -"groupArrayLastDistinct" -"groupArrayLastForEach" -"groupArrayLastIf" -"groupArrayLastMap" -"groupArrayLastMerge" -"groupArrayLastNull" -"groupArrayLastOrDefault" -"groupArrayLastOrNull" -"groupArrayLastResample" -"groupArrayLastSimpleState" -"groupArrayLastState" -"groupArrayMap" -"groupArrayMerge" -"groupArrayMovingAvg" -"groupArrayMovingAvgArgMax" -"groupArrayMovingAvgArgMin" -"groupArrayMovingAvgArray" -"groupArrayMovingAvgDistinct" -"groupArrayMovingAvgForEach" -"groupArrayMovingAvgIf" -"groupArrayMovingAvgMap" -"groupArrayMovingAvgMerge" -"groupArrayMovingAvgNull" -"groupArrayMovingAvgOrDefault" -"groupArrayMovingAvgOrNull" -"groupArrayMovingAvgResample" -"groupArrayMovingAvgSimpleState" -"groupArrayMovingAvgState" -"groupArrayMovingSum" -"groupArrayMovingSumArgMax" -"groupArrayMovingSumArgMin" -"groupArrayMovingSumArray" -"groupArrayMovingSumDistinct" -"groupArrayMovingSumForEach" -"groupArrayMovingSumIf" -"groupArrayMovingSumMap" -"groupArrayMovingSumMerge" -"groupArrayMovingSumNull" -"groupArrayMovingSumOrDefault" -"groupArrayMovingSumOrNull" -"groupArrayMovingSumResample" -"groupArrayMovingSumSimpleState" -"groupArrayMovingSumState" -"groupArrayNull" -"groupArrayOrDefault" -"groupArrayOrNull" -"groupArrayResample" -"groupArraySample" -"groupArraySampleArgMax" -"groupArraySampleArgMin" -"groupArraySampleArray" -"groupArraySampleDistinct" -"groupArraySampleForEach" -"groupArraySampleIf" -"groupArraySampleMap" -"groupArraySampleMerge" -"groupArraySampleNull" -"groupArraySampleOrDefault" -"groupArraySampleOrNull" -"groupArraySampleResample" -"groupArraySampleSimpleState" -"groupArraySampleState" -"groupArraySimpleState" -"groupArraySorted" -"groupArraySortedArgMax" -"groupArraySortedArgMin" -"groupArraySortedArray" -"groupArraySortedDistinct" -"groupArraySortedForEach" -"groupArraySortedIf" -"groupArraySortedMap" -"groupArraySortedMerge" -"groupArraySortedNull" -"groupArraySortedOrDefault" -"groupArraySortedOrNull" -"groupArraySortedResample" -"groupArraySortedSimpleState" -"groupArraySortedState" -"groupArrayState" -"groupBitAnd" -"groupBitAndArgMax" -"groupBitAndArgMin" -"groupBitAndArray" -"groupBitAndDistinct" -"groupBitAndForEach" -"groupBitAndIf" -"groupBitAndMap" -"groupBitAndMerge" -"groupBitAndNull" -"groupBitAndOrDefault" -"groupBitAndOrNull" -"groupBitAndResample" -"groupBitAndSimpleState" -"groupBitAndState" -"groupBitOr" -"groupBitOrArgMax" -"groupBitOrArgMin" -"groupBitOrArray" -"groupBitOrDistinct" -"groupBitOrForEach" -"groupBitOrIf" -"groupBitOrMap" -"groupBitOrMerge" -"groupBitOrNull" -"groupBitOrOrDefault" -"groupBitOrOrNull" -"groupBitOrResample" -"groupBitOrSimpleState" -"groupBitOrState" -"groupBitXor" -"groupBitXorArgMax" -"groupBitXorArgMin" -"groupBitXorArray" -"groupBitXorDistinct" -"groupBitXorForEach" -"groupBitXorIf" -"groupBitXorMap" -"groupBitXorMerge" -"groupBitXorNull" -"groupBitXorOrDefault" -"groupBitXorOrNull" -"groupBitXorResample" -"groupBitXorSimpleState" -"groupBitXorState" -"groupBitmap" -"groupBitmapAnd" -"groupBitmapAndArgMax" -"groupBitmapAndArgMin" -"groupBitmapAndArray" -"groupBitmapAndDistinct" -"groupBitmapAndForEach" -"groupBitmapAndIf" -"groupBitmapAndMap" -"groupBitmapAndMerge" -"groupBitmapAndNull" -"groupBitmapAndOrDefault" -"groupBitmapAndOrNull" -"groupBitmapAndResample" -"groupBitmapAndSimpleState" -"groupBitmapAndState" -"groupBitmapArgMax" -"groupBitmapArgMin" 
-"groupBitmapArray" -"groupBitmapDistinct" -"groupBitmapForEach" -"groupBitmapIf" -"groupBitmapMap" -"groupBitmapMerge" -"groupBitmapNull" -"groupBitmapOr" -"groupBitmapOrArgMax" -"groupBitmapOrArgMin" -"groupBitmapOrArray" -"groupBitmapOrDefault" -"groupBitmapOrDistinct" -"groupBitmapOrForEach" -"groupBitmapOrIf" -"groupBitmapOrMap" -"groupBitmapOrMerge" -"groupBitmapOrNull" -"groupBitmapOrNull" -"groupBitmapOrOrDefault" -"groupBitmapOrOrNull" -"groupBitmapOrResample" -"groupBitmapOrSimpleState" -"groupBitmapOrState" -"groupBitmapResample" -"groupBitmapSimpleState" -"groupBitmapState" -"groupBitmapXor" -"groupBitmapXorArgMax" -"groupBitmapXorArgMin" -"groupBitmapXorArray" -"groupBitmapXorDistinct" -"groupBitmapXorForEach" -"groupBitmapXorIf" -"groupBitmapXorMap" -"groupBitmapXorMerge" -"groupBitmapXorNull" -"groupBitmapXorOrDefault" -"groupBitmapXorOrNull" -"groupBitmapXorResample" -"groupBitmapXorSimpleState" -"groupBitmapXorState" -"groupConcat" -"groupConcatArgMax" -"groupConcatArgMin" -"groupConcatArray" -"groupConcatDistinct" -"groupConcatForEach" -"groupConcatIf" -"groupConcatMap" -"groupConcatMerge" -"groupConcatNull" -"groupConcatOrDefault" -"groupConcatOrNull" -"groupConcatResample" -"groupConcatSimpleState" -"groupConcatState" -"groupUniqArray" -"groupUniqArrayArgMax" -"groupUniqArrayArgMin" -"groupUniqArrayArray" -"groupUniqArrayDistinct" -"groupUniqArrayForEach" -"groupUniqArrayIf" -"groupUniqArrayMap" -"groupUniqArrayMerge" -"groupUniqArrayNull" -"groupUniqArrayOrDefault" -"groupUniqArrayOrNull" -"groupUniqArrayResample" -"groupUniqArraySimpleState" -"groupUniqArrayState" -"group_concat" -"group_concatArgMax" -"group_concatArgMin" -"group_concatArray" -"group_concatDistinct" -"group_concatForEach" -"group_concatIf" -"group_concatMap" -"group_concatMerge" -"group_concatNull" -"group_concatOrDefault" -"group_concatOrNull" -"group_concatResample" -"group_concatSimpleState" -"group_concatState" -"h3CellAreaM2" -"h3CellAreaRads2" -"h3Distance" -"h3EdgeAngle" -"h3EdgeLengthKm" -"h3EdgeLengthM" -"h3ExactEdgeLengthKm" -"h3ExactEdgeLengthM" -"h3ExactEdgeLengthRads" -"h3GetBaseCell" -"h3GetDestinationIndexFromUnidirectionalEdge" -"h3GetFaces" -"h3GetIndexesFromUnidirectionalEdge" -"h3GetOriginIndexFromUnidirectionalEdge" -"h3GetPentagonIndexes" -"h3GetRes0Indexes" -"h3GetResolution" -"h3GetUnidirectionalEdge" -"h3GetUnidirectionalEdgeBoundary" -"h3GetUnidirectionalEdgesFromHexagon" -"h3HexAreaKm2" -"h3HexAreaM2" -"h3HexRing" -"h3IndexesAreNeighbors" -"h3IsPentagon" -"h3IsResClassIII" -"h3IsValid" -"h3Line" -"h3NumHexagons" -"h3PointDistKm" -"h3PointDistM" -"h3PointDistRads" -"h3ToCenterChild" -"h3ToChildren" -"h3ToGeo" -"h3ToGeoBoundary" -"h3ToParent" -"h3ToString" -"h3UnidirectionalEdgeIsValid" -"h3kRing" -"halfMD5" -"has" -"hasAll" -"hasAny" -"hasColumnInTable" -"hasSubsequence" -"hasSubsequenceCaseInsensitive" -"hasSubsequenceCaseInsensitiveUTF8" -"hasSubsequenceUTF8" -"hasSubstr" -"hasThreadFuzzer" -"hasToken" -"hasTokenCaseInsensitive" -"hasTokenCaseInsensitiveOrNull" -"hasTokenOrNull" -"hex" -"hilbertDecode" -"hilbertEncode" -"histogram" -"histogramArgMax" -"histogramArgMin" -"histogramArray" -"histogramDistinct" -"histogramForEach" -"histogramIf" -"histogramMap" -"histogramMerge" -"histogramNull" -"histogramOrDefault" -"histogramOrNull" -"histogramResample" -"histogramSimpleState" -"histogramState" -"hiveHash" -"hop" -"hopEnd" -"hopStart" -"hostName" -"hostname" -"hypot" -"identity" -"idnaDecode" -"idnaEncode" -"if" -"ifNotFinite" -"ifNull" -"ignore" -"ilike" -"in" 
-"inIgnoreSet" -"indexHint" -"indexOf" -"initcap" -"initcapUTF8" -"initialQueryID" -"initial_query_id" -"initializeAggregation" -"instr" -"intDiv" -"intDivOrZero" -"intExp10" -"intExp2" -"intHash32" -"intHash64" -"intervalLengthSum" -"intervalLengthSumArgMax" -"intervalLengthSumArgMin" -"intervalLengthSumArray" -"intervalLengthSumDistinct" -"intervalLengthSumForEach" -"intervalLengthSumIf" -"intervalLengthSumMap" -"intervalLengthSumMerge" -"intervalLengthSumNull" -"intervalLengthSumOrDefault" -"intervalLengthSumOrNull" -"intervalLengthSumResample" -"intervalLengthSumSimpleState" -"intervalLengthSumState" -"isConstant" -"isDecimalOverflow" -"isFinite" -"isIPAddressInRange" -"isIPv4String" -"isIPv6String" -"isInfinite" -"isNaN" -"isNotDistinctFrom" -"isNotNull" -"isNull" -"isNullable" -"isValidJSON" -"isValidUTF8" -"isZeroOrNull" -"jaroSimilarity" -"jaroWinklerSimilarity" -"javaHash" -"javaHashUTF16LE" -"joinGet" -"joinGetOrNull" -"jsonMergePatch" -"jumpConsistentHash" -"kafkaMurmurHash" -"kolmogorovSmirnovTest" -"kolmogorovSmirnovTestArgMax" -"kolmogorovSmirnovTestArgMin" -"kolmogorovSmirnovTestArray" -"kolmogorovSmirnovTestDistinct" -"kolmogorovSmirnovTestForEach" -"kolmogorovSmirnovTestIf" -"kolmogorovSmirnovTestMap" -"kolmogorovSmirnovTestMerge" -"kolmogorovSmirnovTestNull" -"kolmogorovSmirnovTestOrDefault" -"kolmogorovSmirnovTestOrNull" -"kolmogorovSmirnovTestResample" -"kolmogorovSmirnovTestSimpleState" -"kolmogorovSmirnovTestState" -"kostikConsistentHash" -"kql_array_sort_asc" -"kql_array_sort_desc" -"kurtPop" -"kurtPopArgMax" -"kurtPopArgMin" -"kurtPopArray" -"kurtPopDistinct" -"kurtPopForEach" -"kurtPopIf" -"kurtPopMap" -"kurtPopMerge" -"kurtPopNull" -"kurtPopOrDefault" -"kurtPopOrNull" -"kurtPopResample" -"kurtPopSimpleState" -"kurtPopState" -"kurtSamp" -"kurtSampArgMax" -"kurtSampArgMin" -"kurtSampArray" -"kurtSampDistinct" -"kurtSampForEach" -"kurtSampIf" -"kurtSampMap" -"kurtSampMerge" -"kurtSampNull" -"kurtSampOrDefault" -"kurtSampOrNull" -"kurtSampResample" -"kurtSampSimpleState" -"kurtSampState" -"lagInFrame" -"lagInFrameArgMax" -"lagInFrameArgMin" -"lagInFrameArray" -"lagInFrameDistinct" -"lagInFrameForEach" -"lagInFrameIf" -"lagInFrameMap" -"lagInFrameMerge" -"lagInFrameNull" -"lagInFrameOrDefault" -"lagInFrameOrNull" -"lagInFrameResample" -"lagInFrameSimpleState" -"lagInFrameState" -"largestTriangleThreeBuckets" -"largestTriangleThreeBucketsArgMax" -"largestTriangleThreeBucketsArgMin" -"largestTriangleThreeBucketsArray" -"largestTriangleThreeBucketsDistinct" -"largestTriangleThreeBucketsForEach" -"largestTriangleThreeBucketsIf" -"largestTriangleThreeBucketsMap" -"largestTriangleThreeBucketsMerge" -"largestTriangleThreeBucketsNull" -"largestTriangleThreeBucketsOrDefault" -"largestTriangleThreeBucketsOrNull" -"largestTriangleThreeBucketsResample" -"largestTriangleThreeBucketsSimpleState" -"largestTriangleThreeBucketsState" -"last_value" -"last_valueArgMax" -"last_valueArgMin" -"last_valueArray" -"last_valueDistinct" -"last_valueForEach" -"last_valueIf" -"last_valueMap" -"last_valueMerge" -"last_valueNull" -"last_valueOrDefault" -"last_valueOrNull" -"last_valueResample" -"last_valueSimpleState" -"last_valueState" -"last_value_respect_nulls" -"last_value_respect_nullsArgMax" -"last_value_respect_nullsArgMin" -"last_value_respect_nullsArray" -"last_value_respect_nullsDistinct" -"last_value_respect_nullsForEach" -"last_value_respect_nullsIf" -"last_value_respect_nullsMap" -"last_value_respect_nullsMerge" -"last_value_respect_nullsNull" -"last_value_respect_nullsOrDefault" 
-"last_value_respect_nullsOrNull" -"last_value_respect_nullsResample" -"last_value_respect_nullsSimpleState" -"last_value_respect_nullsState" -"lcase" -"lcm" -"leadInFrame" -"leadInFrameArgMax" -"leadInFrameArgMin" -"leadInFrameArray" -"leadInFrameDistinct" -"leadInFrameForEach" -"leadInFrameIf" -"leadInFrameMap" -"leadInFrameMerge" -"leadInFrameNull" -"leadInFrameOrDefault" -"leadInFrameOrNull" -"leadInFrameResample" -"leadInFrameSimpleState" -"leadInFrameState" -"least" -"left" -"leftPad" -"leftPadUTF8" -"leftUTF8" -"lemmatize" -"length" -"lengthUTF8" -"less" -"lessOrEquals" -"levenshteinDistance" -"levenshteinDistanceUTF8" -"lgamma" -"like" -"ln" -"locate" -"log" -"log10" -"log1p" -"log2" -"logTrace" -"lowCardinalityIndices" -"lowCardinalityKeys" -"lower" -"lowerUTF8" -"lpad" -"ltrim" -"lttb" -"lttbArgMax" -"lttbArgMin" -"lttbArray" -"lttbDistinct" -"lttbForEach" -"lttbIf" -"lttbMap" -"lttbMerge" -"lttbNull" -"lttbOrDefault" -"lttbOrNull" -"lttbResample" -"lttbSimpleState" -"lttbState" -"makeDate" -"makeDate32" -"makeDateTime" -"makeDateTime64" -"mannWhitneyUTest" -"mannWhitneyUTestArgMax" -"mannWhitneyUTestArgMin" -"mannWhitneyUTestArray" -"mannWhitneyUTestDistinct" -"mannWhitneyUTestForEach" -"mannWhitneyUTestIf" -"mannWhitneyUTestMap" -"mannWhitneyUTestMerge" -"mannWhitneyUTestNull" -"mannWhitneyUTestOrDefault" -"mannWhitneyUTestOrNull" -"mannWhitneyUTestResample" -"mannWhitneyUTestSimpleState" -"mannWhitneyUTestState" -"map" -"mapAdd" -"mapAll" -"mapApply" -"mapConcat" -"mapContains" -"mapContainsKeyLike" -"mapExists" -"mapExtractKeyLike" -"mapFilter" -"mapFromArrays" -"mapFromString" -"mapKeys" -"mapPartialReverseSort" -"mapPartialSort" -"mapPopulateSeries" -"mapReverseSort" -"mapSort" -"mapSubtract" -"mapUpdate" -"mapValues" -"match" -"materialize" -"max" -"max2" -"maxArgMax" -"maxArgMin" -"maxArray" -"maxDistinct" -"maxForEach" -"maxIf" -"maxIntersections" -"maxIntersectionsArgMax" -"maxIntersectionsArgMin" -"maxIntersectionsArray" -"maxIntersectionsDistinct" -"maxIntersectionsForEach" -"maxIntersectionsIf" -"maxIntersectionsMap" -"maxIntersectionsMerge" -"maxIntersectionsNull" -"maxIntersectionsOrDefault" -"maxIntersectionsOrNull" -"maxIntersectionsPosition" -"maxIntersectionsPositionArgMax" -"maxIntersectionsPositionArgMin" -"maxIntersectionsPositionArray" -"maxIntersectionsPositionDistinct" -"maxIntersectionsPositionForEach" -"maxIntersectionsPositionIf" -"maxIntersectionsPositionMap" -"maxIntersectionsPositionMerge" -"maxIntersectionsPositionNull" -"maxIntersectionsPositionOrDefault" -"maxIntersectionsPositionOrNull" -"maxIntersectionsPositionResample" -"maxIntersectionsPositionSimpleState" -"maxIntersectionsPositionState" -"maxIntersectionsResample" -"maxIntersectionsSimpleState" -"maxIntersectionsState" -"maxMap" -"maxMappedArrays" -"maxMappedArraysArgMax" -"maxMappedArraysArgMin" -"maxMappedArraysArray" -"maxMappedArraysDistinct" -"maxMappedArraysForEach" -"maxMappedArraysIf" -"maxMappedArraysMap" -"maxMappedArraysMerge" -"maxMappedArraysNull" -"maxMappedArraysOrDefault" -"maxMappedArraysOrNull" -"maxMappedArraysResample" -"maxMappedArraysSimpleState" -"maxMappedArraysState" -"maxMerge" -"maxNull" -"maxOrDefault" -"maxOrNull" -"maxResample" -"maxSimpleState" -"maxState" -"meanZTest" -"meanZTestArgMax" -"meanZTestArgMin" -"meanZTestArray" -"meanZTestDistinct" -"meanZTestForEach" -"meanZTestIf" -"meanZTestMap" -"meanZTestMerge" -"meanZTestNull" -"meanZTestOrDefault" -"meanZTestOrNull" -"meanZTestResample" -"meanZTestSimpleState" -"meanZTestState" -"median" -"medianArgMax" 
-"medianArgMin" -"medianArray" -"medianBFloat16" -"medianBFloat16ArgMax" -"medianBFloat16ArgMin" -"medianBFloat16Array" -"medianBFloat16Distinct" -"medianBFloat16ForEach" -"medianBFloat16If" -"medianBFloat16Map" -"medianBFloat16Merge" -"medianBFloat16Null" -"medianBFloat16OrDefault" -"medianBFloat16OrNull" -"medianBFloat16Resample" -"medianBFloat16SimpleState" -"medianBFloat16State" -"medianBFloat16Weighted" -"medianBFloat16WeightedArgMax" -"medianBFloat16WeightedArgMin" -"medianBFloat16WeightedArray" -"medianBFloat16WeightedDistinct" -"medianBFloat16WeightedForEach" -"medianBFloat16WeightedIf" -"medianBFloat16WeightedMap" -"medianBFloat16WeightedMerge" -"medianBFloat16WeightedNull" -"medianBFloat16WeightedOrDefault" -"medianBFloat16WeightedOrNull" -"medianBFloat16WeightedResample" -"medianBFloat16WeightedSimpleState" -"medianBFloat16WeightedState" -"medianDD" -"medianDDArgMax" -"medianDDArgMin" -"medianDDArray" -"medianDDDistinct" -"medianDDForEach" -"medianDDIf" -"medianDDMap" -"medianDDMerge" -"medianDDNull" -"medianDDOrDefault" -"medianDDOrNull" -"medianDDResample" -"medianDDSimpleState" -"medianDDState" -"medianDeterministic" -"medianDeterministicArgMax" -"medianDeterministicArgMin" -"medianDeterministicArray" -"medianDeterministicDistinct" -"medianDeterministicForEach" -"medianDeterministicIf" -"medianDeterministicMap" -"medianDeterministicMerge" -"medianDeterministicNull" -"medianDeterministicOrDefault" -"medianDeterministicOrNull" -"medianDeterministicResample" -"medianDeterministicSimpleState" -"medianDeterministicState" -"medianDistinct" -"medianExact" -"medianExactArgMax" -"medianExactArgMin" -"medianExactArray" -"medianExactDistinct" -"medianExactForEach" -"medianExactHigh" -"medianExactHighArgMax" -"medianExactHighArgMin" -"medianExactHighArray" -"medianExactHighDistinct" -"medianExactHighForEach" -"medianExactHighIf" -"medianExactHighMap" -"medianExactHighMerge" -"medianExactHighNull" -"medianExactHighOrDefault" -"medianExactHighOrNull" -"medianExactHighResample" -"medianExactHighSimpleState" -"medianExactHighState" -"medianExactIf" -"medianExactLow" -"medianExactLowArgMax" -"medianExactLowArgMin" -"medianExactLowArray" -"medianExactLowDistinct" -"medianExactLowForEach" -"medianExactLowIf" -"medianExactLowMap" -"medianExactLowMerge" -"medianExactLowNull" -"medianExactLowOrDefault" -"medianExactLowOrNull" -"medianExactLowResample" -"medianExactLowSimpleState" -"medianExactLowState" -"medianExactMap" -"medianExactMerge" -"medianExactNull" -"medianExactOrDefault" -"medianExactOrNull" -"medianExactResample" -"medianExactSimpleState" -"medianExactState" -"medianExactWeighted" -"medianExactWeightedArgMax" -"medianExactWeightedArgMin" -"medianExactWeightedArray" -"medianExactWeightedDistinct" -"medianExactWeightedForEach" -"medianExactWeightedIf" -"medianExactWeightedMap" -"medianExactWeightedMerge" -"medianExactWeightedNull" -"medianExactWeightedOrDefault" -"medianExactWeightedOrNull" -"medianExactWeightedResample" -"medianExactWeightedSimpleState" -"medianExactWeightedState" -"medianForEach" -"medianGK" -"medianGKArgMax" -"medianGKArgMin" -"medianGKArray" -"medianGKDistinct" -"medianGKForEach" -"medianGKIf" -"medianGKMap" -"medianGKMerge" -"medianGKNull" -"medianGKOrDefault" -"medianGKOrNull" -"medianGKResample" -"medianGKSimpleState" -"medianGKState" -"medianIf" -"medianInterpolatedWeighted" -"medianInterpolatedWeightedArgMax" -"medianInterpolatedWeightedArgMin" -"medianInterpolatedWeightedArray" -"medianInterpolatedWeightedDistinct" -"medianInterpolatedWeightedForEach" 
-"medianInterpolatedWeightedIf" -"medianInterpolatedWeightedMap" -"medianInterpolatedWeightedMerge" -"medianInterpolatedWeightedNull" -"medianInterpolatedWeightedOrDefault" -"medianInterpolatedWeightedOrNull" -"medianInterpolatedWeightedResample" -"medianInterpolatedWeightedSimpleState" -"medianInterpolatedWeightedState" -"medianMap" -"medianMerge" -"medianNull" -"medianOrDefault" -"medianOrNull" -"medianResample" -"medianSimpleState" -"medianState" -"medianTDigest" -"medianTDigestArgMax" -"medianTDigestArgMin" -"medianTDigestArray" -"medianTDigestDistinct" -"medianTDigestForEach" -"medianTDigestIf" -"medianTDigestMap" -"medianTDigestMerge" -"medianTDigestNull" -"medianTDigestOrDefault" -"medianTDigestOrNull" -"medianTDigestResample" -"medianTDigestSimpleState" -"medianTDigestState" -"medianTDigestWeighted" -"medianTDigestWeightedArgMax" -"medianTDigestWeightedArgMin" -"medianTDigestWeightedArray" -"medianTDigestWeightedDistinct" -"medianTDigestWeightedForEach" -"medianTDigestWeightedIf" -"medianTDigestWeightedMap" -"medianTDigestWeightedMerge" -"medianTDigestWeightedNull" -"medianTDigestWeightedOrDefault" -"medianTDigestWeightedOrNull" -"medianTDigestWeightedResample" -"medianTDigestWeightedSimpleState" -"medianTDigestWeightedState" -"medianTiming" -"medianTimingArgMax" -"medianTimingArgMin" -"medianTimingArray" -"medianTimingDistinct" -"medianTimingForEach" -"medianTimingIf" -"medianTimingMap" -"medianTimingMerge" -"medianTimingNull" -"medianTimingOrDefault" -"medianTimingOrNull" -"medianTimingResample" -"medianTimingSimpleState" -"medianTimingState" -"medianTimingWeighted" -"medianTimingWeightedArgMax" -"medianTimingWeightedArgMin" -"medianTimingWeightedArray" -"medianTimingWeightedDistinct" -"medianTimingWeightedForEach" -"medianTimingWeightedIf" -"medianTimingWeightedMap" -"medianTimingWeightedMerge" -"medianTimingWeightedNull" -"medianTimingWeightedOrDefault" -"medianTimingWeightedOrNull" -"medianTimingWeightedResample" -"medianTimingWeightedSimpleState" -"medianTimingWeightedState" -"metroHash64" -"mid" -"min" -"min2" -"minArgMax" -"minArgMin" -"minArray" -"minDistinct" -"minForEach" -"minIf" -"minMap" -"minMappedArrays" -"minMappedArraysArgMax" -"minMappedArraysArgMin" -"minMappedArraysArray" -"minMappedArraysDistinct" -"minMappedArraysForEach" -"minMappedArraysIf" -"minMappedArraysMap" -"minMappedArraysMerge" -"minMappedArraysNull" -"minMappedArraysOrDefault" -"minMappedArraysOrNull" -"minMappedArraysResample" -"minMappedArraysSimpleState" -"minMappedArraysState" -"minMerge" -"minNull" -"minOrDefault" -"minOrNull" -"minResample" -"minSampleSizeContinous" -"minSampleSizeContinuous" -"minSampleSizeConversion" -"minSimpleState" -"minState" -"minus" -"mismatches" -"mod" -"modulo" -"moduloLegacy" -"moduloOrZero" -"monthName" -"mortonDecode" -"mortonEncode" -"multiFuzzyMatchAllIndices" -"multiFuzzyMatchAny" -"multiFuzzyMatchAnyIndex" -"multiIf" -"multiMatchAllIndices" -"multiMatchAny" -"multiMatchAnyIndex" -"multiSearchAllPositions" -"multiSearchAllPositionsCaseInsensitive" -"multiSearchAllPositionsCaseInsensitiveUTF8" -"multiSearchAllPositionsUTF8" -"multiSearchAny" -"multiSearchAnyCaseInsensitive" -"multiSearchAnyCaseInsensitiveUTF8" -"multiSearchAnyUTF8" -"multiSearchFirstIndex" -"multiSearchFirstIndexCaseInsensitive" -"multiSearchFirstIndexCaseInsensitiveUTF8" -"multiSearchFirstIndexUTF8" -"multiSearchFirstPosition" -"multiSearchFirstPositionCaseInsensitive" -"multiSearchFirstPositionCaseInsensitiveUTF8" -"multiSearchFirstPositionUTF8" -"multiply" -"multiplyDecimal" 
-"murmurHash2_32" -"murmurHash2_64" -"murmurHash3_128" -"murmurHash3_32" -"murmurHash3_64" -"negate" -"neighbor" -"nested" -"netloc" -"ngramDistance" -"ngramDistanceCaseInsensitive" -"ngramDistanceCaseInsensitiveUTF8" -"ngramDistanceUTF8" -"ngramMinHash" -"ngramMinHashArg" -"ngramMinHashArgCaseInsensitive" -"ngramMinHashArgCaseInsensitiveUTF8" -"ngramMinHashArgUTF8" -"ngramMinHashCaseInsensitive" -"ngramMinHashCaseInsensitiveUTF8" -"ngramMinHashUTF8" -"ngramSearch" -"ngramSearchCaseInsensitive" -"ngramSearchCaseInsensitiveUTF8" -"ngramSearchUTF8" -"ngramSimHash" -"ngramSimHashCaseInsensitive" -"ngramSimHashCaseInsensitiveUTF8" -"ngramSimHashUTF8" -"ngrams" -"nonNegativeDerivative" -"nonNegativeDerivativeArgMax" -"nonNegativeDerivativeArgMin" -"nonNegativeDerivativeArray" -"nonNegativeDerivativeDistinct" -"nonNegativeDerivativeForEach" -"nonNegativeDerivativeIf" -"nonNegativeDerivativeMap" -"nonNegativeDerivativeMerge" -"nonNegativeDerivativeNull" -"nonNegativeDerivativeOrDefault" -"nonNegativeDerivativeOrNull" -"nonNegativeDerivativeResample" -"nonNegativeDerivativeSimpleState" -"nonNegativeDerivativeState" -"normL1" -"normL2" -"normL2Squared" -"normLinf" -"normLp" -"normalizeL1" -"normalizeL2" -"normalizeLinf" -"normalizeLp" -"normalizeQuery" -"normalizeQueryKeepNames" -"normalizeUTF8NFC" -"normalizeUTF8NFD" -"normalizeUTF8NFKC" -"normalizeUTF8NFKD" -"normalizedQueryHash" -"normalizedQueryHashKeepNames" -"not" -"notEmpty" -"notEquals" -"notILike" -"notIn" -"notInIgnoreSet" -"notLike" -"notNullIn" -"notNullInIgnoreSet" -"nothing" -"nothingArgMax" -"nothingArgMin" -"nothingArray" -"nothingDistinct" -"nothingForEach" -"nothingIf" -"nothingMap" -"nothingMerge" -"nothingNull" -"nothingNull" -"nothingNullArgMax" -"nothingNullArgMin" -"nothingNullArray" -"nothingNullDistinct" -"nothingNullForEach" -"nothingNullIf" -"nothingNullMap" -"nothingNullMerge" -"nothingNullNull" -"nothingNullOrDefault" -"nothingNullOrNull" -"nothingNullResample" -"nothingNullSimpleState" -"nothingNullState" -"nothingOrDefault" -"nothingOrNull" -"nothingResample" -"nothingSimpleState" -"nothingState" -"nothingUInt64" -"nothingUInt64ArgMax" -"nothingUInt64ArgMin" -"nothingUInt64Array" -"nothingUInt64Distinct" -"nothingUInt64ForEach" -"nothingUInt64If" -"nothingUInt64Map" -"nothingUInt64Merge" -"nothingUInt64Null" -"nothingUInt64OrDefault" -"nothingUInt64OrNull" -"nothingUInt64Resample" -"nothingUInt64SimpleState" -"nothingUInt64State" -"now" -"now64" -"nowInBlock" -"nth_value" -"nth_valueArgMax" -"nth_valueArgMin" -"nth_valueArray" -"nth_valueDistinct" -"nth_valueForEach" -"nth_valueIf" -"nth_valueMap" -"nth_valueMerge" -"nth_valueNull" -"nth_valueOrDefault" -"nth_valueOrNull" -"nth_valueResample" -"nth_valueSimpleState" -"nth_valueState" -"ntile" -"ntileArgMax" -"ntileArgMin" -"ntileArray" -"ntileDistinct" -"ntileForEach" -"ntileIf" -"ntileMap" -"ntileMerge" -"ntileNull" -"ntileOrDefault" -"ntileOrNull" -"ntileResample" -"ntileSimpleState" -"ntileState" -"nullIf" -"nullIn" -"nullInIgnoreSet" -"or" -"parseDateTime" -"parseDateTime32BestEffort" -"parseDateTime32BestEffortOrNull" -"parseDateTime32BestEffortOrZero" -"parseDateTime64BestEffort" -"parseDateTime64BestEffortOrNull" -"parseDateTime64BestEffortOrZero" -"parseDateTime64BestEffortUS" -"parseDateTime64BestEffortUSOrNull" -"parseDateTime64BestEffortUSOrZero" -"parseDateTimeBestEffort" -"parseDateTimeBestEffortOrNull" -"parseDateTimeBestEffortOrZero" -"parseDateTimeBestEffortUS" -"parseDateTimeBestEffortUSOrNull" -"parseDateTimeBestEffortUSOrZero" 
-"parseDateTimeInJodaSyntax" -"parseDateTimeInJodaSyntaxOrNull" -"parseDateTimeInJodaSyntaxOrZero" -"parseDateTimeOrNull" -"parseDateTimeOrZero" -"parseReadableSize" -"parseReadableSizeOrNull" -"parseReadableSizeOrZero" -"parseTimeDelta" -"partitionID" -"partitionId" -"path" -"pathFull" -"percentRank" -"percentRankArgMax" -"percentRankArgMin" -"percentRankArray" -"percentRankDistinct" -"percentRankForEach" -"percentRankIf" -"percentRankMap" -"percentRankMerge" -"percentRankNull" -"percentRankOrDefault" -"percentRankOrNull" -"percentRankResample" -"percentRankSimpleState" -"percentRankState" -"percent_rank" -"percent_rankArgMax" -"percent_rankArgMin" -"percent_rankArray" -"percent_rankDistinct" -"percent_rankForEach" -"percent_rankIf" -"percent_rankMap" -"percent_rankMerge" -"percent_rankNull" -"percent_rankOrDefault" -"percent_rankOrNull" -"percent_rankResample" -"percent_rankSimpleState" -"percent_rankState" -"pi" -"plus" -"pmod" -"pointInEllipses" -"pointInPolygon" -"polygonAreaCartesian" -"polygonAreaSpherical" -"polygonConvexHullCartesian" -"polygonPerimeterCartesian" -"polygonPerimeterSpherical" -"polygonsDistanceCartesian" -"polygonsDistanceSpherical" -"polygonsEqualsCartesian" -"polygonsIntersectionCartesian" -"polygonsIntersectionSpherical" -"polygonsSymDifferenceCartesian" -"polygonsSymDifferenceSpherical" -"polygonsUnionCartesian" -"polygonsUnionSpherical" -"polygonsWithinCartesian" -"polygonsWithinSpherical" -"port" -"portRFC" -"position" -"positionCaseInsensitive" -"positionCaseInsensitiveUTF8" -"positionUTF8" -"positiveModulo" -"positive_modulo" -"pow" -"power" -"printf" -"proportionsZTest" -"protocol" -"punycodeDecode" -"punycodeEncode" -"quantile" -"quantileArgMax" -"quantileArgMin" -"quantileArray" -"quantileBFloat16" -"quantileBFloat16ArgMax" -"quantileBFloat16ArgMin" -"quantileBFloat16Array" -"quantileBFloat16Distinct" -"quantileBFloat16ForEach" -"quantileBFloat16If" -"quantileBFloat16Map" -"quantileBFloat16Merge" -"quantileBFloat16Null" -"quantileBFloat16OrDefault" -"quantileBFloat16OrNull" -"quantileBFloat16Resample" -"quantileBFloat16SimpleState" -"quantileBFloat16State" -"quantileBFloat16Weighted" -"quantileBFloat16WeightedArgMax" -"quantileBFloat16WeightedArgMin" -"quantileBFloat16WeightedArray" -"quantileBFloat16WeightedDistinct" -"quantileBFloat16WeightedForEach" -"quantileBFloat16WeightedIf" -"quantileBFloat16WeightedMap" -"quantileBFloat16WeightedMerge" -"quantileBFloat16WeightedNull" -"quantileBFloat16WeightedOrDefault" -"quantileBFloat16WeightedOrNull" -"quantileBFloat16WeightedResample" -"quantileBFloat16WeightedSimpleState" -"quantileBFloat16WeightedState" -"quantileDD" -"quantileDDArgMax" -"quantileDDArgMin" -"quantileDDArray" -"quantileDDDistinct" -"quantileDDForEach" -"quantileDDIf" -"quantileDDMap" -"quantileDDMerge" -"quantileDDNull" -"quantileDDOrDefault" -"quantileDDOrNull" -"quantileDDResample" -"quantileDDSimpleState" -"quantileDDState" -"quantileDeterministic" -"quantileDeterministicArgMax" -"quantileDeterministicArgMin" -"quantileDeterministicArray" -"quantileDeterministicDistinct" -"quantileDeterministicForEach" -"quantileDeterministicIf" -"quantileDeterministicMap" -"quantileDeterministicMerge" -"quantileDeterministicNull" -"quantileDeterministicOrDefault" -"quantileDeterministicOrNull" -"quantileDeterministicResample" -"quantileDeterministicSimpleState" -"quantileDeterministicState" -"quantileDistinct" -"quantileExact" -"quantileExactArgMax" -"quantileExactArgMin" -"quantileExactArray" -"quantileExactDistinct" -"quantileExactExclusive" 
-"quantileExactExclusiveArgMax" -"quantileExactExclusiveArgMin" -"quantileExactExclusiveArray" -"quantileExactExclusiveDistinct" -"quantileExactExclusiveForEach" -"quantileExactExclusiveIf" -"quantileExactExclusiveMap" -"quantileExactExclusiveMerge" -"quantileExactExclusiveNull" -"quantileExactExclusiveOrDefault" -"quantileExactExclusiveOrNull" -"quantileExactExclusiveResample" -"quantileExactExclusiveSimpleState" -"quantileExactExclusiveState" -"quantileExactForEach" -"quantileExactHigh" -"quantileExactHighArgMax" -"quantileExactHighArgMin" -"quantileExactHighArray" -"quantileExactHighDistinct" -"quantileExactHighForEach" -"quantileExactHighIf" -"quantileExactHighMap" -"quantileExactHighMerge" -"quantileExactHighNull" -"quantileExactHighOrDefault" -"quantileExactHighOrNull" -"quantileExactHighResample" -"quantileExactHighSimpleState" -"quantileExactHighState" -"quantileExactIf" -"quantileExactInclusive" -"quantileExactInclusiveArgMax" -"quantileExactInclusiveArgMin" -"quantileExactInclusiveArray" -"quantileExactInclusiveDistinct" -"quantileExactInclusiveForEach" -"quantileExactInclusiveIf" -"quantileExactInclusiveMap" -"quantileExactInclusiveMerge" -"quantileExactInclusiveNull" -"quantileExactInclusiveOrDefault" -"quantileExactInclusiveOrNull" -"quantileExactInclusiveResample" -"quantileExactInclusiveSimpleState" -"quantileExactInclusiveState" -"quantileExactLow" -"quantileExactLowArgMax" -"quantileExactLowArgMin" -"quantileExactLowArray" -"quantileExactLowDistinct" -"quantileExactLowForEach" -"quantileExactLowIf" -"quantileExactLowMap" -"quantileExactLowMerge" -"quantileExactLowNull" -"quantileExactLowOrDefault" -"quantileExactLowOrNull" -"quantileExactLowResample" -"quantileExactLowSimpleState" -"quantileExactLowState" -"quantileExactMap" -"quantileExactMerge" -"quantileExactNull" -"quantileExactOrDefault" -"quantileExactOrNull" -"quantileExactResample" -"quantileExactSimpleState" -"quantileExactState" -"quantileExactWeighted" -"quantileExactWeightedArgMax" -"quantileExactWeightedArgMin" -"quantileExactWeightedArray" -"quantileExactWeightedDistinct" -"quantileExactWeightedForEach" -"quantileExactWeightedIf" -"quantileExactWeightedMap" -"quantileExactWeightedMerge" -"quantileExactWeightedNull" -"quantileExactWeightedOrDefault" -"quantileExactWeightedOrNull" -"quantileExactWeightedResample" -"quantileExactWeightedSimpleState" -"quantileExactWeightedState" -"quantileForEach" -"quantileGK" -"quantileGKArgMax" -"quantileGKArgMin" -"quantileGKArray" -"quantileGKDistinct" -"quantileGKForEach" -"quantileGKIf" -"quantileGKMap" -"quantileGKMerge" -"quantileGKNull" -"quantileGKOrDefault" -"quantileGKOrNull" -"quantileGKResample" -"quantileGKSimpleState" -"quantileGKState" -"quantileIf" -"quantileInterpolatedWeighted" -"quantileInterpolatedWeightedArgMax" -"quantileInterpolatedWeightedArgMin" -"quantileInterpolatedWeightedArray" -"quantileInterpolatedWeightedDistinct" -"quantileInterpolatedWeightedForEach" -"quantileInterpolatedWeightedIf" -"quantileInterpolatedWeightedMap" -"quantileInterpolatedWeightedMerge" -"quantileInterpolatedWeightedNull" -"quantileInterpolatedWeightedOrDefault" -"quantileInterpolatedWeightedOrNull" -"quantileInterpolatedWeightedResample" -"quantileInterpolatedWeightedSimpleState" -"quantileInterpolatedWeightedState" -"quantileMap" -"quantileMerge" -"quantileNull" -"quantileOrDefault" -"quantileOrNull" -"quantileResample" -"quantileSimpleState" -"quantileState" -"quantileTDigest" -"quantileTDigestArgMax" -"quantileTDigestArgMin" -"quantileTDigestArray" 
-"quantileTDigestDistinct" -"quantileTDigestForEach" -"quantileTDigestIf" -"quantileTDigestMap" -"quantileTDigestMerge" -"quantileTDigestNull" -"quantileTDigestOrDefault" -"quantileTDigestOrNull" -"quantileTDigestResample" -"quantileTDigestSimpleState" -"quantileTDigestState" -"quantileTDigestWeighted" -"quantileTDigestWeightedArgMax" -"quantileTDigestWeightedArgMin" -"quantileTDigestWeightedArray" -"quantileTDigestWeightedDistinct" -"quantileTDigestWeightedForEach" -"quantileTDigestWeightedIf" -"quantileTDigestWeightedMap" -"quantileTDigestWeightedMerge" -"quantileTDigestWeightedNull" -"quantileTDigestWeightedOrDefault" -"quantileTDigestWeightedOrNull" -"quantileTDigestWeightedResample" -"quantileTDigestWeightedSimpleState" -"quantileTDigestWeightedState" -"quantileTiming" -"quantileTimingArgMax" -"quantileTimingArgMin" -"quantileTimingArray" -"quantileTimingDistinct" -"quantileTimingForEach" -"quantileTimingIf" -"quantileTimingMap" -"quantileTimingMerge" -"quantileTimingNull" -"quantileTimingOrDefault" -"quantileTimingOrNull" -"quantileTimingResample" -"quantileTimingSimpleState" -"quantileTimingState" -"quantileTimingWeighted" -"quantileTimingWeightedArgMax" -"quantileTimingWeightedArgMin" -"quantileTimingWeightedArray" -"quantileTimingWeightedDistinct" -"quantileTimingWeightedForEach" -"quantileTimingWeightedIf" -"quantileTimingWeightedMap" -"quantileTimingWeightedMerge" -"quantileTimingWeightedNull" -"quantileTimingWeightedOrDefault" -"quantileTimingWeightedOrNull" -"quantileTimingWeightedResample" -"quantileTimingWeightedSimpleState" -"quantileTimingWeightedState" -"quantiles" -"quantilesArgMax" -"quantilesArgMin" -"quantilesArray" -"quantilesBFloat16" -"quantilesBFloat16ArgMax" -"quantilesBFloat16ArgMin" -"quantilesBFloat16Array" -"quantilesBFloat16Distinct" -"quantilesBFloat16ForEach" -"quantilesBFloat16If" -"quantilesBFloat16Map" -"quantilesBFloat16Merge" -"quantilesBFloat16Null" -"quantilesBFloat16OrDefault" -"quantilesBFloat16OrNull" -"quantilesBFloat16Resample" -"quantilesBFloat16SimpleState" -"quantilesBFloat16State" -"quantilesBFloat16Weighted" -"quantilesBFloat16WeightedArgMax" -"quantilesBFloat16WeightedArgMin" -"quantilesBFloat16WeightedArray" -"quantilesBFloat16WeightedDistinct" -"quantilesBFloat16WeightedForEach" -"quantilesBFloat16WeightedIf" -"quantilesBFloat16WeightedMap" -"quantilesBFloat16WeightedMerge" -"quantilesBFloat16WeightedNull" -"quantilesBFloat16WeightedOrDefault" -"quantilesBFloat16WeightedOrNull" -"quantilesBFloat16WeightedResample" -"quantilesBFloat16WeightedSimpleState" -"quantilesBFloat16WeightedState" -"quantilesDD" -"quantilesDDArgMax" -"quantilesDDArgMin" -"quantilesDDArray" -"quantilesDDDistinct" -"quantilesDDForEach" -"quantilesDDIf" -"quantilesDDMap" -"quantilesDDMerge" -"quantilesDDNull" -"quantilesDDOrDefault" -"quantilesDDOrNull" -"quantilesDDResample" -"quantilesDDSimpleState" -"quantilesDDState" -"quantilesDeterministic" -"quantilesDeterministicArgMax" -"quantilesDeterministicArgMin" -"quantilesDeterministicArray" -"quantilesDeterministicDistinct" -"quantilesDeterministicForEach" -"quantilesDeterministicIf" -"quantilesDeterministicMap" -"quantilesDeterministicMerge" -"quantilesDeterministicNull" -"quantilesDeterministicOrDefault" -"quantilesDeterministicOrNull" -"quantilesDeterministicResample" -"quantilesDeterministicSimpleState" -"quantilesDeterministicState" -"quantilesDistinct" -"quantilesExact" -"quantilesExactArgMax" -"quantilesExactArgMin" -"quantilesExactArray" -"quantilesExactDistinct" -"quantilesExactExclusive" 
-"quantilesExactExclusiveArgMax" -"quantilesExactExclusiveArgMin" -"quantilesExactExclusiveArray" -"quantilesExactExclusiveDistinct" -"quantilesExactExclusiveForEach" -"quantilesExactExclusiveIf" -"quantilesExactExclusiveMap" -"quantilesExactExclusiveMerge" -"quantilesExactExclusiveNull" -"quantilesExactExclusiveOrDefault" -"quantilesExactExclusiveOrNull" -"quantilesExactExclusiveResample" -"quantilesExactExclusiveSimpleState" -"quantilesExactExclusiveState" -"quantilesExactForEach" -"quantilesExactHigh" -"quantilesExactHighArgMax" -"quantilesExactHighArgMin" -"quantilesExactHighArray" -"quantilesExactHighDistinct" -"quantilesExactHighForEach" -"quantilesExactHighIf" -"quantilesExactHighMap" -"quantilesExactHighMerge" -"quantilesExactHighNull" -"quantilesExactHighOrDefault" -"quantilesExactHighOrNull" -"quantilesExactHighResample" -"quantilesExactHighSimpleState" -"quantilesExactHighState" -"quantilesExactIf" -"quantilesExactInclusive" -"quantilesExactInclusiveArgMax" -"quantilesExactInclusiveArgMin" -"quantilesExactInclusiveArray" -"quantilesExactInclusiveDistinct" -"quantilesExactInclusiveForEach" -"quantilesExactInclusiveIf" -"quantilesExactInclusiveMap" -"quantilesExactInclusiveMerge" -"quantilesExactInclusiveNull" -"quantilesExactInclusiveOrDefault" -"quantilesExactInclusiveOrNull" -"quantilesExactInclusiveResample" -"quantilesExactInclusiveSimpleState" -"quantilesExactInclusiveState" -"quantilesExactLow" -"quantilesExactLowArgMax" -"quantilesExactLowArgMin" -"quantilesExactLowArray" -"quantilesExactLowDistinct" -"quantilesExactLowForEach" -"quantilesExactLowIf" -"quantilesExactLowMap" -"quantilesExactLowMerge" -"quantilesExactLowNull" -"quantilesExactLowOrDefault" -"quantilesExactLowOrNull" -"quantilesExactLowResample" -"quantilesExactLowSimpleState" -"quantilesExactLowState" -"quantilesExactMap" -"quantilesExactMerge" -"quantilesExactNull" -"quantilesExactOrDefault" -"quantilesExactOrNull" -"quantilesExactResample" -"quantilesExactSimpleState" -"quantilesExactState" -"quantilesExactWeighted" -"quantilesExactWeightedArgMax" -"quantilesExactWeightedArgMin" -"quantilesExactWeightedArray" -"quantilesExactWeightedDistinct" -"quantilesExactWeightedForEach" -"quantilesExactWeightedIf" -"quantilesExactWeightedMap" -"quantilesExactWeightedMerge" -"quantilesExactWeightedNull" -"quantilesExactWeightedOrDefault" -"quantilesExactWeightedOrNull" -"quantilesExactWeightedResample" -"quantilesExactWeightedSimpleState" -"quantilesExactWeightedState" -"quantilesForEach" -"quantilesGK" -"quantilesGKArgMax" -"quantilesGKArgMin" -"quantilesGKArray" -"quantilesGKDistinct" -"quantilesGKForEach" -"quantilesGKIf" -"quantilesGKMap" -"quantilesGKMerge" -"quantilesGKNull" -"quantilesGKOrDefault" -"quantilesGKOrNull" -"quantilesGKResample" -"quantilesGKSimpleState" -"quantilesGKState" -"quantilesIf" -"quantilesInterpolatedWeighted" -"quantilesInterpolatedWeightedArgMax" -"quantilesInterpolatedWeightedArgMin" -"quantilesInterpolatedWeightedArray" -"quantilesInterpolatedWeightedDistinct" -"quantilesInterpolatedWeightedForEach" -"quantilesInterpolatedWeightedIf" -"quantilesInterpolatedWeightedMap" -"quantilesInterpolatedWeightedMerge" -"quantilesInterpolatedWeightedNull" -"quantilesInterpolatedWeightedOrDefault" -"quantilesInterpolatedWeightedOrNull" -"quantilesInterpolatedWeightedResample" -"quantilesInterpolatedWeightedSimpleState" -"quantilesInterpolatedWeightedState" -"quantilesMap" -"quantilesMerge" -"quantilesNull" -"quantilesOrDefault" -"quantilesOrNull" -"quantilesResample" -"quantilesSimpleState" 
-"quantilesState" -"quantilesTDigest" -"quantilesTDigestArgMax" -"quantilesTDigestArgMin" -"quantilesTDigestArray" -"quantilesTDigestDistinct" -"quantilesTDigestForEach" -"quantilesTDigestIf" -"quantilesTDigestMap" -"quantilesTDigestMerge" -"quantilesTDigestNull" -"quantilesTDigestOrDefault" -"quantilesTDigestOrNull" -"quantilesTDigestResample" -"quantilesTDigestSimpleState" -"quantilesTDigestState" -"quantilesTDigestWeighted" -"quantilesTDigestWeightedArgMax" -"quantilesTDigestWeightedArgMin" -"quantilesTDigestWeightedArray" -"quantilesTDigestWeightedDistinct" -"quantilesTDigestWeightedForEach" -"quantilesTDigestWeightedIf" -"quantilesTDigestWeightedMap" -"quantilesTDigestWeightedMerge" -"quantilesTDigestWeightedNull" -"quantilesTDigestWeightedOrDefault" -"quantilesTDigestWeightedOrNull" -"quantilesTDigestWeightedResample" -"quantilesTDigestWeightedSimpleState" -"quantilesTDigestWeightedState" -"quantilesTiming" -"quantilesTimingArgMax" -"quantilesTimingArgMin" -"quantilesTimingArray" -"quantilesTimingDistinct" -"quantilesTimingForEach" -"quantilesTimingIf" -"quantilesTimingMap" -"quantilesTimingMerge" -"quantilesTimingNull" -"quantilesTimingOrDefault" -"quantilesTimingOrNull" -"quantilesTimingResample" -"quantilesTimingSimpleState" -"quantilesTimingState" -"quantilesTimingWeighted" -"quantilesTimingWeightedArgMax" -"quantilesTimingWeightedArgMin" -"quantilesTimingWeightedArray" -"quantilesTimingWeightedDistinct" -"quantilesTimingWeightedForEach" -"quantilesTimingWeightedIf" -"quantilesTimingWeightedMap" -"quantilesTimingWeightedMerge" -"quantilesTimingWeightedNull" -"quantilesTimingWeightedOrDefault" -"quantilesTimingWeightedOrNull" -"quantilesTimingWeightedResample" -"quantilesTimingWeightedSimpleState" -"quantilesTimingWeightedState" -"queryID" -"queryString" -"queryStringAndFragment" -"query_id" -"radians" -"rand" -"rand32" -"rand64" -"randBernoulli" -"randBinomial" -"randCanonical" -"randChiSquared" -"randConstant" -"randExponential" -"randFisherF" -"randLogNormal" -"randNegativeBinomial" -"randNormal" -"randPoisson" -"randStudentT" -"randUniform" -"randomFixedString" -"randomPrintableASCII" -"randomString" -"randomStringUTF8" -"range" -"rank" -"rankArgMax" -"rankArgMin" -"rankArray" -"rankCorr" -"rankCorrArgMax" -"rankCorrArgMin" -"rankCorrArray" -"rankCorrDistinct" -"rankCorrForEach" -"rankCorrIf" -"rankCorrMap" -"rankCorrMerge" -"rankCorrNull" -"rankCorrOrDefault" -"rankCorrOrNull" -"rankCorrResample" -"rankCorrSimpleState" -"rankCorrState" -"rankDistinct" -"rankForEach" -"rankIf" -"rankMap" -"rankMerge" -"rankNull" -"rankOrDefault" -"rankOrNull" -"rankResample" -"rankSimpleState" -"rankState" -"readWKTLineString" -"readWKTMultiLineString" -"readWKTMultiPolygon" -"readWKTPoint" -"readWKTPolygon" -"readWKTRing" -"regexpExtract" -"regexpQuoteMeta" -"regionHierarchy" -"regionIn" -"regionToArea" -"regionToCity" -"regionToContinent" -"regionToCountry" -"regionToDistrict" -"regionToName" -"regionToPopulation" -"regionToTopContinent" -"reinterpret" -"reinterpretAsDate" -"reinterpretAsDateTime" -"reinterpretAsFixedString" -"reinterpretAsFloat32" -"reinterpretAsFloat64" -"reinterpretAsInt128" -"reinterpretAsInt16" -"reinterpretAsInt256" -"reinterpretAsInt32" -"reinterpretAsInt64" -"reinterpretAsInt8" -"reinterpretAsString" -"reinterpretAsUInt128" -"reinterpretAsUInt16" -"reinterpretAsUInt256" -"reinterpretAsUInt32" -"reinterpretAsUInt64" -"reinterpretAsUInt8" -"reinterpretAsUUID" -"repeat" -"replace" -"replaceAll" -"replaceOne" -"replaceRegexpAll" -"replaceRegexpOne" -"replicate" 
-"retention" -"retentionArgMax" -"retentionArgMin" -"retentionArray" -"retentionDistinct" -"retentionForEach" -"retentionIf" -"retentionMap" -"retentionMerge" -"retentionNull" -"retentionOrDefault" -"retentionOrNull" -"retentionResample" -"retentionSimpleState" -"retentionState" -"reverse" -"reverseUTF8" -"revision" -"right" -"rightPad" -"rightPadUTF8" -"rightUTF8" -"round" -"roundAge" -"roundBankers" -"roundDown" -"roundDuration" -"roundToExp2" -"rowNumberInAllBlocks" -"rowNumberInBlock" -"row_number" -"row_numberArgMax" -"row_numberArgMin" -"row_numberArray" -"row_numberDistinct" -"row_numberForEach" -"row_numberIf" -"row_numberMap" -"row_numberMerge" -"row_numberNull" -"row_numberOrDefault" -"row_numberOrNull" -"row_numberResample" -"row_numberSimpleState" -"row_numberState" -"rpad" -"rtrim" -"runningAccumulate" -"runningConcurrency" -"runningDifference" -"runningDifferenceStartingWithFirstValue" -"s2CapContains" -"s2CapUnion" -"s2CellsIntersect" -"s2GetNeighbors" -"s2RectAdd" -"s2RectContains" -"s2RectIntersection" -"s2RectUnion" -"s2ToGeo" -"scalarProduct" -"sequenceCount" -"sequenceCountArgMax" -"sequenceCountArgMin" -"sequenceCountArray" -"sequenceCountDistinct" -"sequenceCountForEach" -"sequenceCountIf" -"sequenceCountMap" -"sequenceCountMerge" -"sequenceCountNull" -"sequenceCountOrDefault" -"sequenceCountOrNull" -"sequenceCountResample" -"sequenceCountSimpleState" -"sequenceCountState" -"sequenceMatch" -"sequenceMatchArgMax" -"sequenceMatchArgMin" -"sequenceMatchArray" -"sequenceMatchDistinct" -"sequenceMatchForEach" -"sequenceMatchIf" -"sequenceMatchMap" -"sequenceMatchMerge" -"sequenceMatchNull" -"sequenceMatchOrDefault" -"sequenceMatchOrNull" -"sequenceMatchResample" -"sequenceMatchSimpleState" -"sequenceMatchState" -"sequenceNextNode" -"sequenceNextNodeArgMax" -"sequenceNextNodeArgMin" -"sequenceNextNodeArray" -"sequenceNextNodeDistinct" -"sequenceNextNodeForEach" -"sequenceNextNodeIf" -"sequenceNextNodeMap" -"sequenceNextNodeMerge" -"sequenceNextNodeNull" -"sequenceNextNodeOrDefault" -"sequenceNextNodeOrNull" -"sequenceNextNodeResample" -"sequenceNextNodeSimpleState" -"sequenceNextNodeState" -"seriesDecomposeSTL" -"seriesOutliersDetectTukey" -"seriesPeriodDetectFFT" -"serverTimeZone" -"serverTimezone" -"serverUUID" -"shardCount" -"shardNum" -"showCertificate" -"sigmoid" -"sign" -"simpleJSONExtractBool" -"simpleJSONExtractFloat" -"simpleJSONExtractInt" -"simpleJSONExtractRaw" -"simpleJSONExtractString" -"simpleJSONExtractUInt" -"simpleJSONHas" -"simpleLinearRegression" -"simpleLinearRegressionArgMax" -"simpleLinearRegressionArgMin" -"simpleLinearRegressionArray" -"simpleLinearRegressionDistinct" -"simpleLinearRegressionForEach" -"simpleLinearRegressionIf" -"simpleLinearRegressionMap" -"simpleLinearRegressionMerge" -"simpleLinearRegressionNull" -"simpleLinearRegressionOrDefault" -"simpleLinearRegressionOrNull" -"simpleLinearRegressionResample" -"simpleLinearRegressionSimpleState" -"simpleLinearRegressionState" -"sin" -"singleValueOrNull" -"singleValueOrNullArgMax" -"singleValueOrNullArgMin" -"singleValueOrNullArray" -"singleValueOrNullDistinct" -"singleValueOrNullForEach" -"singleValueOrNullIf" -"singleValueOrNullMap" -"singleValueOrNullMerge" -"singleValueOrNullNull" -"singleValueOrNullOrDefault" -"singleValueOrNullOrNull" -"singleValueOrNullResample" -"singleValueOrNullSimpleState" -"singleValueOrNullState" -"sinh" -"sipHash128" -"sipHash128Keyed" -"sipHash128Reference" -"sipHash128ReferenceKeyed" -"sipHash64" -"sipHash64Keyed" -"skewPop" -"skewPopArgMax" -"skewPopArgMin" 
-"skewPopArray" -"skewPopDistinct" -"skewPopForEach" -"skewPopIf" -"skewPopMap" -"skewPopMerge" -"skewPopNull" -"skewPopOrDefault" -"skewPopOrNull" -"skewPopResample" -"skewPopSimpleState" -"skewPopState" -"skewSamp" -"skewSampArgMax" -"skewSampArgMin" -"skewSampArray" -"skewSampDistinct" -"skewSampForEach" -"skewSampIf" -"skewSampMap" -"skewSampMerge" -"skewSampNull" -"skewSampOrDefault" -"skewSampOrNull" -"skewSampResample" -"skewSampSimpleState" -"skewSampState" -"sleep" -"sleepEachRow" -"snowflakeIDToDateTime" -"snowflakeIDToDateTime64" -"snowflakeToDateTime" -"snowflakeToDateTime64" -"soundex" -"space" -"sparkBar" -"sparkBarArgMax" -"sparkBarArgMin" -"sparkBarArray" -"sparkBarDistinct" -"sparkBarForEach" -"sparkBarIf" -"sparkBarMap" -"sparkBarMerge" -"sparkBarNull" -"sparkBarOrDefault" -"sparkBarOrNull" -"sparkBarResample" -"sparkBarSimpleState" -"sparkBarState" -"sparkbar" -"sparkbarArgMax" -"sparkbarArgMin" -"sparkbarArray" -"sparkbarDistinct" -"sparkbarForEach" -"sparkbarIf" -"sparkbarMap" -"sparkbarMerge" -"sparkbarNull" -"sparkbarOrDefault" -"sparkbarOrNull" -"sparkbarResample" -"sparkbarSimpleState" -"sparkbarState" -"splitByAlpha" -"splitByChar" -"splitByNonAlpha" -"splitByRegexp" -"splitByString" -"splitByWhitespace" -"sqid" -"sqidDecode" -"sqidEncode" -"sqrt" -"startsWith" -"startsWithUTF8" -"stddevPop" -"stddevPopArgMax" -"stddevPopArgMin" -"stddevPopArray" -"stddevPopDistinct" -"stddevPopForEach" -"stddevPopIf" -"stddevPopMap" -"stddevPopMerge" -"stddevPopNull" -"stddevPopOrDefault" -"stddevPopOrNull" -"stddevPopResample" -"stddevPopSimpleState" -"stddevPopStable" -"stddevPopStableArgMax" -"stddevPopStableArgMin" -"stddevPopStableArray" -"stddevPopStableDistinct" -"stddevPopStableForEach" -"stddevPopStableIf" -"stddevPopStableMap" -"stddevPopStableMerge" -"stddevPopStableNull" -"stddevPopStableOrDefault" -"stddevPopStableOrNull" -"stddevPopStableResample" -"stddevPopStableSimpleState" -"stddevPopStableState" -"stddevPopState" -"stddevSamp" -"stddevSampArgMax" -"stddevSampArgMin" -"stddevSampArray" -"stddevSampDistinct" -"stddevSampForEach" -"stddevSampIf" -"stddevSampMap" -"stddevSampMerge" -"stddevSampNull" -"stddevSampOrDefault" -"stddevSampOrNull" -"stddevSampResample" -"stddevSampSimpleState" -"stddevSampStable" -"stddevSampStableArgMax" -"stddevSampStableArgMin" -"stddevSampStableArray" -"stddevSampStableDistinct" -"stddevSampStableForEach" -"stddevSampStableIf" -"stddevSampStableMap" -"stddevSampStableMerge" -"stddevSampStableNull" -"stddevSampStableOrDefault" -"stddevSampStableOrNull" -"stddevSampStableResample" -"stddevSampStableSimpleState" -"stddevSampStableState" -"stddevSampState" -"stem" -"stochasticLinearRegression" -"stochasticLinearRegressionArgMax" -"stochasticLinearRegressionArgMin" -"stochasticLinearRegressionArray" -"stochasticLinearRegressionDistinct" -"stochasticLinearRegressionForEach" -"stochasticLinearRegressionIf" -"stochasticLinearRegressionMap" -"stochasticLinearRegressionMerge" -"stochasticLinearRegressionNull" -"stochasticLinearRegressionOrDefault" -"stochasticLinearRegressionOrNull" -"stochasticLinearRegressionResample" -"stochasticLinearRegressionSimpleState" -"stochasticLinearRegressionState" -"stochasticLogisticRegression" -"stochasticLogisticRegressionArgMax" -"stochasticLogisticRegressionArgMin" -"stochasticLogisticRegressionArray" -"stochasticLogisticRegressionDistinct" -"stochasticLogisticRegressionForEach" -"stochasticLogisticRegressionIf" -"stochasticLogisticRegressionMap" -"stochasticLogisticRegressionMerge" 
-"stochasticLogisticRegressionNull" -"stochasticLogisticRegressionOrDefault" -"stochasticLogisticRegressionOrNull" -"stochasticLogisticRegressionResample" -"stochasticLogisticRegressionSimpleState" -"stochasticLogisticRegressionState" -"str_to_date" -"str_to_map" -"stringJaccardIndex" -"stringJaccardIndexUTF8" -"stringToH3" -"structureToCapnProtoSchema" -"structureToProtobufSchema" -"studentTTest" -"studentTTestArgMax" -"studentTTestArgMin" -"studentTTestArray" -"studentTTestDistinct" -"studentTTestForEach" -"studentTTestIf" -"studentTTestMap" -"studentTTestMerge" -"studentTTestNull" -"studentTTestOrDefault" -"studentTTestOrNull" -"studentTTestResample" -"studentTTestSimpleState" -"studentTTestState" -"subBitmap" -"subDate" -"substr" -"substring" -"substringIndex" -"substringIndexUTF8" -"substringUTF8" -"subtractDays" -"subtractHours" -"subtractInterval" -"subtractMicroseconds" -"subtractMilliseconds" -"subtractMinutes" -"subtractMonths" -"subtractNanoseconds" -"subtractQuarters" -"subtractSeconds" -"subtractTupleOfIntervals" -"subtractWeeks" -"subtractYears" -"sum" -"sumArgMax" -"sumArgMin" -"sumArray" -"sumCount" -"sumCountArgMax" -"sumCountArgMin" -"sumCountArray" -"sumCountDistinct" -"sumCountForEach" -"sumCountIf" -"sumCountMap" -"sumCountMerge" -"sumCountNull" -"sumCountOrDefault" -"sumCountOrNull" -"sumCountResample" -"sumCountSimpleState" -"sumCountState" -"sumDistinct" -"sumForEach" -"sumIf" -"sumKahan" -"sumKahanArgMax" -"sumKahanArgMin" -"sumKahanArray" -"sumKahanDistinct" -"sumKahanForEach" -"sumKahanIf" -"sumKahanMap" -"sumKahanMerge" -"sumKahanNull" -"sumKahanOrDefault" -"sumKahanOrNull" -"sumKahanResample" -"sumKahanSimpleState" -"sumKahanState" -"sumMap" -"sumMapFiltered" -"sumMapFilteredArgMax" -"sumMapFilteredArgMin" -"sumMapFilteredArray" -"sumMapFilteredDistinct" -"sumMapFilteredForEach" -"sumMapFilteredIf" -"sumMapFilteredMap" -"sumMapFilteredMerge" -"sumMapFilteredNull" -"sumMapFilteredOrDefault" -"sumMapFilteredOrNull" -"sumMapFilteredResample" -"sumMapFilteredSimpleState" -"sumMapFilteredState" -"sumMapFilteredWithOverflow" -"sumMapFilteredWithOverflowArgMax" -"sumMapFilteredWithOverflowArgMin" -"sumMapFilteredWithOverflowArray" -"sumMapFilteredWithOverflowDistinct" -"sumMapFilteredWithOverflowForEach" -"sumMapFilteredWithOverflowIf" -"sumMapFilteredWithOverflowMap" -"sumMapFilteredWithOverflowMerge" -"sumMapFilteredWithOverflowNull" -"sumMapFilteredWithOverflowOrDefault" -"sumMapFilteredWithOverflowOrNull" -"sumMapFilteredWithOverflowResample" -"sumMapFilteredWithOverflowSimpleState" -"sumMapFilteredWithOverflowState" -"sumMapWithOverflow" -"sumMapWithOverflowArgMax" -"sumMapWithOverflowArgMin" -"sumMapWithOverflowArray" -"sumMapWithOverflowDistinct" -"sumMapWithOverflowForEach" -"sumMapWithOverflowIf" -"sumMapWithOverflowMap" -"sumMapWithOverflowMerge" -"sumMapWithOverflowNull" -"sumMapWithOverflowOrDefault" -"sumMapWithOverflowOrNull" -"sumMapWithOverflowResample" -"sumMapWithOverflowSimpleState" -"sumMapWithOverflowState" -"sumMappedArrays" -"sumMappedArraysArgMax" -"sumMappedArraysArgMin" -"sumMappedArraysArray" -"sumMappedArraysDistinct" -"sumMappedArraysForEach" -"sumMappedArraysIf" -"sumMappedArraysMap" -"sumMappedArraysMerge" -"sumMappedArraysNull" -"sumMappedArraysOrDefault" -"sumMappedArraysOrNull" -"sumMappedArraysResample" -"sumMappedArraysSimpleState" -"sumMappedArraysState" -"sumMerge" -"sumNull" -"sumOrDefault" -"sumOrNull" -"sumResample" -"sumSimpleState" -"sumState" -"sumWithOverflow" -"sumWithOverflowArgMax" -"sumWithOverflowArgMin" 
-"sumWithOverflowArray" -"sumWithOverflowDistinct" -"sumWithOverflowForEach" -"sumWithOverflowIf" -"sumWithOverflowMap" -"sumWithOverflowMerge" -"sumWithOverflowNull" -"sumWithOverflowOrDefault" -"sumWithOverflowOrNull" -"sumWithOverflowResample" -"sumWithOverflowSimpleState" -"sumWithOverflowState" -"svg" -"synonyms" -"tan" -"tanh" -"tcpPort" -"tgamma" -"theilsU" -"theilsUArgMax" -"theilsUArgMin" -"theilsUArray" -"theilsUDistinct" -"theilsUForEach" -"theilsUIf" -"theilsUMap" -"theilsUMerge" -"theilsUNull" -"theilsUOrDefault" -"theilsUOrNull" -"theilsUResample" -"theilsUSimpleState" -"theilsUState" -"throwIf" -"tid" -"timeDiff" -"timeSlot" -"timeSlots" -"timeZone" -"timeZoneOf" -"timeZoneOffset" -"timestamp" -"timestampDiff" -"timestamp_diff" -"timezone" -"timezoneOf" -"timezoneOffset" -"toBool" -"toColumnTypeName" -"toDate" -"toDate32" -"toDate32OrDefault" -"toDate32OrNull" -"toDate32OrZero" -"toDateOrDefault" -"toDateOrNull" -"toDateOrZero" -"toDateTime" -"toDateTime32" -"toDateTime64" -"toDateTime64OrDefault" -"toDateTime64OrNull" -"toDateTime64OrZero" -"toDateTimeOrDefault" -"toDateTimeOrNull" -"toDateTimeOrZero" -"toDayOfMonth" -"toDayOfWeek" -"toDayOfYear" -"toDaysSinceYearZero" -"toDecimal128" -"toDecimal128OrDefault" -"toDecimal128OrNull" -"toDecimal128OrZero" -"toDecimal256" -"toDecimal256OrDefault" -"toDecimal256OrNull" -"toDecimal256OrZero" -"toDecimal32" -"toDecimal32OrDefault" -"toDecimal32OrNull" -"toDecimal32OrZero" -"toDecimal64" -"toDecimal64OrDefault" -"toDecimal64OrNull" -"toDecimal64OrZero" -"toDecimalString" -"toFixedString" -"toFloat32" -"toFloat32OrDefault" -"toFloat32OrNull" -"toFloat32OrZero" -"toFloat64" -"toFloat64OrDefault" -"toFloat64OrNull" -"toFloat64OrZero" -"toHour" -"toIPv4" -"toIPv4OrDefault" -"toIPv4OrNull" -"toIPv4OrZero" -"toIPv6" -"toIPv6OrDefault" -"toIPv6OrNull" -"toIPv6OrZero" -"toISOWeek" -"toISOYear" -"toInt128" -"toInt128OrDefault" -"toInt128OrNull" -"toInt128OrZero" -"toInt16" -"toInt16OrDefault" -"toInt16OrNull" -"toInt16OrZero" -"toInt256" -"toInt256OrDefault" -"toInt256OrNull" -"toInt256OrZero" -"toInt32" -"toInt32OrDefault" -"toInt32OrNull" -"toInt32OrZero" -"toInt64" -"toInt64OrDefault" -"toInt64OrNull" -"toInt64OrZero" -"toInt8" -"toInt8OrDefault" -"toInt8OrNull" -"toInt8OrZero" -"toIntervalDay" -"toIntervalHour" -"toIntervalMicrosecond" -"toIntervalMillisecond" -"toIntervalMinute" -"toIntervalMonth" -"toIntervalNanosecond" -"toIntervalQuarter" -"toIntervalSecond" -"toIntervalWeek" -"toIntervalYear" -"toJSONString" -"toLastDayOfMonth" -"toLastDayOfWeek" -"toLowCardinality" -"toMillisecond" -"toMinute" -"toModifiedJulianDay" -"toModifiedJulianDayOrNull" -"toMonday" -"toMonth" -"toNullable" -"toQuarter" -"toRelativeDayNum" -"toRelativeHourNum" -"toRelativeMinuteNum" -"toRelativeMonthNum" -"toRelativeQuarterNum" -"toRelativeSecondNum" -"toRelativeWeekNum" -"toRelativeYearNum" -"toSecond" -"toStartOfDay" -"toStartOfFifteenMinutes" -"toStartOfFiveMinute" -"toStartOfFiveMinutes" -"toStartOfHour" -"toStartOfISOYear" -"toStartOfInterval" -"toStartOfMicrosecond" -"toStartOfMillisecond" -"toStartOfMinute" -"toStartOfMonth" -"toStartOfNanosecond" -"toStartOfQuarter" -"toStartOfSecond" -"toStartOfTenMinutes" -"toStartOfWeek" -"toStartOfYear" -"toString" -"toStringCutToZero" -"toTime" -"toTimeZone" -"toTimezone" -"toTypeName" -"toUInt128" -"toUInt128OrDefault" -"toUInt128OrNull" -"toUInt128OrZero" -"toUInt16" -"toUInt16OrDefault" -"toUInt16OrNull" -"toUInt16OrZero" -"toUInt256" -"toUInt256OrDefault" -"toUInt256OrNull" -"toUInt256OrZero" -"toUInt32" 
-"toUInt32OrDefault" -"toUInt32OrNull" -"toUInt32OrZero" -"toUInt64" -"toUInt64OrDefault" -"toUInt64OrNull" -"toUInt64OrZero" -"toUInt8" -"toUInt8OrDefault" -"toUInt8OrNull" -"toUInt8OrZero" -"toUTCTimestamp" -"toUUID" -"toUUIDOrDefault" -"toUUIDOrNull" -"toUUIDOrZero" -"toUnixTimestamp" -"toUnixTimestamp64Micro" -"toUnixTimestamp64Milli" -"toUnixTimestamp64Nano" -"toValidUTF8" -"toWeek" -"toYYYYMM" -"toYYYYMMDD" -"toYYYYMMDDhhmmss" -"toYear" -"toYearWeek" -"to_utc_timestamp" -"today" -"tokens" -"topK" -"topKArgMax" -"topKArgMin" -"topKArray" -"topKDistinct" -"topKForEach" -"topKIf" -"topKMap" -"topKMerge" -"topKNull" -"topKOrDefault" -"topKOrNull" -"topKResample" -"topKSimpleState" -"topKState" -"topKWeighted" -"topKWeightedArgMax" -"topKWeightedArgMin" -"topKWeightedArray" -"topKWeightedDistinct" -"topKWeightedForEach" -"topKWeightedIf" -"topKWeightedMap" -"topKWeightedMerge" -"topKWeightedNull" -"topKWeightedOrDefault" -"topKWeightedOrNull" -"topKWeightedResample" -"topKWeightedSimpleState" -"topKWeightedState" -"topLevelDomain" -"topLevelDomainRFC" -"transactionID" -"transactionLatestSnapshot" -"transactionOldestSnapshot" -"transform" -"translate" -"translateUTF8" -"trim" -"trimBoth" -"trimLeft" -"trimRight" -"trunc" -"truncate" -"tryBase58Decode" -"tryBase64Decode" -"tryBase64URLDecode" -"tryDecrypt" -"tryIdnaEncode" -"tryPunycodeDecode" -"tumble" -"tumbleEnd" -"tumbleStart" -"tuple" -"tupleConcat" -"tupleDivide" -"tupleDivideByNumber" -"tupleElement" -"tupleHammingDistance" -"tupleIntDiv" -"tupleIntDivByNumber" -"tupleIntDivOrZero" -"tupleIntDivOrZeroByNumber" -"tupleMinus" -"tupleModulo" -"tupleModuloByNumber" -"tupleMultiply" -"tupleMultiplyByNumber" -"tupleNames" -"tupleNegate" -"tuplePlus" -"tupleToNameValuePairs" -"ucase" -"unbin" -"unhex" -"uniq" -"uniqArgMax" -"uniqArgMin" -"uniqArray" -"uniqCombined" -"uniqCombined64" -"uniqCombined64ArgMax" -"uniqCombined64ArgMin" -"uniqCombined64Array" -"uniqCombined64Distinct" -"uniqCombined64ForEach" -"uniqCombined64If" -"uniqCombined64Map" -"uniqCombined64Merge" -"uniqCombined64Null" -"uniqCombined64OrDefault" -"uniqCombined64OrNull" -"uniqCombined64Resample" -"uniqCombined64SimpleState" -"uniqCombined64State" -"uniqCombinedArgMax" -"uniqCombinedArgMin" -"uniqCombinedArray" -"uniqCombinedDistinct" -"uniqCombinedForEach" -"uniqCombinedIf" -"uniqCombinedMap" -"uniqCombinedMerge" -"uniqCombinedNull" -"uniqCombinedOrDefault" -"uniqCombinedOrNull" -"uniqCombinedResample" -"uniqCombinedSimpleState" -"uniqCombinedState" -"uniqDistinct" -"uniqExact" -"uniqExactArgMax" -"uniqExactArgMin" -"uniqExactArray" -"uniqExactDistinct" -"uniqExactForEach" -"uniqExactIf" -"uniqExactMap" -"uniqExactMerge" -"uniqExactNull" -"uniqExactOrDefault" -"uniqExactOrNull" -"uniqExactResample" -"uniqExactSimpleState" -"uniqExactState" -"uniqForEach" -"uniqHLL12" -"uniqHLL12ArgMax" -"uniqHLL12ArgMin" -"uniqHLL12Array" -"uniqHLL12Distinct" -"uniqHLL12ForEach" -"uniqHLL12If" -"uniqHLL12Map" -"uniqHLL12Merge" -"uniqHLL12Null" -"uniqHLL12OrDefault" -"uniqHLL12OrNull" -"uniqHLL12Resample" -"uniqHLL12SimpleState" -"uniqHLL12State" -"uniqIf" -"uniqMap" -"uniqMerge" -"uniqNull" -"uniqOrDefault" -"uniqOrNull" -"uniqResample" -"uniqSimpleState" -"uniqState" -"uniqTheta" -"uniqThetaArgMax" -"uniqThetaArgMin" -"uniqThetaArray" -"uniqThetaDistinct" -"uniqThetaForEach" -"uniqThetaIf" -"uniqThetaIntersect" -"uniqThetaMap" -"uniqThetaMerge" -"uniqThetaNot" -"uniqThetaNull" -"uniqThetaOrDefault" -"uniqThetaOrNull" -"uniqThetaResample" -"uniqThetaSimpleState" -"uniqThetaState" 
-"uniqThetaUnion" -"uniqUpTo" -"uniqUpToArgMax" -"uniqUpToArgMin" -"uniqUpToArray" -"uniqUpToDistinct" -"uniqUpToForEach" -"uniqUpToIf" -"uniqUpToMap" -"uniqUpToMerge" -"uniqUpToNull" -"uniqUpToOrDefault" -"uniqUpToOrNull" -"uniqUpToResample" -"uniqUpToSimpleState" -"uniqUpToState" -"upper" -"upperUTF8" -"uptime" -"user" -"validateNestedArraySizes" -"varPop" -"varPopArgMax" -"varPopArgMin" -"varPopArray" -"varPopDistinct" -"varPopForEach" -"varPopIf" -"varPopMap" -"varPopMerge" -"varPopNull" -"varPopOrDefault" -"varPopOrNull" -"varPopResample" -"varPopSimpleState" -"varPopStable" -"varPopStableArgMax" -"varPopStableArgMin" -"varPopStableArray" -"varPopStableDistinct" -"varPopStableForEach" -"varPopStableIf" -"varPopStableMap" -"varPopStableMerge" -"varPopStableNull" -"varPopStableOrDefault" -"varPopStableOrNull" -"varPopStableResample" -"varPopStableSimpleState" -"varPopStableState" -"varPopState" -"varSamp" -"varSampArgMax" -"varSampArgMin" -"varSampArray" -"varSampDistinct" -"varSampForEach" -"varSampIf" -"varSampMap" -"varSampMerge" -"varSampNull" -"varSampOrDefault" -"varSampOrNull" -"varSampResample" -"varSampSimpleState" -"varSampStable" -"varSampStableArgMax" -"varSampStableArgMin" -"varSampStableArray" -"varSampStableDistinct" -"varSampStableForEach" -"varSampStableIf" -"varSampStableMap" -"varSampStableMerge" -"varSampStableNull" -"varSampStableOrDefault" -"varSampStableOrNull" -"varSampStableResample" -"varSampStableSimpleState" -"varSampStableState" -"varSampState" -"variantElement" -"variantType" -"vectorDifference" -"vectorSum" -"version" -"visibleWidth" -"visitParamExtractBool" -"visitParamExtractFloat" -"visitParamExtractInt" -"visitParamExtractRaw" -"visitParamExtractString" -"visitParamExtractUInt" -"visitParamHas" -"week" -"welchTTest" -"welchTTestArgMax" -"welchTTestArgMin" -"welchTTestArray" -"welchTTestDistinct" -"welchTTestForEach" -"welchTTestIf" -"welchTTestMap" -"welchTTestMerge" -"welchTTestNull" -"welchTTestOrDefault" -"welchTTestOrNull" -"welchTTestResample" -"welchTTestSimpleState" -"welchTTestState" -"widthBucket" -"width_bucket" -"windowFunnel" -"windowFunnelArgMax" -"windowFunnelArgMin" -"windowFunnelArray" -"windowFunnelDistinct" -"windowFunnelForEach" -"windowFunnelIf" -"windowFunnelMap" -"windowFunnelMerge" -"windowFunnelNull" -"windowFunnelOrDefault" -"windowFunnelOrNull" -"windowFunnelResample" -"windowFunnelSimpleState" -"windowFunnelState" -"windowID" -"wkt" -"wordShingleMinHash" -"wordShingleMinHashArg" -"wordShingleMinHashArgCaseInsensitive" -"wordShingleMinHashArgCaseInsensitiveUTF8" -"wordShingleMinHashArgUTF8" -"wordShingleMinHashCaseInsensitive" -"wordShingleMinHashCaseInsensitiveUTF8" -"wordShingleMinHashUTF8" -"wordShingleSimHash" -"wordShingleSimHashCaseInsensitive" -"wordShingleSimHashCaseInsensitiveUTF8" -"wordShingleSimHashUTF8" -"wyHash64" -"xor" -"xxHash32" -"xxHash64" -"xxh3" -"yandexConsistentHash" -"yearweek" -"yesterday" -"zookeeperSessionUptime" +"bool" +"boolean" diff --git a/tests/fuzz/dictionaries/functions.dict b/tests/fuzz/dictionaries/functions.dict index e562595fb67..e69de29bb2d 100644 --- a/tests/fuzz/dictionaries/functions.dict +++ b/tests/fuzz/dictionaries/functions.dict @@ -1,4283 +0,0 @@ -"BIT_AND" -"BIT_ANDArgMax" -"BIT_ANDArgMin" -"BIT_ANDArray" -"BIT_ANDDistinct" -"BIT_ANDForEach" -"BIT_ANDIf" -"BIT_ANDMap" -"BIT_ANDMerge" -"BIT_ANDNull" -"BIT_ANDOrDefault" -"BIT_ANDOrNull" -"BIT_ANDResample" -"BIT_ANDSimpleState" -"BIT_ANDState" -"BIT_OR" -"BIT_ORArgMax" -"BIT_ORArgMin" -"BIT_ORArray" -"BIT_ORDistinct" 
-"BIT_ORForEach" -"BIT_ORIf" -"BIT_ORMap" -"BIT_ORMerge" -"BIT_ORNull" -"BIT_OROrDefault" -"BIT_OROrNull" -"BIT_ORResample" -"BIT_ORSimpleState" -"BIT_ORState" -"BIT_XOR" -"BIT_XORArgMax" -"BIT_XORArgMin" -"BIT_XORArray" -"BIT_XORDistinct" -"BIT_XORForEach" -"BIT_XORIf" -"BIT_XORMap" -"BIT_XORMerge" -"BIT_XORNull" -"BIT_XOROrDefault" -"BIT_XOROrNull" -"BIT_XORResample" -"BIT_XORSimpleState" -"BIT_XORState" -"BLAKE3" -"CAST" -"CHARACTER_LENGTH" -"CHAR_LENGTH" -"COVAR_POP" -"COVAR_POPArgMax" -"COVAR_POPArgMin" -"COVAR_POPArray" -"COVAR_POPDistinct" -"COVAR_POPForEach" -"COVAR_POPIf" -"COVAR_POPMap" -"COVAR_POPMerge" -"COVAR_POPNull" -"COVAR_POPOrDefault" -"COVAR_POPOrNull" -"COVAR_POPResample" -"COVAR_POPSimpleState" -"COVAR_POPState" -"COVAR_SAMP" -"COVAR_SAMPArgMax" -"COVAR_SAMPArgMin" -"COVAR_SAMPArray" -"COVAR_SAMPDistinct" -"COVAR_SAMPForEach" -"COVAR_SAMPIf" -"COVAR_SAMPMap" -"COVAR_SAMPMerge" -"COVAR_SAMPNull" -"COVAR_SAMPOrDefault" -"COVAR_SAMPOrNull" -"COVAR_SAMPResample" -"COVAR_SAMPSimpleState" -"COVAR_SAMPState" -"CRC32" -"CRC32IEEE" -"CRC64" -"DATABASE" -"DATE" -"DATE_DIFF" -"DATE_FORMAT" -"DATE_TRUNC" -"DAY" -"DAYOFMONTH" -"DAYOFWEEK" -"DAYOFYEAR" -"FORMAT_BYTES" -"FQDN" -"FROM_BASE64" -"FROM_DAYS" -"FROM_UNIXTIME" -"HOUR" -"INET6_ATON" -"INET6_NTOA" -"INET_ATON" -"INET_NTOA" -"IPv4CIDRToRange" -"IPv4NumToString" -"IPv4NumToStringClassC" -"IPv4StringToNum" -"IPv4StringToNumOrDefault" -"IPv4StringToNumOrNull" -"IPv4ToIPv6" -"IPv6CIDRToRange" -"IPv6NumToString" -"IPv6StringToNum" -"IPv6StringToNumOrDefault" -"IPv6StringToNumOrNull" -"JSONArrayLength" -"JSONExtract" -"JSONExtractArrayRaw" -"JSONExtractBool" -"JSONExtractFloat" -"JSONExtractInt" -"JSONExtractKeys" -"JSONExtractKeysAndValues" -"JSONExtractKeysAndValuesRaw" -"JSONExtractRaw" -"JSONExtractString" -"JSONExtractUInt" -"JSONHas" -"JSONKey" -"JSONLength" -"JSONMergePatch" -"JSONType" -"JSON_ARRAY_LENGTH" -"JSON_EXISTS" -"JSON_QUERY" -"JSON_VALUE" -"L1Distance" -"L1Norm" -"L1Normalize" -"L2Distance" -"L2Norm" -"L2Normalize" -"L2SquaredDistance" -"L2SquaredNorm" -"LAST_DAY" -"LinfDistance" -"LinfNorm" -"LinfNormalize" -"LpDistance" -"LpNorm" -"LpNormalize" -"MACNumToString" -"MACStringToNum" -"MACStringToOUI" -"MAP_FROM_ARRAYS" -"MD4" -"MD5" -"MILLISECOND" -"MINUTE" -"MONTH" -"OCTET_LENGTH" -"QUARTER" -"REGEXP_EXTRACT" -"REGEXP_MATCHES" -"REGEXP_REPLACE" -"SCHEMA" -"SECOND" -"SHA1" -"SHA224" -"SHA256" -"SHA384" -"SHA512" -"SHA512_256" -"STD" -"STDArgMax" -"STDArgMin" -"STDArray" -"STDDEV_POP" -"STDDEV_POPArgMax" -"STDDEV_POPArgMin" -"STDDEV_POPArray" -"STDDEV_POPDistinct" -"STDDEV_POPForEach" -"STDDEV_POPIf" -"STDDEV_POPMap" -"STDDEV_POPMerge" -"STDDEV_POPNull" -"STDDEV_POPOrDefault" -"STDDEV_POPOrNull" -"STDDEV_POPResample" -"STDDEV_POPSimpleState" -"STDDEV_POPState" -"STDDEV_SAMP" -"STDDEV_SAMPArgMax" -"STDDEV_SAMPArgMin" -"STDDEV_SAMPArray" -"STDDEV_SAMPDistinct" -"STDDEV_SAMPForEach" -"STDDEV_SAMPIf" -"STDDEV_SAMPMap" -"STDDEV_SAMPMerge" -"STDDEV_SAMPNull" -"STDDEV_SAMPOrDefault" -"STDDEV_SAMPOrNull" -"STDDEV_SAMPResample" -"STDDEV_SAMPSimpleState" -"STDDEV_SAMPState" -"STDDistinct" -"STDForEach" -"STDIf" -"STDMap" -"STDMerge" -"STDNull" -"STDOrDefault" -"STDOrNull" -"STDResample" -"STDSimpleState" -"STDState" -"SUBSTRING_INDEX" -"SVG" -"TIMESTAMP_DIFF" -"TO_BASE64" -"TO_DAYS" -"TO_UNIXTIME" -"ULIDStringToDateTime" -"URLHash" -"URLHierarchy" -"URLPathHierarchy" -"UTCTimestamp" -"UTC_timestamp" -"UUIDNumToString" -"UUIDStringToNum" -"UUIDToNum" -"UUIDv7ToDateTime" -"VAR_POP" -"VAR_POPArgMax" -"VAR_POPArgMin" 
-"VAR_POPArray" -"VAR_POPDistinct" -"VAR_POPForEach" -"VAR_POPIf" -"VAR_POPMap" -"VAR_POPMerge" -"VAR_POPNull" -"VAR_POPOrDefault" -"VAR_POPOrNull" -"VAR_POPResample" -"VAR_POPSimpleState" -"VAR_POPState" -"VAR_SAMP" -"VAR_SAMPArgMax" -"VAR_SAMPArgMin" -"VAR_SAMPArray" -"VAR_SAMPDistinct" -"VAR_SAMPForEach" -"VAR_SAMPIf" -"VAR_SAMPMap" -"VAR_SAMPMerge" -"VAR_SAMPNull" -"VAR_SAMPOrDefault" -"VAR_SAMPOrNull" -"VAR_SAMPResample" -"VAR_SAMPSimpleState" -"VAR_SAMPState" -"YEAR" -"YYYYMMDDToDate" -"YYYYMMDDToDate32" -"YYYYMMDDhhmmssToDateTime" -"YYYYMMDDhhmmssToDateTime64" -"_CAST" -"__actionName" -"__bitBoolMaskAnd" -"__bitBoolMaskOr" -"__bitSwapLastTwo" -"__bitWrapperFunc" -"__getScalar" -"__scalarSubqueryResult" -"abs" -"accurateCast" -"accurateCastOrDefault" -"accurateCastOrNull" -"acos" -"acosh" -"addDate" -"addDays" -"addHours" -"addInterval" -"addMicroseconds" -"addMilliseconds" -"addMinutes" -"addMonths" -"addNanoseconds" -"addQuarters" -"addSeconds" -"addTupleOfIntervals" -"addWeeks" -"addYears" -"addressToLine" -"addressToLineWithInlines" -"addressToSymbol" -"aes_decrypt_mysql" -"aes_encrypt_mysql" -"age" -"aggThrow" -"aggThrowArgMax" -"aggThrowArgMin" -"aggThrowArray" -"aggThrowDistinct" -"aggThrowForEach" -"aggThrowIf" -"aggThrowMap" -"aggThrowMerge" -"aggThrowNull" -"aggThrowOrDefault" -"aggThrowOrNull" -"aggThrowResample" -"aggThrowSimpleState" -"aggThrowState" -"alphaTokens" -"analysisOfVariance" -"analysisOfVarianceArgMax" -"analysisOfVarianceArgMin" -"analysisOfVarianceArray" -"analysisOfVarianceDistinct" -"analysisOfVarianceForEach" -"analysisOfVarianceIf" -"analysisOfVarianceMap" -"analysisOfVarianceMerge" -"analysisOfVarianceNull" -"analysisOfVarianceOrDefault" -"analysisOfVarianceOrNull" -"analysisOfVarianceResample" -"analysisOfVarianceSimpleState" -"analysisOfVarianceState" -"and" -"anova" -"anovaArgMax" -"anovaArgMin" -"anovaArray" -"anovaDistinct" -"anovaForEach" -"anovaIf" -"anovaMap" -"anovaMerge" -"anovaNull" -"anovaOrDefault" -"anovaOrNull" -"anovaResample" -"anovaSimpleState" -"anovaState" -"any" -"anyArgMax" -"anyArgMin" -"anyArray" -"anyDistinct" -"anyForEach" -"anyHeavy" -"anyHeavyArgMax" -"anyHeavyArgMin" -"anyHeavyArray" -"anyHeavyDistinct" -"anyHeavyForEach" -"anyHeavyIf" -"anyHeavyMap" -"anyHeavyMerge" -"anyHeavyNull" -"anyHeavyOrDefault" -"anyHeavyOrNull" -"anyHeavyResample" -"anyHeavySimpleState" -"anyHeavyState" -"anyIf" -"anyLast" -"anyLastArgMax" -"anyLastArgMin" -"anyLastArray" -"anyLastDistinct" -"anyLastForEach" -"anyLastIf" -"anyLastMap" -"anyLastMerge" -"anyLastNull" -"anyLastOrDefault" -"anyLastOrNull" -"anyLastResample" -"anyLastSimpleState" -"anyLastState" -"anyLast_respect_nulls" -"anyLast_respect_nullsArgMax" -"anyLast_respect_nullsArgMin" -"anyLast_respect_nullsArray" -"anyLast_respect_nullsDistinct" -"anyLast_respect_nullsForEach" -"anyLast_respect_nullsIf" -"anyLast_respect_nullsMap" -"anyLast_respect_nullsMerge" -"anyLast_respect_nullsNull" -"anyLast_respect_nullsOrDefault" -"anyLast_respect_nullsOrNull" -"anyLast_respect_nullsResample" -"anyLast_respect_nullsSimpleState" -"anyLast_respect_nullsState" -"anyMap" -"anyMerge" -"anyNull" -"anyOrDefault" -"anyOrNull" -"anyResample" -"anySimpleState" -"anyState" -"any_respect_nulls" -"any_respect_nullsArgMax" -"any_respect_nullsArgMin" -"any_respect_nullsArray" -"any_respect_nullsDistinct" -"any_respect_nullsForEach" -"any_respect_nullsIf" -"any_respect_nullsMap" -"any_respect_nullsMerge" -"any_respect_nullsNull" -"any_respect_nullsOrDefault" -"any_respect_nullsOrNull" 
-"any_respect_nullsResample" -"any_respect_nullsSimpleState" -"any_respect_nullsState" -"any_value" -"any_valueArgMax" -"any_valueArgMin" -"any_valueArray" -"any_valueDistinct" -"any_valueForEach" -"any_valueIf" -"any_valueMap" -"any_valueMerge" -"any_valueNull" -"any_valueOrDefault" -"any_valueOrNull" -"any_valueResample" -"any_valueSimpleState" -"any_valueState" -"any_value_respect_nulls" -"any_value_respect_nullsArgMax" -"any_value_respect_nullsArgMin" -"any_value_respect_nullsArray" -"any_value_respect_nullsDistinct" -"any_value_respect_nullsForEach" -"any_value_respect_nullsIf" -"any_value_respect_nullsMap" -"any_value_respect_nullsMerge" -"any_value_respect_nullsNull" -"any_value_respect_nullsOrDefault" -"any_value_respect_nullsOrNull" -"any_value_respect_nullsResample" -"any_value_respect_nullsSimpleState" -"any_value_respect_nullsState" -"appendTrailingCharIfAbsent" -"approx_top_count" -"approx_top_countArgMax" -"approx_top_countArgMin" -"approx_top_countArray" -"approx_top_countDistinct" -"approx_top_countForEach" -"approx_top_countIf" -"approx_top_countMap" -"approx_top_countMerge" -"approx_top_countNull" -"approx_top_countOrDefault" -"approx_top_countOrNull" -"approx_top_countResample" -"approx_top_countSimpleState" -"approx_top_countState" -"approx_top_k" -"approx_top_kArgMax" -"approx_top_kArgMin" -"approx_top_kArray" -"approx_top_kDistinct" -"approx_top_kForEach" -"approx_top_kIf" -"approx_top_kMap" -"approx_top_kMerge" -"approx_top_kNull" -"approx_top_kOrDefault" -"approx_top_kOrNull" -"approx_top_kResample" -"approx_top_kSimpleState" -"approx_top_kState" -"approx_top_sum" -"approx_top_sumArgMax" -"approx_top_sumArgMin" -"approx_top_sumArray" -"approx_top_sumDistinct" -"approx_top_sumForEach" -"approx_top_sumIf" -"approx_top_sumMap" -"approx_top_sumMerge" -"approx_top_sumNull" -"approx_top_sumOrDefault" -"approx_top_sumOrNull" -"approx_top_sumResample" -"approx_top_sumSimpleState" -"approx_top_sumState" -"argMax" -"argMaxArgMax" -"argMaxArgMin" -"argMaxArray" -"argMaxDistinct" -"argMaxForEach" -"argMaxIf" -"argMaxMap" -"argMaxMerge" -"argMaxNull" -"argMaxOrDefault" -"argMaxOrNull" -"argMaxResample" -"argMaxSimpleState" -"argMaxState" -"argMin" -"argMinArgMax" -"argMinArgMin" -"argMinArray" -"argMinDistinct" -"argMinForEach" -"argMinIf" -"argMinMap" -"argMinMerge" -"argMinNull" -"argMinOrDefault" -"argMinOrNull" -"argMinResample" -"argMinSimpleState" -"argMinState" -"array" -"arrayAUC" -"arrayAll" -"arrayAvg" -"arrayCompact" -"arrayConcat" -"arrayCount" -"arrayCumSum" -"arrayCumSumNonNegative" -"arrayDifference" -"arrayDistinct" -"arrayDotProduct" -"arrayElement" -"arrayEnumerate" -"arrayEnumerateDense" -"arrayEnumerateDenseRanked" -"arrayEnumerateUniq" -"arrayEnumerateUniqRanked" -"arrayExists" -"arrayFill" -"arrayFilter" -"arrayFirst" -"arrayFirstIndex" -"arrayFirstOrNull" -"arrayFlatten" -"arrayFold" -"arrayIntersect" -"arrayJaccardIndex" -"arrayJoin" -"arrayLast" -"arrayLastIndex" -"arrayLastOrNull" -"arrayMap" -"arrayMax" -"arrayMin" -"arrayPartialReverseSort" -"arrayPartialShuffle" -"arrayPartialSort" -"arrayPopBack" -"arrayPopFront" -"arrayProduct" -"arrayPushBack" -"arrayPushFront" -"arrayRandomSample" -"arrayReduce" -"arrayReduceInRanges" -"arrayResize" -"arrayReverse" -"arrayReverseFill" -"arrayReverseSort" -"arrayReverseSplit" -"arrayRotateLeft" -"arrayRotateRight" -"arrayShiftLeft" -"arrayShiftRight" -"arrayShingles" -"arrayShuffle" -"arraySlice" -"arraySort" -"arraySplit" -"arrayStringConcat" -"arraySum" -"arrayUniq" -"arrayWithConstant" -"arrayZip" -"array_agg" 
-"array_aggArgMax" -"array_aggArgMin" -"array_aggArray" -"array_aggDistinct" -"array_aggForEach" -"array_aggIf" -"array_aggMap" -"array_aggMerge" -"array_aggNull" -"array_aggOrDefault" -"array_aggOrNull" -"array_aggResample" -"array_aggSimpleState" -"array_aggState" -"array_concat_agg" -"array_concat_aggArgMax" -"array_concat_aggArgMin" -"array_concat_aggArray" -"array_concat_aggDistinct" -"array_concat_aggForEach" -"array_concat_aggIf" -"array_concat_aggMap" -"array_concat_aggMerge" -"array_concat_aggNull" -"array_concat_aggOrDefault" -"array_concat_aggOrNull" -"array_concat_aggResample" -"array_concat_aggSimpleState" -"array_concat_aggState" -"ascii" -"asin" -"asinh" -"assumeNotNull" -"atan" -"atan2" -"atanh" -"avg" -"avgArgMax" -"avgArgMin" -"avgArray" -"avgDistinct" -"avgForEach" -"avgIf" -"avgMap" -"avgMerge" -"avgNull" -"avgOrDefault" -"avgOrNull" -"avgResample" -"avgSimpleState" -"avgState" -"avgWeighted" -"avgWeightedArgMax" -"avgWeightedArgMin" -"avgWeightedArray" -"avgWeightedDistinct" -"avgWeightedForEach" -"avgWeightedIf" -"avgWeightedMap" -"avgWeightedMerge" -"avgWeightedNull" -"avgWeightedOrDefault" -"avgWeightedOrNull" -"avgWeightedResample" -"avgWeightedSimpleState" -"avgWeightedState" -"bar" -"base58Decode" -"base58Encode" -"base64Decode" -"base64Encode" -"base64URLDecode" -"base64URLEncode" -"basename" -"bin" -"bitAnd" -"bitCount" -"bitHammingDistance" -"bitNot" -"bitOr" -"bitPositionsToArray" -"bitRotateLeft" -"bitRotateRight" -"bitShiftLeft" -"bitShiftRight" -"bitSlice" -"bitTest" -"bitTestAll" -"bitTestAny" -"bitXor" -"bitmapAnd" -"bitmapAndCardinality" -"bitmapAndnot" -"bitmapAndnotCardinality" -"bitmapBuild" -"bitmapCardinality" -"bitmapContains" -"bitmapHasAll" -"bitmapHasAny" -"bitmapMax" -"bitmapMin" -"bitmapOr" -"bitmapOrCardinality" -"bitmapSubsetInRange" -"bitmapSubsetLimit" -"bitmapToArray" -"bitmapTransform" -"bitmapXor" -"bitmapXorCardinality" -"bitmaskToArray" -"bitmaskToList" -"blockNumber" -"blockSerializedSize" -"blockSize" -"boundingRatio" -"boundingRatioArgMax" -"boundingRatioArgMin" -"boundingRatioArray" -"boundingRatioDistinct" -"boundingRatioForEach" -"boundingRatioIf" -"boundingRatioMap" -"boundingRatioMerge" -"boundingRatioNull" -"boundingRatioOrDefault" -"boundingRatioOrNull" -"boundingRatioResample" -"boundingRatioSimpleState" -"boundingRatioState" -"buildId" -"byteHammingDistance" -"byteSize" -"byteSlice" -"byteSwap" -"caseWithExpr" -"caseWithExpression" -"caseWithoutExpr" -"caseWithoutExpression" -"catboostEvaluate" -"categoricalInformationValue" -"categoricalInformationValueArgMax" -"categoricalInformationValueArgMin" -"categoricalInformationValueArray" -"categoricalInformationValueDistinct" -"categoricalInformationValueForEach" -"categoricalInformationValueIf" -"categoricalInformationValueMap" -"categoricalInformationValueMerge" -"categoricalInformationValueNull" -"categoricalInformationValueOrDefault" -"categoricalInformationValueOrNull" -"categoricalInformationValueResample" -"categoricalInformationValueSimpleState" -"categoricalInformationValueState" -"cbrt" -"ceil" -"ceiling" -"changeDay" -"changeHour" -"changeMinute" -"changeMonth" -"changeSecond" -"changeYear" -"char" -"cityHash64" -"clamp" -"coalesce" -"concat" -"concatAssumeInjective" -"concatWithSeparator" -"concatWithSeparatorAssumeInjective" -"concat_ws" -"connectionId" -"connection_id" -"contingency" -"contingencyArgMax" -"contingencyArgMin" -"contingencyArray" -"contingencyDistinct" -"contingencyForEach" -"contingencyIf" -"contingencyMap" -"contingencyMerge" -"contingencyNull" 
-"contingencyOrDefault" -"contingencyOrNull" -"contingencyResample" -"contingencySimpleState" -"contingencyState" -"convertCharset" -"corr" -"corrArgMax" -"corrArgMin" -"corrArray" -"corrDistinct" -"corrForEach" -"corrIf" -"corrMap" -"corrMatrix" -"corrMatrixArgMax" -"corrMatrixArgMin" -"corrMatrixArray" -"corrMatrixDistinct" -"corrMatrixForEach" -"corrMatrixIf" -"corrMatrixMap" -"corrMatrixMerge" -"corrMatrixNull" -"corrMatrixOrDefault" -"corrMatrixOrNull" -"corrMatrixResample" -"corrMatrixSimpleState" -"corrMatrixState" -"corrMerge" -"corrNull" -"corrOrDefault" -"corrOrNull" -"corrResample" -"corrSimpleState" -"corrStable" -"corrStableArgMax" -"corrStableArgMin" -"corrStableArray" -"corrStableDistinct" -"corrStableForEach" -"corrStableIf" -"corrStableMap" -"corrStableMerge" -"corrStableNull" -"corrStableOrDefault" -"corrStableOrNull" -"corrStableResample" -"corrStableSimpleState" -"corrStableState" -"corrState" -"cos" -"cosh" -"cosineDistance" -"count" -"countArgMax" -"countArgMin" -"countArray" -"countDigits" -"countDistinct" -"countEqual" -"countForEach" -"countIf" -"countMap" -"countMatches" -"countMatchesCaseInsensitive" -"countMerge" -"countNull" -"countOrDefault" -"countOrNull" -"countResample" -"countSimpleState" -"countState" -"countSubstrings" -"countSubstringsCaseInsensitive" -"countSubstringsCaseInsensitiveUTF8" -"covarPop" -"covarPopArgMax" -"covarPopArgMin" -"covarPopArray" -"covarPopDistinct" -"covarPopForEach" -"covarPopIf" -"covarPopMap" -"covarPopMatrix" -"covarPopMatrixArgMax" -"covarPopMatrixArgMin" -"covarPopMatrixArray" -"covarPopMatrixDistinct" -"covarPopMatrixForEach" -"covarPopMatrixIf" -"covarPopMatrixMap" -"covarPopMatrixMerge" -"covarPopMatrixNull" -"covarPopMatrixOrDefault" -"covarPopMatrixOrNull" -"covarPopMatrixResample" -"covarPopMatrixSimpleState" -"covarPopMatrixState" -"covarPopMerge" -"covarPopNull" -"covarPopOrDefault" -"covarPopOrNull" -"covarPopResample" -"covarPopSimpleState" -"covarPopStable" -"covarPopStableArgMax" -"covarPopStableArgMin" -"covarPopStableArray" -"covarPopStableDistinct" -"covarPopStableForEach" -"covarPopStableIf" -"covarPopStableMap" -"covarPopStableMerge" -"covarPopStableNull" -"covarPopStableOrDefault" -"covarPopStableOrNull" -"covarPopStableResample" -"covarPopStableSimpleState" -"covarPopStableState" -"covarPopState" -"covarSamp" -"covarSampArgMax" -"covarSampArgMin" -"covarSampArray" -"covarSampDistinct" -"covarSampForEach" -"covarSampIf" -"covarSampMap" -"covarSampMatrix" -"covarSampMatrixArgMax" -"covarSampMatrixArgMin" -"covarSampMatrixArray" -"covarSampMatrixDistinct" -"covarSampMatrixForEach" -"covarSampMatrixIf" -"covarSampMatrixMap" -"covarSampMatrixMerge" -"covarSampMatrixNull" -"covarSampMatrixOrDefault" -"covarSampMatrixOrNull" -"covarSampMatrixResample" -"covarSampMatrixSimpleState" -"covarSampMatrixState" -"covarSampMerge" -"covarSampNull" -"covarSampOrDefault" -"covarSampOrNull" -"covarSampResample" -"covarSampSimpleState" -"covarSampStable" -"covarSampStableArgMax" -"covarSampStableArgMin" -"covarSampStableArray" -"covarSampStableDistinct" -"covarSampStableForEach" -"covarSampStableIf" -"covarSampStableMap" -"covarSampStableMerge" -"covarSampStableNull" -"covarSampStableOrDefault" -"covarSampStableOrNull" -"covarSampStableResample" -"covarSampStableSimpleState" -"covarSampStableState" -"covarSampState" -"cramersV" -"cramersVArgMax" -"cramersVArgMin" -"cramersVArray" -"cramersVBiasCorrected" -"cramersVBiasCorrectedArgMax" -"cramersVBiasCorrectedArgMin" -"cramersVBiasCorrectedArray" 
-"cramersVBiasCorrectedDistinct" -"cramersVBiasCorrectedForEach" -"cramersVBiasCorrectedIf" -"cramersVBiasCorrectedMap" -"cramersVBiasCorrectedMerge" -"cramersVBiasCorrectedNull" -"cramersVBiasCorrectedOrDefault" -"cramersVBiasCorrectedOrNull" -"cramersVBiasCorrectedResample" -"cramersVBiasCorrectedSimpleState" -"cramersVBiasCorrectedState" -"cramersVDistinct" -"cramersVForEach" -"cramersVIf" -"cramersVMap" -"cramersVMerge" -"cramersVNull" -"cramersVOrDefault" -"cramersVOrNull" -"cramersVResample" -"cramersVSimpleState" -"cramersVState" -"curdate" -"currentDatabase" -"currentProfiles" -"currentRoles" -"currentSchemas" -"currentUser" -"current_database" -"current_date" -"current_schemas" -"current_timestamp" -"current_user" -"cutFragment" -"cutIPv6" -"cutQueryString" -"cutQueryStringAndFragment" -"cutToFirstSignificantSubdomain" -"cutToFirstSignificantSubdomainCustom" -"cutToFirstSignificantSubdomainCustomRFC" -"cutToFirstSignificantSubdomainCustomWithWWW" -"cutToFirstSignificantSubdomainCustomWithWWWRFC" -"cutToFirstSignificantSubdomainRFC" -"cutToFirstSignificantSubdomainWithWWW" -"cutToFirstSignificantSubdomainWithWWWRFC" -"cutURLParameter" -"cutWWW" -"damerauLevenshteinDistance" -"dateDiff" -"dateName" -"dateTime64ToSnowflake" -"dateTime64ToSnowflakeID" -"dateTimeToSnowflake" -"dateTimeToSnowflakeID" -"dateTrunc" -"date_diff" -"decodeHTMLComponent" -"decodeURLComponent" -"decodeURLFormComponent" -"decodeXMLComponent" -"decrypt" -"defaultProfiles" -"defaultRoles" -"defaultValueOfArgumentType" -"defaultValueOfTypeName" -"degrees" -"deltaSum" -"deltaSumArgMax" -"deltaSumArgMin" -"deltaSumArray" -"deltaSumDistinct" -"deltaSumForEach" -"deltaSumIf" -"deltaSumMap" -"deltaSumMerge" -"deltaSumNull" -"deltaSumOrDefault" -"deltaSumOrNull" -"deltaSumResample" -"deltaSumSimpleState" -"deltaSumState" -"deltaSumTimestamp" -"deltaSumTimestampArgMax" -"deltaSumTimestampArgMin" -"deltaSumTimestampArray" -"deltaSumTimestampDistinct" -"deltaSumTimestampForEach" -"deltaSumTimestampIf" -"deltaSumTimestampMap" -"deltaSumTimestampMerge" -"deltaSumTimestampNull" -"deltaSumTimestampOrDefault" -"deltaSumTimestampOrNull" -"deltaSumTimestampResample" -"deltaSumTimestampSimpleState" -"deltaSumTimestampState" -"demangle" -"denseRank" -"denseRankArgMax" -"denseRankArgMin" -"denseRankArray" -"denseRankDistinct" -"denseRankForEach" -"denseRankIf" -"denseRankMap" -"denseRankMerge" -"denseRankNull" -"denseRankOrDefault" -"denseRankOrNull" -"denseRankResample" -"denseRankSimpleState" -"denseRankState" -"dense_rank" -"dense_rankArgMax" -"dense_rankArgMin" -"dense_rankArray" -"dense_rankDistinct" -"dense_rankForEach" -"dense_rankIf" -"dense_rankMap" -"dense_rankMerge" -"dense_rankNull" -"dense_rankOrDefault" -"dense_rankOrNull" -"dense_rankResample" -"dense_rankSimpleState" -"dense_rankState" -"detectCharset" -"detectLanguage" -"detectLanguageMixed" -"detectLanguageUnknown" -"detectProgrammingLanguage" -"detectTonality" -"dictGet" -"dictGetAll" -"dictGetChildren" -"dictGetDate" -"dictGetDateOrDefault" -"dictGetDateTime" -"dictGetDateTimeOrDefault" -"dictGetDescendants" -"dictGetFloat32" -"dictGetFloat32OrDefault" -"dictGetFloat64" -"dictGetFloat64OrDefault" -"dictGetHierarchy" -"dictGetIPv4" -"dictGetIPv4OrDefault" -"dictGetIPv6" -"dictGetIPv6OrDefault" -"dictGetInt16" -"dictGetInt16OrDefault" -"dictGetInt32" -"dictGetInt32OrDefault" -"dictGetInt64" -"dictGetInt64OrDefault" -"dictGetInt8" -"dictGetInt8OrDefault" -"dictGetOrDefault" -"dictGetOrNull" -"dictGetString" -"dictGetStringOrDefault" -"dictGetUInt16" 
-"dictGetUInt16OrDefault" -"dictGetUInt32" -"dictGetUInt32OrDefault" -"dictGetUInt64" -"dictGetUInt64OrDefault" -"dictGetUInt8" -"dictGetUInt8OrDefault" -"dictGetUUID" -"dictGetUUIDOrDefault" -"dictHas" -"dictIsIn" -"displayName" -"distanceL1" -"distanceL2" -"distanceL2Squared" -"distanceLinf" -"distanceLp" -"divide" -"divideDecimal" -"domain" -"domainRFC" -"domainWithoutWWW" -"domainWithoutWWWRFC" -"dotProduct" -"dumpColumnStructure" -"dynamicElement" -"dynamicType" -"e" -"editDistance" -"editDistanceUTF8" -"empty" -"emptyArrayDate" -"emptyArrayDateTime" -"emptyArrayFloat32" -"emptyArrayFloat64" -"emptyArrayInt16" -"emptyArrayInt32" -"emptyArrayInt64" -"emptyArrayInt8" -"emptyArrayString" -"emptyArrayToSingle" -"emptyArrayUInt16" -"emptyArrayUInt32" -"emptyArrayUInt64" -"emptyArrayUInt8" -"enabledProfiles" -"enabledRoles" -"encodeURLComponent" -"encodeURLFormComponent" -"encodeXMLComponent" -"encrypt" -"endsWith" -"endsWithUTF8" -"entropy" -"entropyArgMax" -"entropyArgMin" -"entropyArray" -"entropyDistinct" -"entropyForEach" -"entropyIf" -"entropyMap" -"entropyMerge" -"entropyNull" -"entropyOrDefault" -"entropyOrNull" -"entropyResample" -"entropySimpleState" -"entropyState" -"equals" -"erf" -"erfc" -"errorCodeToName" -"evalMLMethod" -"exp" -"exp10" -"exp2" -"exponentialMovingAverage" -"exponentialMovingAverageArgMax" -"exponentialMovingAverageArgMin" -"exponentialMovingAverageArray" -"exponentialMovingAverageDistinct" -"exponentialMovingAverageForEach" -"exponentialMovingAverageIf" -"exponentialMovingAverageMap" -"exponentialMovingAverageMerge" -"exponentialMovingAverageNull" -"exponentialMovingAverageOrDefault" -"exponentialMovingAverageOrNull" -"exponentialMovingAverageResample" -"exponentialMovingAverageSimpleState" -"exponentialMovingAverageState" -"exponentialTimeDecayedAvg" -"exponentialTimeDecayedAvgArgMax" -"exponentialTimeDecayedAvgArgMin" -"exponentialTimeDecayedAvgArray" -"exponentialTimeDecayedAvgDistinct" -"exponentialTimeDecayedAvgForEach" -"exponentialTimeDecayedAvgIf" -"exponentialTimeDecayedAvgMap" -"exponentialTimeDecayedAvgMerge" -"exponentialTimeDecayedAvgNull" -"exponentialTimeDecayedAvgOrDefault" -"exponentialTimeDecayedAvgOrNull" -"exponentialTimeDecayedAvgResample" -"exponentialTimeDecayedAvgSimpleState" -"exponentialTimeDecayedAvgState" -"exponentialTimeDecayedCount" -"exponentialTimeDecayedCountArgMax" -"exponentialTimeDecayedCountArgMin" -"exponentialTimeDecayedCountArray" -"exponentialTimeDecayedCountDistinct" -"exponentialTimeDecayedCountForEach" -"exponentialTimeDecayedCountIf" -"exponentialTimeDecayedCountMap" -"exponentialTimeDecayedCountMerge" -"exponentialTimeDecayedCountNull" -"exponentialTimeDecayedCountOrDefault" -"exponentialTimeDecayedCountOrNull" -"exponentialTimeDecayedCountResample" -"exponentialTimeDecayedCountSimpleState" -"exponentialTimeDecayedCountState" -"exponentialTimeDecayedMax" -"exponentialTimeDecayedMaxArgMax" -"exponentialTimeDecayedMaxArgMin" -"exponentialTimeDecayedMaxArray" -"exponentialTimeDecayedMaxDistinct" -"exponentialTimeDecayedMaxForEach" -"exponentialTimeDecayedMaxIf" -"exponentialTimeDecayedMaxMap" -"exponentialTimeDecayedMaxMerge" -"exponentialTimeDecayedMaxNull" -"exponentialTimeDecayedMaxOrDefault" -"exponentialTimeDecayedMaxOrNull" -"exponentialTimeDecayedMaxResample" -"exponentialTimeDecayedMaxSimpleState" -"exponentialTimeDecayedMaxState" -"exponentialTimeDecayedSum" -"exponentialTimeDecayedSumArgMax" -"exponentialTimeDecayedSumArgMin" -"exponentialTimeDecayedSumArray" -"exponentialTimeDecayedSumDistinct" 
-"exponentialTimeDecayedSumForEach" -"exponentialTimeDecayedSumIf" -"exponentialTimeDecayedSumMap" -"exponentialTimeDecayedSumMerge" -"exponentialTimeDecayedSumNull" -"exponentialTimeDecayedSumOrDefault" -"exponentialTimeDecayedSumOrNull" -"exponentialTimeDecayedSumResample" -"exponentialTimeDecayedSumSimpleState" -"exponentialTimeDecayedSumState" -"extract" -"extractAll" -"extractAllGroups" -"extractAllGroupsHorizontal" -"extractAllGroupsVertical" -"extractGroups" -"extractKeyValuePairs" -"extractKeyValuePairsWithEscaping" -"extractTextFromHTML" -"extractURLParameter" -"extractURLParameterNames" -"extractURLParameters" -"factorial" -"farmFingerprint64" -"farmHash64" -"file" -"filesystemAvailable" -"filesystemCapacity" -"filesystemUnreserved" -"finalizeAggregation" -"firstLine" -"firstSignificantSubdomain" -"firstSignificantSubdomainCustom" -"firstSignificantSubdomainCustomRFC" -"firstSignificantSubdomainRFC" -"first_value" -"first_valueArgMax" -"first_valueArgMin" -"first_valueArray" -"first_valueDistinct" -"first_valueForEach" -"first_valueIf" -"first_valueMap" -"first_valueMerge" -"first_valueNull" -"first_valueOrDefault" -"first_valueOrNull" -"first_valueResample" -"first_valueSimpleState" -"first_valueState" -"first_value_respect_nulls" -"first_value_respect_nullsArgMax" -"first_value_respect_nullsArgMin" -"first_value_respect_nullsArray" -"first_value_respect_nullsDistinct" -"first_value_respect_nullsForEach" -"first_value_respect_nullsIf" -"first_value_respect_nullsMap" -"first_value_respect_nullsMerge" -"first_value_respect_nullsNull" -"first_value_respect_nullsOrDefault" -"first_value_respect_nullsOrNull" -"first_value_respect_nullsResample" -"first_value_respect_nullsSimpleState" -"first_value_respect_nullsState" -"flameGraph" -"flameGraphArgMax" -"flameGraphArgMin" -"flameGraphArray" -"flameGraphDistinct" -"flameGraphForEach" -"flameGraphIf" -"flameGraphMap" -"flameGraphMerge" -"flameGraphNull" -"flameGraphOrDefault" -"flameGraphOrNull" -"flameGraphResample" -"flameGraphSimpleState" -"flameGraphState" -"flatten" -"flattenTuple" -"floor" -"format" -"formatDateTime" -"formatDateTimeInJodaSyntax" -"formatQuery" -"formatQueryOrNull" -"formatQuerySingleLine" -"formatQuerySingleLineOrNull" -"formatReadableDecimalSize" -"formatReadableQuantity" -"formatReadableSize" -"formatReadableTimeDelta" -"formatRow" -"formatRowNoNewline" -"fragment" -"fromDaysSinceYearZero" -"fromDaysSinceYearZero32" -"fromModifiedJulianDay" -"fromModifiedJulianDayOrNull" -"fromUTCTimestamp" -"fromUnixTimestamp" -"fromUnixTimestamp64Micro" -"fromUnixTimestamp64Milli" -"fromUnixTimestamp64Nano" -"fromUnixTimestampInJodaSyntax" -"from_utc_timestamp" -"fullHostName" -"fuzzBits" -"gccMurmurHash" -"gcd" -"generateRandomStructure" -"generateSnowflakeID" -"generateULID" -"generateUUIDv4" -"generateUUIDv7" -"geoDistance" -"geoToH3" -"geoToS2" -"geohashDecode" -"geohashEncode" -"geohashesInBox" -"getClientHTTPHeader" -"getMacro" -"getOSKernelVersion" -"getServerPort" -"getSetting" -"getSizeOfEnumType" -"getSubcolumn" -"getTypeSerializationStreams" -"globalIn" -"globalInIgnoreSet" -"globalNotIn" -"globalNotInIgnoreSet" -"globalNotNullIn" -"globalNotNullInIgnoreSet" -"globalNullIn" -"globalNullInIgnoreSet" -"globalVariable" -"greatCircleAngle" -"greatCircleDistance" -"greater" -"greaterOrEquals" -"greatest" -"groupArray" -"groupArrayArgMax" -"groupArrayArgMin" -"groupArrayArray" -"groupArrayDistinct" -"groupArrayForEach" -"groupArrayIf" -"groupArrayInsertAt" -"groupArrayInsertAtArgMax" -"groupArrayInsertAtArgMin" 
-"groupArrayInsertAtArray" -"groupArrayInsertAtDistinct" -"groupArrayInsertAtForEach" -"groupArrayInsertAtIf" -"groupArrayInsertAtMap" -"groupArrayInsertAtMerge" -"groupArrayInsertAtNull" -"groupArrayInsertAtOrDefault" -"groupArrayInsertAtOrNull" -"groupArrayInsertAtResample" -"groupArrayInsertAtSimpleState" -"groupArrayInsertAtState" -"groupArrayIntersect" -"groupArrayIntersectArgMax" -"groupArrayIntersectArgMin" -"groupArrayIntersectArray" -"groupArrayIntersectDistinct" -"groupArrayIntersectForEach" -"groupArrayIntersectIf" -"groupArrayIntersectMap" -"groupArrayIntersectMerge" -"groupArrayIntersectNull" -"groupArrayIntersectOrDefault" -"groupArrayIntersectOrNull" -"groupArrayIntersectResample" -"groupArrayIntersectSimpleState" -"groupArrayIntersectState" -"groupArrayLast" -"groupArrayLastArgMax" -"groupArrayLastArgMin" -"groupArrayLastArray" -"groupArrayLastDistinct" -"groupArrayLastForEach" -"groupArrayLastIf" -"groupArrayLastMap" -"groupArrayLastMerge" -"groupArrayLastNull" -"groupArrayLastOrDefault" -"groupArrayLastOrNull" -"groupArrayLastResample" -"groupArrayLastSimpleState" -"groupArrayLastState" -"groupArrayMap" -"groupArrayMerge" -"groupArrayMovingAvg" -"groupArrayMovingAvgArgMax" -"groupArrayMovingAvgArgMin" -"groupArrayMovingAvgArray" -"groupArrayMovingAvgDistinct" -"groupArrayMovingAvgForEach" -"groupArrayMovingAvgIf" -"groupArrayMovingAvgMap" -"groupArrayMovingAvgMerge" -"groupArrayMovingAvgNull" -"groupArrayMovingAvgOrDefault" -"groupArrayMovingAvgOrNull" -"groupArrayMovingAvgResample" -"groupArrayMovingAvgSimpleState" -"groupArrayMovingAvgState" -"groupArrayMovingSum" -"groupArrayMovingSumArgMax" -"groupArrayMovingSumArgMin" -"groupArrayMovingSumArray" -"groupArrayMovingSumDistinct" -"groupArrayMovingSumForEach" -"groupArrayMovingSumIf" -"groupArrayMovingSumMap" -"groupArrayMovingSumMerge" -"groupArrayMovingSumNull" -"groupArrayMovingSumOrDefault" -"groupArrayMovingSumOrNull" -"groupArrayMovingSumResample" -"groupArrayMovingSumSimpleState" -"groupArrayMovingSumState" -"groupArrayNull" -"groupArrayOrDefault" -"groupArrayOrNull" -"groupArrayResample" -"groupArraySample" -"groupArraySampleArgMax" -"groupArraySampleArgMin" -"groupArraySampleArray" -"groupArraySampleDistinct" -"groupArraySampleForEach" -"groupArraySampleIf" -"groupArraySampleMap" -"groupArraySampleMerge" -"groupArraySampleNull" -"groupArraySampleOrDefault" -"groupArraySampleOrNull" -"groupArraySampleResample" -"groupArraySampleSimpleState" -"groupArraySampleState" -"groupArraySimpleState" -"groupArraySorted" -"groupArraySortedArgMax" -"groupArraySortedArgMin" -"groupArraySortedArray" -"groupArraySortedDistinct" -"groupArraySortedForEach" -"groupArraySortedIf" -"groupArraySortedMap" -"groupArraySortedMerge" -"groupArraySortedNull" -"groupArraySortedOrDefault" -"groupArraySortedOrNull" -"groupArraySortedResample" -"groupArraySortedSimpleState" -"groupArraySortedState" -"groupArrayState" -"groupBitAnd" -"groupBitAndArgMax" -"groupBitAndArgMin" -"groupBitAndArray" -"groupBitAndDistinct" -"groupBitAndForEach" -"groupBitAndIf" -"groupBitAndMap" -"groupBitAndMerge" -"groupBitAndNull" -"groupBitAndOrDefault" -"groupBitAndOrNull" -"groupBitAndResample" -"groupBitAndSimpleState" -"groupBitAndState" -"groupBitOr" -"groupBitOrArgMax" -"groupBitOrArgMin" -"groupBitOrArray" -"groupBitOrDistinct" -"groupBitOrForEach" -"groupBitOrIf" -"groupBitOrMap" -"groupBitOrMerge" -"groupBitOrNull" -"groupBitOrOrDefault" -"groupBitOrOrNull" -"groupBitOrResample" -"groupBitOrSimpleState" -"groupBitOrState" -"groupBitXor" 
-"groupBitXorArgMax" -"groupBitXorArgMin" -"groupBitXorArray" -"groupBitXorDistinct" -"groupBitXorForEach" -"groupBitXorIf" -"groupBitXorMap" -"groupBitXorMerge" -"groupBitXorNull" -"groupBitXorOrDefault" -"groupBitXorOrNull" -"groupBitXorResample" -"groupBitXorSimpleState" -"groupBitXorState" -"groupBitmap" -"groupBitmapAnd" -"groupBitmapAndArgMax" -"groupBitmapAndArgMin" -"groupBitmapAndArray" -"groupBitmapAndDistinct" -"groupBitmapAndForEach" -"groupBitmapAndIf" -"groupBitmapAndMap" -"groupBitmapAndMerge" -"groupBitmapAndNull" -"groupBitmapAndOrDefault" -"groupBitmapAndOrNull" -"groupBitmapAndResample" -"groupBitmapAndSimpleState" -"groupBitmapAndState" -"groupBitmapArgMax" -"groupBitmapArgMin" -"groupBitmapArray" -"groupBitmapDistinct" -"groupBitmapForEach" -"groupBitmapIf" -"groupBitmapMap" -"groupBitmapMerge" -"groupBitmapNull" -"groupBitmapOr" -"groupBitmapOrArgMax" -"groupBitmapOrArgMin" -"groupBitmapOrArray" -"groupBitmapOrDefault" -"groupBitmapOrDistinct" -"groupBitmapOrForEach" -"groupBitmapOrIf" -"groupBitmapOrMap" -"groupBitmapOrMerge" -"groupBitmapOrNull" -"groupBitmapOrNull" -"groupBitmapOrOrDefault" -"groupBitmapOrOrNull" -"groupBitmapOrResample" -"groupBitmapOrSimpleState" -"groupBitmapOrState" -"groupBitmapResample" -"groupBitmapSimpleState" -"groupBitmapState" -"groupBitmapXor" -"groupBitmapXorArgMax" -"groupBitmapXorArgMin" -"groupBitmapXorArray" -"groupBitmapXorDistinct" -"groupBitmapXorForEach" -"groupBitmapXorIf" -"groupBitmapXorMap" -"groupBitmapXorMerge" -"groupBitmapXorNull" -"groupBitmapXorOrDefault" -"groupBitmapXorOrNull" -"groupBitmapXorResample" -"groupBitmapXorSimpleState" -"groupBitmapXorState" -"groupConcat" -"groupConcatArgMax" -"groupConcatArgMin" -"groupConcatArray" -"groupConcatDistinct" -"groupConcatForEach" -"groupConcatIf" -"groupConcatMap" -"groupConcatMerge" -"groupConcatNull" -"groupConcatOrDefault" -"groupConcatOrNull" -"groupConcatResample" -"groupConcatSimpleState" -"groupConcatState" -"groupUniqArray" -"groupUniqArrayArgMax" -"groupUniqArrayArgMin" -"groupUniqArrayArray" -"groupUniqArrayDistinct" -"groupUniqArrayForEach" -"groupUniqArrayIf" -"groupUniqArrayMap" -"groupUniqArrayMerge" -"groupUniqArrayNull" -"groupUniqArrayOrDefault" -"groupUniqArrayOrNull" -"groupUniqArrayResample" -"groupUniqArraySimpleState" -"groupUniqArrayState" -"group_concat" -"group_concatArgMax" -"group_concatArgMin" -"group_concatArray" -"group_concatDistinct" -"group_concatForEach" -"group_concatIf" -"group_concatMap" -"group_concatMerge" -"group_concatNull" -"group_concatOrDefault" -"group_concatOrNull" -"group_concatResample" -"group_concatSimpleState" -"group_concatState" -"h3CellAreaM2" -"h3CellAreaRads2" -"h3Distance" -"h3EdgeAngle" -"h3EdgeLengthKm" -"h3EdgeLengthM" -"h3ExactEdgeLengthKm" -"h3ExactEdgeLengthM" -"h3ExactEdgeLengthRads" -"h3GetBaseCell" -"h3GetDestinationIndexFromUnidirectionalEdge" -"h3GetFaces" -"h3GetIndexesFromUnidirectionalEdge" -"h3GetOriginIndexFromUnidirectionalEdge" -"h3GetPentagonIndexes" -"h3GetRes0Indexes" -"h3GetResolution" -"h3GetUnidirectionalEdge" -"h3GetUnidirectionalEdgeBoundary" -"h3GetUnidirectionalEdgesFromHexagon" -"h3HexAreaKm2" -"h3HexAreaM2" -"h3HexRing" -"h3IndexesAreNeighbors" -"h3IsPentagon" -"h3IsResClassIII" -"h3IsValid" -"h3Line" -"h3NumHexagons" -"h3PointDistKm" -"h3PointDistM" -"h3PointDistRads" -"h3ToCenterChild" -"h3ToChildren" -"h3ToGeo" -"h3ToGeoBoundary" -"h3ToParent" -"h3ToString" -"h3UnidirectionalEdgeIsValid" -"h3kRing" -"halfMD5" -"has" -"hasAll" -"hasAny" -"hasColumnInTable" -"hasSubsequence" 
-"hasSubsequenceCaseInsensitive" -"hasSubsequenceCaseInsensitiveUTF8" -"hasSubsequenceUTF8" -"hasSubstr" -"hasThreadFuzzer" -"hasToken" -"hasTokenCaseInsensitive" -"hasTokenCaseInsensitiveOrNull" -"hasTokenOrNull" -"hex" -"hilbertDecode" -"hilbertEncode" -"histogram" -"histogramArgMax" -"histogramArgMin" -"histogramArray" -"histogramDistinct" -"histogramForEach" -"histogramIf" -"histogramMap" -"histogramMerge" -"histogramNull" -"histogramOrDefault" -"histogramOrNull" -"histogramResample" -"histogramSimpleState" -"histogramState" -"hiveHash" -"hop" -"hopEnd" -"hopStart" -"hostName" -"hostname" -"hypot" -"identity" -"idnaDecode" -"idnaEncode" -"if" -"ifNotFinite" -"ifNull" -"ignore" -"ilike" -"in" -"inIgnoreSet" -"indexHint" -"indexOf" -"initcap" -"initcapUTF8" -"initialQueryID" -"initial_query_id" -"initializeAggregation" -"instr" -"intDiv" -"intDivOrZero" -"intExp10" -"intExp2" -"intHash32" -"intHash64" -"intervalLengthSum" -"intervalLengthSumArgMax" -"intervalLengthSumArgMin" -"intervalLengthSumArray" -"intervalLengthSumDistinct" -"intervalLengthSumForEach" -"intervalLengthSumIf" -"intervalLengthSumMap" -"intervalLengthSumMerge" -"intervalLengthSumNull" -"intervalLengthSumOrDefault" -"intervalLengthSumOrNull" -"intervalLengthSumResample" -"intervalLengthSumSimpleState" -"intervalLengthSumState" -"isConstant" -"isDecimalOverflow" -"isFinite" -"isIPAddressInRange" -"isIPv4String" -"isIPv6String" -"isInfinite" -"isNaN" -"isNotDistinctFrom" -"isNotNull" -"isNull" -"isNullable" -"isValidJSON" -"isValidUTF8" -"isZeroOrNull" -"jaroSimilarity" -"jaroWinklerSimilarity" -"javaHash" -"javaHashUTF16LE" -"joinGet" -"joinGetOrNull" -"jsonMergePatch" -"jumpConsistentHash" -"kafkaMurmurHash" -"kolmogorovSmirnovTest" -"kolmogorovSmirnovTestArgMax" -"kolmogorovSmirnovTestArgMin" -"kolmogorovSmirnovTestArray" -"kolmogorovSmirnovTestDistinct" -"kolmogorovSmirnovTestForEach" -"kolmogorovSmirnovTestIf" -"kolmogorovSmirnovTestMap" -"kolmogorovSmirnovTestMerge" -"kolmogorovSmirnovTestNull" -"kolmogorovSmirnovTestOrDefault" -"kolmogorovSmirnovTestOrNull" -"kolmogorovSmirnovTestResample" -"kolmogorovSmirnovTestSimpleState" -"kolmogorovSmirnovTestState" -"kostikConsistentHash" -"kql_array_sort_asc" -"kql_array_sort_desc" -"kurtPop" -"kurtPopArgMax" -"kurtPopArgMin" -"kurtPopArray" -"kurtPopDistinct" -"kurtPopForEach" -"kurtPopIf" -"kurtPopMap" -"kurtPopMerge" -"kurtPopNull" -"kurtPopOrDefault" -"kurtPopOrNull" -"kurtPopResample" -"kurtPopSimpleState" -"kurtPopState" -"kurtSamp" -"kurtSampArgMax" -"kurtSampArgMin" -"kurtSampArray" -"kurtSampDistinct" -"kurtSampForEach" -"kurtSampIf" -"kurtSampMap" -"kurtSampMerge" -"kurtSampNull" -"kurtSampOrDefault" -"kurtSampOrNull" -"kurtSampResample" -"kurtSampSimpleState" -"kurtSampState" -"lagInFrame" -"lagInFrameArgMax" -"lagInFrameArgMin" -"lagInFrameArray" -"lagInFrameDistinct" -"lagInFrameForEach" -"lagInFrameIf" -"lagInFrameMap" -"lagInFrameMerge" -"lagInFrameNull" -"lagInFrameOrDefault" -"lagInFrameOrNull" -"lagInFrameResample" -"lagInFrameSimpleState" -"lagInFrameState" -"largestTriangleThreeBuckets" -"largestTriangleThreeBucketsArgMax" -"largestTriangleThreeBucketsArgMin" -"largestTriangleThreeBucketsArray" -"largestTriangleThreeBucketsDistinct" -"largestTriangleThreeBucketsForEach" -"largestTriangleThreeBucketsIf" -"largestTriangleThreeBucketsMap" -"largestTriangleThreeBucketsMerge" -"largestTriangleThreeBucketsNull" -"largestTriangleThreeBucketsOrDefault" -"largestTriangleThreeBucketsOrNull" -"largestTriangleThreeBucketsResample" 
-"largestTriangleThreeBucketsSimpleState" -"largestTriangleThreeBucketsState" -"last_value" -"last_valueArgMax" -"last_valueArgMin" -"last_valueArray" -"last_valueDistinct" -"last_valueForEach" -"last_valueIf" -"last_valueMap" -"last_valueMerge" -"last_valueNull" -"last_valueOrDefault" -"last_valueOrNull" -"last_valueResample" -"last_valueSimpleState" -"last_valueState" -"last_value_respect_nulls" -"last_value_respect_nullsArgMax" -"last_value_respect_nullsArgMin" -"last_value_respect_nullsArray" -"last_value_respect_nullsDistinct" -"last_value_respect_nullsForEach" -"last_value_respect_nullsIf" -"last_value_respect_nullsMap" -"last_value_respect_nullsMerge" -"last_value_respect_nullsNull" -"last_value_respect_nullsOrDefault" -"last_value_respect_nullsOrNull" -"last_value_respect_nullsResample" -"last_value_respect_nullsSimpleState" -"last_value_respect_nullsState" -"lcase" -"lcm" -"leadInFrame" -"leadInFrameArgMax" -"leadInFrameArgMin" -"leadInFrameArray" -"leadInFrameDistinct" -"leadInFrameForEach" -"leadInFrameIf" -"leadInFrameMap" -"leadInFrameMerge" -"leadInFrameNull" -"leadInFrameOrDefault" -"leadInFrameOrNull" -"leadInFrameResample" -"leadInFrameSimpleState" -"leadInFrameState" -"least" -"left" -"leftPad" -"leftPadUTF8" -"leftUTF8" -"lemmatize" -"length" -"lengthUTF8" -"less" -"lessOrEquals" -"levenshteinDistance" -"levenshteinDistanceUTF8" -"lgamma" -"like" -"ln" -"locate" -"log" -"log10" -"log1p" -"log2" -"logTrace" -"lowCardinalityIndices" -"lowCardinalityKeys" -"lower" -"lowerUTF8" -"lpad" -"ltrim" -"lttb" -"lttbArgMax" -"lttbArgMin" -"lttbArray" -"lttbDistinct" -"lttbForEach" -"lttbIf" -"lttbMap" -"lttbMerge" -"lttbNull" -"lttbOrDefault" -"lttbOrNull" -"lttbResample" -"lttbSimpleState" -"lttbState" -"makeDate" -"makeDate32" -"makeDateTime" -"makeDateTime64" -"mannWhitneyUTest" -"mannWhitneyUTestArgMax" -"mannWhitneyUTestArgMin" -"mannWhitneyUTestArray" -"mannWhitneyUTestDistinct" -"mannWhitneyUTestForEach" -"mannWhitneyUTestIf" -"mannWhitneyUTestMap" -"mannWhitneyUTestMerge" -"mannWhitneyUTestNull" -"mannWhitneyUTestOrDefault" -"mannWhitneyUTestOrNull" -"mannWhitneyUTestResample" -"mannWhitneyUTestSimpleState" -"mannWhitneyUTestState" -"map" -"mapAdd" -"mapAll" -"mapApply" -"mapConcat" -"mapContains" -"mapContainsKeyLike" -"mapExists" -"mapExtractKeyLike" -"mapFilter" -"mapFromArrays" -"mapFromString" -"mapKeys" -"mapPartialReverseSort" -"mapPartialSort" -"mapPopulateSeries" -"mapReverseSort" -"mapSort" -"mapSubtract" -"mapUpdate" -"mapValues" -"match" -"materialize" -"max" -"max2" -"maxArgMax" -"maxArgMin" -"maxArray" -"maxDistinct" -"maxForEach" -"maxIf" -"maxIntersections" -"maxIntersectionsArgMax" -"maxIntersectionsArgMin" -"maxIntersectionsArray" -"maxIntersectionsDistinct" -"maxIntersectionsForEach" -"maxIntersectionsIf" -"maxIntersectionsMap" -"maxIntersectionsMerge" -"maxIntersectionsNull" -"maxIntersectionsOrDefault" -"maxIntersectionsOrNull" -"maxIntersectionsPosition" -"maxIntersectionsPositionArgMax" -"maxIntersectionsPositionArgMin" -"maxIntersectionsPositionArray" -"maxIntersectionsPositionDistinct" -"maxIntersectionsPositionForEach" -"maxIntersectionsPositionIf" -"maxIntersectionsPositionMap" -"maxIntersectionsPositionMerge" -"maxIntersectionsPositionNull" -"maxIntersectionsPositionOrDefault" -"maxIntersectionsPositionOrNull" -"maxIntersectionsPositionResample" -"maxIntersectionsPositionSimpleState" -"maxIntersectionsPositionState" -"maxIntersectionsResample" -"maxIntersectionsSimpleState" -"maxIntersectionsState" -"maxMap" -"maxMappedArrays" 
-"maxMappedArraysArgMax" -"maxMappedArraysArgMin" -"maxMappedArraysArray" -"maxMappedArraysDistinct" -"maxMappedArraysForEach" -"maxMappedArraysIf" -"maxMappedArraysMap" -"maxMappedArraysMerge" -"maxMappedArraysNull" -"maxMappedArraysOrDefault" -"maxMappedArraysOrNull" -"maxMappedArraysResample" -"maxMappedArraysSimpleState" -"maxMappedArraysState" -"maxMerge" -"maxNull" -"maxOrDefault" -"maxOrNull" -"maxResample" -"maxSimpleState" -"maxState" -"meanZTest" -"meanZTestArgMax" -"meanZTestArgMin" -"meanZTestArray" -"meanZTestDistinct" -"meanZTestForEach" -"meanZTestIf" -"meanZTestMap" -"meanZTestMerge" -"meanZTestNull" -"meanZTestOrDefault" -"meanZTestOrNull" -"meanZTestResample" -"meanZTestSimpleState" -"meanZTestState" -"median" -"medianArgMax" -"medianArgMin" -"medianArray" -"medianBFloat16" -"medianBFloat16ArgMax" -"medianBFloat16ArgMin" -"medianBFloat16Array" -"medianBFloat16Distinct" -"medianBFloat16ForEach" -"medianBFloat16If" -"medianBFloat16Map" -"medianBFloat16Merge" -"medianBFloat16Null" -"medianBFloat16OrDefault" -"medianBFloat16OrNull" -"medianBFloat16Resample" -"medianBFloat16SimpleState" -"medianBFloat16State" -"medianBFloat16Weighted" -"medianBFloat16WeightedArgMax" -"medianBFloat16WeightedArgMin" -"medianBFloat16WeightedArray" -"medianBFloat16WeightedDistinct" -"medianBFloat16WeightedForEach" -"medianBFloat16WeightedIf" -"medianBFloat16WeightedMap" -"medianBFloat16WeightedMerge" -"medianBFloat16WeightedNull" -"medianBFloat16WeightedOrDefault" -"medianBFloat16WeightedOrNull" -"medianBFloat16WeightedResample" -"medianBFloat16WeightedSimpleState" -"medianBFloat16WeightedState" -"medianDD" -"medianDDArgMax" -"medianDDArgMin" -"medianDDArray" -"medianDDDistinct" -"medianDDForEach" -"medianDDIf" -"medianDDMap" -"medianDDMerge" -"medianDDNull" -"medianDDOrDefault" -"medianDDOrNull" -"medianDDResample" -"medianDDSimpleState" -"medianDDState" -"medianDeterministic" -"medianDeterministicArgMax" -"medianDeterministicArgMin" -"medianDeterministicArray" -"medianDeterministicDistinct" -"medianDeterministicForEach" -"medianDeterministicIf" -"medianDeterministicMap" -"medianDeterministicMerge" -"medianDeterministicNull" -"medianDeterministicOrDefault" -"medianDeterministicOrNull" -"medianDeterministicResample" -"medianDeterministicSimpleState" -"medianDeterministicState" -"medianDistinct" -"medianExact" -"medianExactArgMax" -"medianExactArgMin" -"medianExactArray" -"medianExactDistinct" -"medianExactForEach" -"medianExactHigh" -"medianExactHighArgMax" -"medianExactHighArgMin" -"medianExactHighArray" -"medianExactHighDistinct" -"medianExactHighForEach" -"medianExactHighIf" -"medianExactHighMap" -"medianExactHighMerge" -"medianExactHighNull" -"medianExactHighOrDefault" -"medianExactHighOrNull" -"medianExactHighResample" -"medianExactHighSimpleState" -"medianExactHighState" -"medianExactIf" -"medianExactLow" -"medianExactLowArgMax" -"medianExactLowArgMin" -"medianExactLowArray" -"medianExactLowDistinct" -"medianExactLowForEach" -"medianExactLowIf" -"medianExactLowMap" -"medianExactLowMerge" -"medianExactLowNull" -"medianExactLowOrDefault" -"medianExactLowOrNull" -"medianExactLowResample" -"medianExactLowSimpleState" -"medianExactLowState" -"medianExactMap" -"medianExactMerge" -"medianExactNull" -"medianExactOrDefault" -"medianExactOrNull" -"medianExactResample" -"medianExactSimpleState" -"medianExactState" -"medianExactWeighted" -"medianExactWeightedArgMax" -"medianExactWeightedArgMin" -"medianExactWeightedArray" -"medianExactWeightedDistinct" -"medianExactWeightedForEach" 
-"medianExactWeightedIf" -"medianExactWeightedMap" -"medianExactWeightedMerge" -"medianExactWeightedNull" -"medianExactWeightedOrDefault" -"medianExactWeightedOrNull" -"medianExactWeightedResample" -"medianExactWeightedSimpleState" -"medianExactWeightedState" -"medianForEach" -"medianGK" -"medianGKArgMax" -"medianGKArgMin" -"medianGKArray" -"medianGKDistinct" -"medianGKForEach" -"medianGKIf" -"medianGKMap" -"medianGKMerge" -"medianGKNull" -"medianGKOrDefault" -"medianGKOrNull" -"medianGKResample" -"medianGKSimpleState" -"medianGKState" -"medianIf" -"medianInterpolatedWeighted" -"medianInterpolatedWeightedArgMax" -"medianInterpolatedWeightedArgMin" -"medianInterpolatedWeightedArray" -"medianInterpolatedWeightedDistinct" -"medianInterpolatedWeightedForEach" -"medianInterpolatedWeightedIf" -"medianInterpolatedWeightedMap" -"medianInterpolatedWeightedMerge" -"medianInterpolatedWeightedNull" -"medianInterpolatedWeightedOrDefault" -"medianInterpolatedWeightedOrNull" -"medianInterpolatedWeightedResample" -"medianInterpolatedWeightedSimpleState" -"medianInterpolatedWeightedState" -"medianMap" -"medianMerge" -"medianNull" -"medianOrDefault" -"medianOrNull" -"medianResample" -"medianSimpleState" -"medianState" -"medianTDigest" -"medianTDigestArgMax" -"medianTDigestArgMin" -"medianTDigestArray" -"medianTDigestDistinct" -"medianTDigestForEach" -"medianTDigestIf" -"medianTDigestMap" -"medianTDigestMerge" -"medianTDigestNull" -"medianTDigestOrDefault" -"medianTDigestOrNull" -"medianTDigestResample" -"medianTDigestSimpleState" -"medianTDigestState" -"medianTDigestWeighted" -"medianTDigestWeightedArgMax" -"medianTDigestWeightedArgMin" -"medianTDigestWeightedArray" -"medianTDigestWeightedDistinct" -"medianTDigestWeightedForEach" -"medianTDigestWeightedIf" -"medianTDigestWeightedMap" -"medianTDigestWeightedMerge" -"medianTDigestWeightedNull" -"medianTDigestWeightedOrDefault" -"medianTDigestWeightedOrNull" -"medianTDigestWeightedResample" -"medianTDigestWeightedSimpleState" -"medianTDigestWeightedState" -"medianTiming" -"medianTimingArgMax" -"medianTimingArgMin" -"medianTimingArray" -"medianTimingDistinct" -"medianTimingForEach" -"medianTimingIf" -"medianTimingMap" -"medianTimingMerge" -"medianTimingNull" -"medianTimingOrDefault" -"medianTimingOrNull" -"medianTimingResample" -"medianTimingSimpleState" -"medianTimingState" -"medianTimingWeighted" -"medianTimingWeightedArgMax" -"medianTimingWeightedArgMin" -"medianTimingWeightedArray" -"medianTimingWeightedDistinct" -"medianTimingWeightedForEach" -"medianTimingWeightedIf" -"medianTimingWeightedMap" -"medianTimingWeightedMerge" -"medianTimingWeightedNull" -"medianTimingWeightedOrDefault" -"medianTimingWeightedOrNull" -"medianTimingWeightedResample" -"medianTimingWeightedSimpleState" -"medianTimingWeightedState" -"metroHash64" -"mid" -"min" -"min2" -"minArgMax" -"minArgMin" -"minArray" -"minDistinct" -"minForEach" -"minIf" -"minMap" -"minMappedArrays" -"minMappedArraysArgMax" -"minMappedArraysArgMin" -"minMappedArraysArray" -"minMappedArraysDistinct" -"minMappedArraysForEach" -"minMappedArraysIf" -"minMappedArraysMap" -"minMappedArraysMerge" -"minMappedArraysNull" -"minMappedArraysOrDefault" -"minMappedArraysOrNull" -"minMappedArraysResample" -"minMappedArraysSimpleState" -"minMappedArraysState" -"minMerge" -"minNull" -"minOrDefault" -"minOrNull" -"minResample" -"minSampleSizeContinous" -"minSampleSizeContinuous" -"minSampleSizeConversion" -"minSimpleState" -"minState" -"minus" -"mismatches" -"mod" -"modulo" -"moduloLegacy" -"moduloOrZero" -"monthName" 
-"mortonDecode" -"mortonEncode" -"multiFuzzyMatchAllIndices" -"multiFuzzyMatchAny" -"multiFuzzyMatchAnyIndex" -"multiIf" -"multiMatchAllIndices" -"multiMatchAny" -"multiMatchAnyIndex" -"multiSearchAllPositions" -"multiSearchAllPositionsCaseInsensitive" -"multiSearchAllPositionsCaseInsensitiveUTF8" -"multiSearchAllPositionsUTF8" -"multiSearchAny" -"multiSearchAnyCaseInsensitive" -"multiSearchAnyCaseInsensitiveUTF8" -"multiSearchAnyUTF8" -"multiSearchFirstIndex" -"multiSearchFirstIndexCaseInsensitive" -"multiSearchFirstIndexCaseInsensitiveUTF8" -"multiSearchFirstIndexUTF8" -"multiSearchFirstPosition" -"multiSearchFirstPositionCaseInsensitive" -"multiSearchFirstPositionCaseInsensitiveUTF8" -"multiSearchFirstPositionUTF8" -"multiply" -"multiplyDecimal" -"murmurHash2_32" -"murmurHash2_64" -"murmurHash3_128" -"murmurHash3_32" -"murmurHash3_64" -"negate" -"neighbor" -"nested" -"netloc" -"ngramDistance" -"ngramDistanceCaseInsensitive" -"ngramDistanceCaseInsensitiveUTF8" -"ngramDistanceUTF8" -"ngramMinHash" -"ngramMinHashArg" -"ngramMinHashArgCaseInsensitive" -"ngramMinHashArgCaseInsensitiveUTF8" -"ngramMinHashArgUTF8" -"ngramMinHashCaseInsensitive" -"ngramMinHashCaseInsensitiveUTF8" -"ngramMinHashUTF8" -"ngramSearch" -"ngramSearchCaseInsensitive" -"ngramSearchCaseInsensitiveUTF8" -"ngramSearchUTF8" -"ngramSimHash" -"ngramSimHashCaseInsensitive" -"ngramSimHashCaseInsensitiveUTF8" -"ngramSimHashUTF8" -"ngrams" -"nonNegativeDerivative" -"nonNegativeDerivativeArgMax" -"nonNegativeDerivativeArgMin" -"nonNegativeDerivativeArray" -"nonNegativeDerivativeDistinct" -"nonNegativeDerivativeForEach" -"nonNegativeDerivativeIf" -"nonNegativeDerivativeMap" -"nonNegativeDerivativeMerge" -"nonNegativeDerivativeNull" -"nonNegativeDerivativeOrDefault" -"nonNegativeDerivativeOrNull" -"nonNegativeDerivativeResample" -"nonNegativeDerivativeSimpleState" -"nonNegativeDerivativeState" -"normL1" -"normL2" -"normL2Squared" -"normLinf" -"normLp" -"normalizeL1" -"normalizeL2" -"normalizeLinf" -"normalizeLp" -"normalizeQuery" -"normalizeQueryKeepNames" -"normalizeUTF8NFC" -"normalizeUTF8NFD" -"normalizeUTF8NFKC" -"normalizeUTF8NFKD" -"normalizedQueryHash" -"normalizedQueryHashKeepNames" -"not" -"notEmpty" -"notEquals" -"notILike" -"notIn" -"notInIgnoreSet" -"notLike" -"notNullIn" -"notNullInIgnoreSet" -"nothing" -"nothingArgMax" -"nothingArgMin" -"nothingArray" -"nothingDistinct" -"nothingForEach" -"nothingIf" -"nothingMap" -"nothingMerge" -"nothingNull" -"nothingNull" -"nothingNullArgMax" -"nothingNullArgMin" -"nothingNullArray" -"nothingNullDistinct" -"nothingNullForEach" -"nothingNullIf" -"nothingNullMap" -"nothingNullMerge" -"nothingNullNull" -"nothingNullOrDefault" -"nothingNullOrNull" -"nothingNullResample" -"nothingNullSimpleState" -"nothingNullState" -"nothingOrDefault" -"nothingOrNull" -"nothingResample" -"nothingSimpleState" -"nothingState" -"nothingUInt64" -"nothingUInt64ArgMax" -"nothingUInt64ArgMin" -"nothingUInt64Array" -"nothingUInt64Distinct" -"nothingUInt64ForEach" -"nothingUInt64If" -"nothingUInt64Map" -"nothingUInt64Merge" -"nothingUInt64Null" -"nothingUInt64OrDefault" -"nothingUInt64OrNull" -"nothingUInt64Resample" -"nothingUInt64SimpleState" -"nothingUInt64State" -"now" -"now64" -"nowInBlock" -"nth_value" -"nth_valueArgMax" -"nth_valueArgMin" -"nth_valueArray" -"nth_valueDistinct" -"nth_valueForEach" -"nth_valueIf" -"nth_valueMap" -"nth_valueMerge" -"nth_valueNull" -"nth_valueOrDefault" -"nth_valueOrNull" -"nth_valueResample" -"nth_valueSimpleState" -"nth_valueState" -"ntile" -"ntileArgMax" -"ntileArgMin" 
-"ntileArray" -"ntileDistinct" -"ntileForEach" -"ntileIf" -"ntileMap" -"ntileMerge" -"ntileNull" -"ntileOrDefault" -"ntileOrNull" -"ntileResample" -"ntileSimpleState" -"ntileState" -"nullIf" -"nullIn" -"nullInIgnoreSet" -"or" -"parseDateTime" -"parseDateTime32BestEffort" -"parseDateTime32BestEffortOrNull" -"parseDateTime32BestEffortOrZero" -"parseDateTime64BestEffort" -"parseDateTime64BestEffortOrNull" -"parseDateTime64BestEffortOrZero" -"parseDateTime64BestEffortUS" -"parseDateTime64BestEffortUSOrNull" -"parseDateTime64BestEffortUSOrZero" -"parseDateTimeBestEffort" -"parseDateTimeBestEffortOrNull" -"parseDateTimeBestEffortOrZero" -"parseDateTimeBestEffortUS" -"parseDateTimeBestEffortUSOrNull" -"parseDateTimeBestEffortUSOrZero" -"parseDateTimeInJodaSyntax" -"parseDateTimeInJodaSyntaxOrNull" -"parseDateTimeInJodaSyntaxOrZero" -"parseDateTimeOrNull" -"parseDateTimeOrZero" -"parseReadableSize" -"parseReadableSizeOrNull" -"parseReadableSizeOrZero" -"parseTimeDelta" -"partitionID" -"partitionId" -"path" -"pathFull" -"percentRank" -"percentRankArgMax" -"percentRankArgMin" -"percentRankArray" -"percentRankDistinct" -"percentRankForEach" -"percentRankIf" -"percentRankMap" -"percentRankMerge" -"percentRankNull" -"percentRankOrDefault" -"percentRankOrNull" -"percentRankResample" -"percentRankSimpleState" -"percentRankState" -"percent_rank" -"percent_rankArgMax" -"percent_rankArgMin" -"percent_rankArray" -"percent_rankDistinct" -"percent_rankForEach" -"percent_rankIf" -"percent_rankMap" -"percent_rankMerge" -"percent_rankNull" -"percent_rankOrDefault" -"percent_rankOrNull" -"percent_rankResample" -"percent_rankSimpleState" -"percent_rankState" -"pi" -"plus" -"pmod" -"pointInEllipses" -"pointInPolygon" -"polygonAreaCartesian" -"polygonAreaSpherical" -"polygonConvexHullCartesian" -"polygonPerimeterCartesian" -"polygonPerimeterSpherical" -"polygonsDistanceCartesian" -"polygonsDistanceSpherical" -"polygonsEqualsCartesian" -"polygonsIntersectionCartesian" -"polygonsIntersectionSpherical" -"polygonsSymDifferenceCartesian" -"polygonsSymDifferenceSpherical" -"polygonsUnionCartesian" -"polygonsUnionSpherical" -"polygonsWithinCartesian" -"polygonsWithinSpherical" -"port" -"portRFC" -"position" -"positionCaseInsensitive" -"positionCaseInsensitiveUTF8" -"positionUTF8" -"positiveModulo" -"positive_modulo" -"pow" -"power" -"printf" -"proportionsZTest" -"protocol" -"punycodeDecode" -"punycodeEncode" -"quantile" -"quantileArgMax" -"quantileArgMin" -"quantileArray" -"quantileBFloat16" -"quantileBFloat16ArgMax" -"quantileBFloat16ArgMin" -"quantileBFloat16Array" -"quantileBFloat16Distinct" -"quantileBFloat16ForEach" -"quantileBFloat16If" -"quantileBFloat16Map" -"quantileBFloat16Merge" -"quantileBFloat16Null" -"quantileBFloat16OrDefault" -"quantileBFloat16OrNull" -"quantileBFloat16Resample" -"quantileBFloat16SimpleState" -"quantileBFloat16State" -"quantileBFloat16Weighted" -"quantileBFloat16WeightedArgMax" -"quantileBFloat16WeightedArgMin" -"quantileBFloat16WeightedArray" -"quantileBFloat16WeightedDistinct" -"quantileBFloat16WeightedForEach" -"quantileBFloat16WeightedIf" -"quantileBFloat16WeightedMap" -"quantileBFloat16WeightedMerge" -"quantileBFloat16WeightedNull" -"quantileBFloat16WeightedOrDefault" -"quantileBFloat16WeightedOrNull" -"quantileBFloat16WeightedResample" -"quantileBFloat16WeightedSimpleState" -"quantileBFloat16WeightedState" -"quantileDD" -"quantileDDArgMax" -"quantileDDArgMin" -"quantileDDArray" -"quantileDDDistinct" -"quantileDDForEach" -"quantileDDIf" -"quantileDDMap" -"quantileDDMerge" 
-"quantileDDNull" -"quantileDDOrDefault" -"quantileDDOrNull" -"quantileDDResample" -"quantileDDSimpleState" -"quantileDDState" -"quantileDeterministic" -"quantileDeterministicArgMax" -"quantileDeterministicArgMin" -"quantileDeterministicArray" -"quantileDeterministicDistinct" -"quantileDeterministicForEach" -"quantileDeterministicIf" -"quantileDeterministicMap" -"quantileDeterministicMerge" -"quantileDeterministicNull" -"quantileDeterministicOrDefault" -"quantileDeterministicOrNull" -"quantileDeterministicResample" -"quantileDeterministicSimpleState" -"quantileDeterministicState" -"quantileDistinct" -"quantileExact" -"quantileExactArgMax" -"quantileExactArgMin" -"quantileExactArray" -"quantileExactDistinct" -"quantileExactExclusive" -"quantileExactExclusiveArgMax" -"quantileExactExclusiveArgMin" -"quantileExactExclusiveArray" -"quantileExactExclusiveDistinct" -"quantileExactExclusiveForEach" -"quantileExactExclusiveIf" -"quantileExactExclusiveMap" -"quantileExactExclusiveMerge" -"quantileExactExclusiveNull" -"quantileExactExclusiveOrDefault" -"quantileExactExclusiveOrNull" -"quantileExactExclusiveResample" -"quantileExactExclusiveSimpleState" -"quantileExactExclusiveState" -"quantileExactForEach" -"quantileExactHigh" -"quantileExactHighArgMax" -"quantileExactHighArgMin" -"quantileExactHighArray" -"quantileExactHighDistinct" -"quantileExactHighForEach" -"quantileExactHighIf" -"quantileExactHighMap" -"quantileExactHighMerge" -"quantileExactHighNull" -"quantileExactHighOrDefault" -"quantileExactHighOrNull" -"quantileExactHighResample" -"quantileExactHighSimpleState" -"quantileExactHighState" -"quantileExactIf" -"quantileExactInclusive" -"quantileExactInclusiveArgMax" -"quantileExactInclusiveArgMin" -"quantileExactInclusiveArray" -"quantileExactInclusiveDistinct" -"quantileExactInclusiveForEach" -"quantileExactInclusiveIf" -"quantileExactInclusiveMap" -"quantileExactInclusiveMerge" -"quantileExactInclusiveNull" -"quantileExactInclusiveOrDefault" -"quantileExactInclusiveOrNull" -"quantileExactInclusiveResample" -"quantileExactInclusiveSimpleState" -"quantileExactInclusiveState" -"quantileExactLow" -"quantileExactLowArgMax" -"quantileExactLowArgMin" -"quantileExactLowArray" -"quantileExactLowDistinct" -"quantileExactLowForEach" -"quantileExactLowIf" -"quantileExactLowMap" -"quantileExactLowMerge" -"quantileExactLowNull" -"quantileExactLowOrDefault" -"quantileExactLowOrNull" -"quantileExactLowResample" -"quantileExactLowSimpleState" -"quantileExactLowState" -"quantileExactMap" -"quantileExactMerge" -"quantileExactNull" -"quantileExactOrDefault" -"quantileExactOrNull" -"quantileExactResample" -"quantileExactSimpleState" -"quantileExactState" -"quantileExactWeighted" -"quantileExactWeightedArgMax" -"quantileExactWeightedArgMin" -"quantileExactWeightedArray" -"quantileExactWeightedDistinct" -"quantileExactWeightedForEach" -"quantileExactWeightedIf" -"quantileExactWeightedMap" -"quantileExactWeightedMerge" -"quantileExactWeightedNull" -"quantileExactWeightedOrDefault" -"quantileExactWeightedOrNull" -"quantileExactWeightedResample" -"quantileExactWeightedSimpleState" -"quantileExactWeightedState" -"quantileForEach" -"quantileGK" -"quantileGKArgMax" -"quantileGKArgMin" -"quantileGKArray" -"quantileGKDistinct" -"quantileGKForEach" -"quantileGKIf" -"quantileGKMap" -"quantileGKMerge" -"quantileGKNull" -"quantileGKOrDefault" -"quantileGKOrNull" -"quantileGKResample" -"quantileGKSimpleState" -"quantileGKState" -"quantileIf" -"quantileInterpolatedWeighted" -"quantileInterpolatedWeightedArgMax" 
-"quantileInterpolatedWeightedArgMin" -"quantileInterpolatedWeightedArray" -"quantileInterpolatedWeightedDistinct" -"quantileInterpolatedWeightedForEach" -"quantileInterpolatedWeightedIf" -"quantileInterpolatedWeightedMap" -"quantileInterpolatedWeightedMerge" -"quantileInterpolatedWeightedNull" -"quantileInterpolatedWeightedOrDefault" -"quantileInterpolatedWeightedOrNull" -"quantileInterpolatedWeightedResample" -"quantileInterpolatedWeightedSimpleState" -"quantileInterpolatedWeightedState" -"quantileMap" -"quantileMerge" -"quantileNull" -"quantileOrDefault" -"quantileOrNull" -"quantileResample" -"quantileSimpleState" -"quantileState" -"quantileTDigest" -"quantileTDigestArgMax" -"quantileTDigestArgMin" -"quantileTDigestArray" -"quantileTDigestDistinct" -"quantileTDigestForEach" -"quantileTDigestIf" -"quantileTDigestMap" -"quantileTDigestMerge" -"quantileTDigestNull" -"quantileTDigestOrDefault" -"quantileTDigestOrNull" -"quantileTDigestResample" -"quantileTDigestSimpleState" -"quantileTDigestState" -"quantileTDigestWeighted" -"quantileTDigestWeightedArgMax" -"quantileTDigestWeightedArgMin" -"quantileTDigestWeightedArray" -"quantileTDigestWeightedDistinct" -"quantileTDigestWeightedForEach" -"quantileTDigestWeightedIf" -"quantileTDigestWeightedMap" -"quantileTDigestWeightedMerge" -"quantileTDigestWeightedNull" -"quantileTDigestWeightedOrDefault" -"quantileTDigestWeightedOrNull" -"quantileTDigestWeightedResample" -"quantileTDigestWeightedSimpleState" -"quantileTDigestWeightedState" -"quantileTiming" -"quantileTimingArgMax" -"quantileTimingArgMin" -"quantileTimingArray" -"quantileTimingDistinct" -"quantileTimingForEach" -"quantileTimingIf" -"quantileTimingMap" -"quantileTimingMerge" -"quantileTimingNull" -"quantileTimingOrDefault" -"quantileTimingOrNull" -"quantileTimingResample" -"quantileTimingSimpleState" -"quantileTimingState" -"quantileTimingWeighted" -"quantileTimingWeightedArgMax" -"quantileTimingWeightedArgMin" -"quantileTimingWeightedArray" -"quantileTimingWeightedDistinct" -"quantileTimingWeightedForEach" -"quantileTimingWeightedIf" -"quantileTimingWeightedMap" -"quantileTimingWeightedMerge" -"quantileTimingWeightedNull" -"quantileTimingWeightedOrDefault" -"quantileTimingWeightedOrNull" -"quantileTimingWeightedResample" -"quantileTimingWeightedSimpleState" -"quantileTimingWeightedState" -"quantiles" -"quantilesArgMax" -"quantilesArgMin" -"quantilesArray" -"quantilesBFloat16" -"quantilesBFloat16ArgMax" -"quantilesBFloat16ArgMin" -"quantilesBFloat16Array" -"quantilesBFloat16Distinct" -"quantilesBFloat16ForEach" -"quantilesBFloat16If" -"quantilesBFloat16Map" -"quantilesBFloat16Merge" -"quantilesBFloat16Null" -"quantilesBFloat16OrDefault" -"quantilesBFloat16OrNull" -"quantilesBFloat16Resample" -"quantilesBFloat16SimpleState" -"quantilesBFloat16State" -"quantilesBFloat16Weighted" -"quantilesBFloat16WeightedArgMax" -"quantilesBFloat16WeightedArgMin" -"quantilesBFloat16WeightedArray" -"quantilesBFloat16WeightedDistinct" -"quantilesBFloat16WeightedForEach" -"quantilesBFloat16WeightedIf" -"quantilesBFloat16WeightedMap" -"quantilesBFloat16WeightedMerge" -"quantilesBFloat16WeightedNull" -"quantilesBFloat16WeightedOrDefault" -"quantilesBFloat16WeightedOrNull" -"quantilesBFloat16WeightedResample" -"quantilesBFloat16WeightedSimpleState" -"quantilesBFloat16WeightedState" -"quantilesDD" -"quantilesDDArgMax" -"quantilesDDArgMin" -"quantilesDDArray" -"quantilesDDDistinct" -"quantilesDDForEach" -"quantilesDDIf" -"quantilesDDMap" -"quantilesDDMerge" -"quantilesDDNull" -"quantilesDDOrDefault" 
-"quantilesDDOrNull" -"quantilesDDResample" -"quantilesDDSimpleState" -"quantilesDDState" -"quantilesDeterministic" -"quantilesDeterministicArgMax" -"quantilesDeterministicArgMin" -"quantilesDeterministicArray" -"quantilesDeterministicDistinct" -"quantilesDeterministicForEach" -"quantilesDeterministicIf" -"quantilesDeterministicMap" -"quantilesDeterministicMerge" -"quantilesDeterministicNull" -"quantilesDeterministicOrDefault" -"quantilesDeterministicOrNull" -"quantilesDeterministicResample" -"quantilesDeterministicSimpleState" -"quantilesDeterministicState" -"quantilesDistinct" -"quantilesExact" -"quantilesExactArgMax" -"quantilesExactArgMin" -"quantilesExactArray" -"quantilesExactDistinct" -"quantilesExactExclusive" -"quantilesExactExclusiveArgMax" -"quantilesExactExclusiveArgMin" -"quantilesExactExclusiveArray" -"quantilesExactExclusiveDistinct" -"quantilesExactExclusiveForEach" -"quantilesExactExclusiveIf" -"quantilesExactExclusiveMap" -"quantilesExactExclusiveMerge" -"quantilesExactExclusiveNull" -"quantilesExactExclusiveOrDefault" -"quantilesExactExclusiveOrNull" -"quantilesExactExclusiveResample" -"quantilesExactExclusiveSimpleState" -"quantilesExactExclusiveState" -"quantilesExactForEach" -"quantilesExactHigh" -"quantilesExactHighArgMax" -"quantilesExactHighArgMin" -"quantilesExactHighArray" -"quantilesExactHighDistinct" -"quantilesExactHighForEach" -"quantilesExactHighIf" -"quantilesExactHighMap" -"quantilesExactHighMerge" -"quantilesExactHighNull" -"quantilesExactHighOrDefault" -"quantilesExactHighOrNull" -"quantilesExactHighResample" -"quantilesExactHighSimpleState" -"quantilesExactHighState" -"quantilesExactIf" -"quantilesExactInclusive" -"quantilesExactInclusiveArgMax" -"quantilesExactInclusiveArgMin" -"quantilesExactInclusiveArray" -"quantilesExactInclusiveDistinct" -"quantilesExactInclusiveForEach" -"quantilesExactInclusiveIf" -"quantilesExactInclusiveMap" -"quantilesExactInclusiveMerge" -"quantilesExactInclusiveNull" -"quantilesExactInclusiveOrDefault" -"quantilesExactInclusiveOrNull" -"quantilesExactInclusiveResample" -"quantilesExactInclusiveSimpleState" -"quantilesExactInclusiveState" -"quantilesExactLow" -"quantilesExactLowArgMax" -"quantilesExactLowArgMin" -"quantilesExactLowArray" -"quantilesExactLowDistinct" -"quantilesExactLowForEach" -"quantilesExactLowIf" -"quantilesExactLowMap" -"quantilesExactLowMerge" -"quantilesExactLowNull" -"quantilesExactLowOrDefault" -"quantilesExactLowOrNull" -"quantilesExactLowResample" -"quantilesExactLowSimpleState" -"quantilesExactLowState" -"quantilesExactMap" -"quantilesExactMerge" -"quantilesExactNull" -"quantilesExactOrDefault" -"quantilesExactOrNull" -"quantilesExactResample" -"quantilesExactSimpleState" -"quantilesExactState" -"quantilesExactWeighted" -"quantilesExactWeightedArgMax" -"quantilesExactWeightedArgMin" -"quantilesExactWeightedArray" -"quantilesExactWeightedDistinct" -"quantilesExactWeightedForEach" -"quantilesExactWeightedIf" -"quantilesExactWeightedMap" -"quantilesExactWeightedMerge" -"quantilesExactWeightedNull" -"quantilesExactWeightedOrDefault" -"quantilesExactWeightedOrNull" -"quantilesExactWeightedResample" -"quantilesExactWeightedSimpleState" -"quantilesExactWeightedState" -"quantilesForEach" -"quantilesGK" -"quantilesGKArgMax" -"quantilesGKArgMin" -"quantilesGKArray" -"quantilesGKDistinct" -"quantilesGKForEach" -"quantilesGKIf" -"quantilesGKMap" -"quantilesGKMerge" -"quantilesGKNull" -"quantilesGKOrDefault" -"quantilesGKOrNull" -"quantilesGKResample" -"quantilesGKSimpleState" -"quantilesGKState" 
-"quantilesIf" -"quantilesInterpolatedWeighted" -"quantilesInterpolatedWeightedArgMax" -"quantilesInterpolatedWeightedArgMin" -"quantilesInterpolatedWeightedArray" -"quantilesInterpolatedWeightedDistinct" -"quantilesInterpolatedWeightedForEach" -"quantilesInterpolatedWeightedIf" -"quantilesInterpolatedWeightedMap" -"quantilesInterpolatedWeightedMerge" -"quantilesInterpolatedWeightedNull" -"quantilesInterpolatedWeightedOrDefault" -"quantilesInterpolatedWeightedOrNull" -"quantilesInterpolatedWeightedResample" -"quantilesInterpolatedWeightedSimpleState" -"quantilesInterpolatedWeightedState" -"quantilesMap" -"quantilesMerge" -"quantilesNull" -"quantilesOrDefault" -"quantilesOrNull" -"quantilesResample" -"quantilesSimpleState" -"quantilesState" -"quantilesTDigest" -"quantilesTDigestArgMax" -"quantilesTDigestArgMin" -"quantilesTDigestArray" -"quantilesTDigestDistinct" -"quantilesTDigestForEach" -"quantilesTDigestIf" -"quantilesTDigestMap" -"quantilesTDigestMerge" -"quantilesTDigestNull" -"quantilesTDigestOrDefault" -"quantilesTDigestOrNull" -"quantilesTDigestResample" -"quantilesTDigestSimpleState" -"quantilesTDigestState" -"quantilesTDigestWeighted" -"quantilesTDigestWeightedArgMax" -"quantilesTDigestWeightedArgMin" -"quantilesTDigestWeightedArray" -"quantilesTDigestWeightedDistinct" -"quantilesTDigestWeightedForEach" -"quantilesTDigestWeightedIf" -"quantilesTDigestWeightedMap" -"quantilesTDigestWeightedMerge" -"quantilesTDigestWeightedNull" -"quantilesTDigestWeightedOrDefault" -"quantilesTDigestWeightedOrNull" -"quantilesTDigestWeightedResample" -"quantilesTDigestWeightedSimpleState" -"quantilesTDigestWeightedState" -"quantilesTiming" -"quantilesTimingArgMax" -"quantilesTimingArgMin" -"quantilesTimingArray" -"quantilesTimingDistinct" -"quantilesTimingForEach" -"quantilesTimingIf" -"quantilesTimingMap" -"quantilesTimingMerge" -"quantilesTimingNull" -"quantilesTimingOrDefault" -"quantilesTimingOrNull" -"quantilesTimingResample" -"quantilesTimingSimpleState" -"quantilesTimingState" -"quantilesTimingWeighted" -"quantilesTimingWeightedArgMax" -"quantilesTimingWeightedArgMin" -"quantilesTimingWeightedArray" -"quantilesTimingWeightedDistinct" -"quantilesTimingWeightedForEach" -"quantilesTimingWeightedIf" -"quantilesTimingWeightedMap" -"quantilesTimingWeightedMerge" -"quantilesTimingWeightedNull" -"quantilesTimingWeightedOrDefault" -"quantilesTimingWeightedOrNull" -"quantilesTimingWeightedResample" -"quantilesTimingWeightedSimpleState" -"quantilesTimingWeightedState" -"queryID" -"queryString" -"queryStringAndFragment" -"query_id" -"radians" -"rand" -"rand32" -"rand64" -"randBernoulli" -"randBinomial" -"randCanonical" -"randChiSquared" -"randConstant" -"randExponential" -"randFisherF" -"randLogNormal" -"randNegativeBinomial" -"randNormal" -"randPoisson" -"randStudentT" -"randUniform" -"randomFixedString" -"randomPrintableASCII" -"randomString" -"randomStringUTF8" -"range" -"rank" -"rankArgMax" -"rankArgMin" -"rankArray" -"rankCorr" -"rankCorrArgMax" -"rankCorrArgMin" -"rankCorrArray" -"rankCorrDistinct" -"rankCorrForEach" -"rankCorrIf" -"rankCorrMap" -"rankCorrMerge" -"rankCorrNull" -"rankCorrOrDefault" -"rankCorrOrNull" -"rankCorrResample" -"rankCorrSimpleState" -"rankCorrState" -"rankDistinct" -"rankForEach" -"rankIf" -"rankMap" -"rankMerge" -"rankNull" -"rankOrDefault" -"rankOrNull" -"rankResample" -"rankSimpleState" -"rankState" -"readWKTLineString" -"readWKTMultiLineString" -"readWKTMultiPolygon" -"readWKTPoint" -"readWKTPolygon" -"readWKTRing" -"regexpExtract" -"regexpQuoteMeta" 
-"regionHierarchy" -"regionIn" -"regionToArea" -"regionToCity" -"regionToContinent" -"regionToCountry" -"regionToDistrict" -"regionToName" -"regionToPopulation" -"regionToTopContinent" -"reinterpret" -"reinterpretAsDate" -"reinterpretAsDateTime" -"reinterpretAsFixedString" -"reinterpretAsFloat32" -"reinterpretAsFloat64" -"reinterpretAsInt128" -"reinterpretAsInt16" -"reinterpretAsInt256" -"reinterpretAsInt32" -"reinterpretAsInt64" -"reinterpretAsInt8" -"reinterpretAsString" -"reinterpretAsUInt128" -"reinterpretAsUInt16" -"reinterpretAsUInt256" -"reinterpretAsUInt32" -"reinterpretAsUInt64" -"reinterpretAsUInt8" -"reinterpretAsUUID" -"repeat" -"replace" -"replaceAll" -"replaceOne" -"replaceRegexpAll" -"replaceRegexpOne" -"replicate" -"retention" -"retentionArgMax" -"retentionArgMin" -"retentionArray" -"retentionDistinct" -"retentionForEach" -"retentionIf" -"retentionMap" -"retentionMerge" -"retentionNull" -"retentionOrDefault" -"retentionOrNull" -"retentionResample" -"retentionSimpleState" -"retentionState" -"reverse" -"reverseUTF8" -"revision" -"right" -"rightPad" -"rightPadUTF8" -"rightUTF8" -"round" -"roundAge" -"roundBankers" -"roundDown" -"roundDuration" -"roundToExp2" -"rowNumberInAllBlocks" -"rowNumberInBlock" -"row_number" -"row_numberArgMax" -"row_numberArgMin" -"row_numberArray" -"row_numberDistinct" -"row_numberForEach" -"row_numberIf" -"row_numberMap" -"row_numberMerge" -"row_numberNull" -"row_numberOrDefault" -"row_numberOrNull" -"row_numberResample" -"row_numberSimpleState" -"row_numberState" -"rpad" -"rtrim" -"runningAccumulate" -"runningConcurrency" -"runningDifference" -"runningDifferenceStartingWithFirstValue" -"s2CapContains" -"s2CapUnion" -"s2CellsIntersect" -"s2GetNeighbors" -"s2RectAdd" -"s2RectContains" -"s2RectIntersection" -"s2RectUnion" -"s2ToGeo" -"scalarProduct" -"sequenceCount" -"sequenceCountArgMax" -"sequenceCountArgMin" -"sequenceCountArray" -"sequenceCountDistinct" -"sequenceCountForEach" -"sequenceCountIf" -"sequenceCountMap" -"sequenceCountMerge" -"sequenceCountNull" -"sequenceCountOrDefault" -"sequenceCountOrNull" -"sequenceCountResample" -"sequenceCountSimpleState" -"sequenceCountState" -"sequenceMatch" -"sequenceMatchArgMax" -"sequenceMatchArgMin" -"sequenceMatchArray" -"sequenceMatchDistinct" -"sequenceMatchForEach" -"sequenceMatchIf" -"sequenceMatchMap" -"sequenceMatchMerge" -"sequenceMatchNull" -"sequenceMatchOrDefault" -"sequenceMatchOrNull" -"sequenceMatchResample" -"sequenceMatchSimpleState" -"sequenceMatchState" -"sequenceNextNode" -"sequenceNextNodeArgMax" -"sequenceNextNodeArgMin" -"sequenceNextNodeArray" -"sequenceNextNodeDistinct" -"sequenceNextNodeForEach" -"sequenceNextNodeIf" -"sequenceNextNodeMap" -"sequenceNextNodeMerge" -"sequenceNextNodeNull" -"sequenceNextNodeOrDefault" -"sequenceNextNodeOrNull" -"sequenceNextNodeResample" -"sequenceNextNodeSimpleState" -"sequenceNextNodeState" -"seriesDecomposeSTL" -"seriesOutliersDetectTukey" -"seriesPeriodDetectFFT" -"serverTimeZone" -"serverTimezone" -"serverUUID" -"shardCount" -"shardNum" -"showCertificate" -"sigmoid" -"sign" -"simpleJSONExtractBool" -"simpleJSONExtractFloat" -"simpleJSONExtractInt" -"simpleJSONExtractRaw" -"simpleJSONExtractString" -"simpleJSONExtractUInt" -"simpleJSONHas" -"simpleLinearRegression" -"simpleLinearRegressionArgMax" -"simpleLinearRegressionArgMin" -"simpleLinearRegressionArray" -"simpleLinearRegressionDistinct" -"simpleLinearRegressionForEach" -"simpleLinearRegressionIf" -"simpleLinearRegressionMap" -"simpleLinearRegressionMerge" -"simpleLinearRegressionNull" 
-"simpleLinearRegressionOrDefault" -"simpleLinearRegressionOrNull" -"simpleLinearRegressionResample" -"simpleLinearRegressionSimpleState" -"simpleLinearRegressionState" -"sin" -"singleValueOrNull" -"singleValueOrNullArgMax" -"singleValueOrNullArgMin" -"singleValueOrNullArray" -"singleValueOrNullDistinct" -"singleValueOrNullForEach" -"singleValueOrNullIf" -"singleValueOrNullMap" -"singleValueOrNullMerge" -"singleValueOrNullNull" -"singleValueOrNullOrDefault" -"singleValueOrNullOrNull" -"singleValueOrNullResample" -"singleValueOrNullSimpleState" -"singleValueOrNullState" -"sinh" -"sipHash128" -"sipHash128Keyed" -"sipHash128Reference" -"sipHash128ReferenceKeyed" -"sipHash64" -"sipHash64Keyed" -"skewPop" -"skewPopArgMax" -"skewPopArgMin" -"skewPopArray" -"skewPopDistinct" -"skewPopForEach" -"skewPopIf" -"skewPopMap" -"skewPopMerge" -"skewPopNull" -"skewPopOrDefault" -"skewPopOrNull" -"skewPopResample" -"skewPopSimpleState" -"skewPopState" -"skewSamp" -"skewSampArgMax" -"skewSampArgMin" -"skewSampArray" -"skewSampDistinct" -"skewSampForEach" -"skewSampIf" -"skewSampMap" -"skewSampMerge" -"skewSampNull" -"skewSampOrDefault" -"skewSampOrNull" -"skewSampResample" -"skewSampSimpleState" -"skewSampState" -"sleep" -"sleepEachRow" -"snowflakeIDToDateTime" -"snowflakeIDToDateTime64" -"snowflakeToDateTime" -"snowflakeToDateTime64" -"soundex" -"space" -"sparkBar" -"sparkBarArgMax" -"sparkBarArgMin" -"sparkBarArray" -"sparkBarDistinct" -"sparkBarForEach" -"sparkBarIf" -"sparkBarMap" -"sparkBarMerge" -"sparkBarNull" -"sparkBarOrDefault" -"sparkBarOrNull" -"sparkBarResample" -"sparkBarSimpleState" -"sparkBarState" -"sparkbar" -"sparkbarArgMax" -"sparkbarArgMin" -"sparkbarArray" -"sparkbarDistinct" -"sparkbarForEach" -"sparkbarIf" -"sparkbarMap" -"sparkbarMerge" -"sparkbarNull" -"sparkbarOrDefault" -"sparkbarOrNull" -"sparkbarResample" -"sparkbarSimpleState" -"sparkbarState" -"splitByAlpha" -"splitByChar" -"splitByNonAlpha" -"splitByRegexp" -"splitByString" -"splitByWhitespace" -"sqid" -"sqidDecode" -"sqidEncode" -"sqrt" -"startsWith" -"startsWithUTF8" -"stddevPop" -"stddevPopArgMax" -"stddevPopArgMin" -"stddevPopArray" -"stddevPopDistinct" -"stddevPopForEach" -"stddevPopIf" -"stddevPopMap" -"stddevPopMerge" -"stddevPopNull" -"stddevPopOrDefault" -"stddevPopOrNull" -"stddevPopResample" -"stddevPopSimpleState" -"stddevPopStable" -"stddevPopStableArgMax" -"stddevPopStableArgMin" -"stddevPopStableArray" -"stddevPopStableDistinct" -"stddevPopStableForEach" -"stddevPopStableIf" -"stddevPopStableMap" -"stddevPopStableMerge" -"stddevPopStableNull" -"stddevPopStableOrDefault" -"stddevPopStableOrNull" -"stddevPopStableResample" -"stddevPopStableSimpleState" -"stddevPopStableState" -"stddevPopState" -"stddevSamp" -"stddevSampArgMax" -"stddevSampArgMin" -"stddevSampArray" -"stddevSampDistinct" -"stddevSampForEach" -"stddevSampIf" -"stddevSampMap" -"stddevSampMerge" -"stddevSampNull" -"stddevSampOrDefault" -"stddevSampOrNull" -"stddevSampResample" -"stddevSampSimpleState" -"stddevSampStable" -"stddevSampStableArgMax" -"stddevSampStableArgMin" -"stddevSampStableArray" -"stddevSampStableDistinct" -"stddevSampStableForEach" -"stddevSampStableIf" -"stddevSampStableMap" -"stddevSampStableMerge" -"stddevSampStableNull" -"stddevSampStableOrDefault" -"stddevSampStableOrNull" -"stddevSampStableResample" -"stddevSampStableSimpleState" -"stddevSampStableState" -"stddevSampState" -"stem" -"stochasticLinearRegression" -"stochasticLinearRegressionArgMax" -"stochasticLinearRegressionArgMin" -"stochasticLinearRegressionArray" 
-"stochasticLinearRegressionDistinct" -"stochasticLinearRegressionForEach" -"stochasticLinearRegressionIf" -"stochasticLinearRegressionMap" -"stochasticLinearRegressionMerge" -"stochasticLinearRegressionNull" -"stochasticLinearRegressionOrDefault" -"stochasticLinearRegressionOrNull" -"stochasticLinearRegressionResample" -"stochasticLinearRegressionSimpleState" -"stochasticLinearRegressionState" -"stochasticLogisticRegression" -"stochasticLogisticRegressionArgMax" -"stochasticLogisticRegressionArgMin" -"stochasticLogisticRegressionArray" -"stochasticLogisticRegressionDistinct" -"stochasticLogisticRegressionForEach" -"stochasticLogisticRegressionIf" -"stochasticLogisticRegressionMap" -"stochasticLogisticRegressionMerge" -"stochasticLogisticRegressionNull" -"stochasticLogisticRegressionOrDefault" -"stochasticLogisticRegressionOrNull" -"stochasticLogisticRegressionResample" -"stochasticLogisticRegressionSimpleState" -"stochasticLogisticRegressionState" -"str_to_date" -"str_to_map" -"stringJaccardIndex" -"stringJaccardIndexUTF8" -"stringToH3" -"structureToCapnProtoSchema" -"structureToProtobufSchema" -"studentTTest" -"studentTTestArgMax" -"studentTTestArgMin" -"studentTTestArray" -"studentTTestDistinct" -"studentTTestForEach" -"studentTTestIf" -"studentTTestMap" -"studentTTestMerge" -"studentTTestNull" -"studentTTestOrDefault" -"studentTTestOrNull" -"studentTTestResample" -"studentTTestSimpleState" -"studentTTestState" -"subBitmap" -"subDate" -"substr" -"substring" -"substringIndex" -"substringIndexUTF8" -"substringUTF8" -"subtractDays" -"subtractHours" -"subtractInterval" -"subtractMicroseconds" -"subtractMilliseconds" -"subtractMinutes" -"subtractMonths" -"subtractNanoseconds" -"subtractQuarters" -"subtractSeconds" -"subtractTupleOfIntervals" -"subtractWeeks" -"subtractYears" -"sum" -"sumArgMax" -"sumArgMin" -"sumArray" -"sumCount" -"sumCountArgMax" -"sumCountArgMin" -"sumCountArray" -"sumCountDistinct" -"sumCountForEach" -"sumCountIf" -"sumCountMap" -"sumCountMerge" -"sumCountNull" -"sumCountOrDefault" -"sumCountOrNull" -"sumCountResample" -"sumCountSimpleState" -"sumCountState" -"sumDistinct" -"sumForEach" -"sumIf" -"sumKahan" -"sumKahanArgMax" -"sumKahanArgMin" -"sumKahanArray" -"sumKahanDistinct" -"sumKahanForEach" -"sumKahanIf" -"sumKahanMap" -"sumKahanMerge" -"sumKahanNull" -"sumKahanOrDefault" -"sumKahanOrNull" -"sumKahanResample" -"sumKahanSimpleState" -"sumKahanState" -"sumMap" -"sumMapFiltered" -"sumMapFilteredArgMax" -"sumMapFilteredArgMin" -"sumMapFilteredArray" -"sumMapFilteredDistinct" -"sumMapFilteredForEach" -"sumMapFilteredIf" -"sumMapFilteredMap" -"sumMapFilteredMerge" -"sumMapFilteredNull" -"sumMapFilteredOrDefault" -"sumMapFilteredOrNull" -"sumMapFilteredResample" -"sumMapFilteredSimpleState" -"sumMapFilteredState" -"sumMapFilteredWithOverflow" -"sumMapFilteredWithOverflowArgMax" -"sumMapFilteredWithOverflowArgMin" -"sumMapFilteredWithOverflowArray" -"sumMapFilteredWithOverflowDistinct" -"sumMapFilteredWithOverflowForEach" -"sumMapFilteredWithOverflowIf" -"sumMapFilteredWithOverflowMap" -"sumMapFilteredWithOverflowMerge" -"sumMapFilteredWithOverflowNull" -"sumMapFilteredWithOverflowOrDefault" -"sumMapFilteredWithOverflowOrNull" -"sumMapFilteredWithOverflowResample" -"sumMapFilteredWithOverflowSimpleState" -"sumMapFilteredWithOverflowState" -"sumMapWithOverflow" -"sumMapWithOverflowArgMax" -"sumMapWithOverflowArgMin" -"sumMapWithOverflowArray" -"sumMapWithOverflowDistinct" -"sumMapWithOverflowForEach" -"sumMapWithOverflowIf" -"sumMapWithOverflowMap" 
-"sumMapWithOverflowMerge" -"sumMapWithOverflowNull" -"sumMapWithOverflowOrDefault" -"sumMapWithOverflowOrNull" -"sumMapWithOverflowResample" -"sumMapWithOverflowSimpleState" -"sumMapWithOverflowState" -"sumMappedArrays" -"sumMappedArraysArgMax" -"sumMappedArraysArgMin" -"sumMappedArraysArray" -"sumMappedArraysDistinct" -"sumMappedArraysForEach" -"sumMappedArraysIf" -"sumMappedArraysMap" -"sumMappedArraysMerge" -"sumMappedArraysNull" -"sumMappedArraysOrDefault" -"sumMappedArraysOrNull" -"sumMappedArraysResample" -"sumMappedArraysSimpleState" -"sumMappedArraysState" -"sumMerge" -"sumNull" -"sumOrDefault" -"sumOrNull" -"sumResample" -"sumSimpleState" -"sumState" -"sumWithOverflow" -"sumWithOverflowArgMax" -"sumWithOverflowArgMin" -"sumWithOverflowArray" -"sumWithOverflowDistinct" -"sumWithOverflowForEach" -"sumWithOverflowIf" -"sumWithOverflowMap" -"sumWithOverflowMerge" -"sumWithOverflowNull" -"sumWithOverflowOrDefault" -"sumWithOverflowOrNull" -"sumWithOverflowResample" -"sumWithOverflowSimpleState" -"sumWithOverflowState" -"svg" -"synonyms" -"tan" -"tanh" -"tcpPort" -"tgamma" -"theilsU" -"theilsUArgMax" -"theilsUArgMin" -"theilsUArray" -"theilsUDistinct" -"theilsUForEach" -"theilsUIf" -"theilsUMap" -"theilsUMerge" -"theilsUNull" -"theilsUOrDefault" -"theilsUOrNull" -"theilsUResample" -"theilsUSimpleState" -"theilsUState" -"throwIf" -"tid" -"timeDiff" -"timeSlot" -"timeSlots" -"timeZone" -"timeZoneOf" -"timeZoneOffset" -"timestamp" -"timestampDiff" -"timestamp_diff" -"timezone" -"timezoneOf" -"timezoneOffset" -"toBool" -"toColumnTypeName" -"toDate" -"toDate32" -"toDate32OrDefault" -"toDate32OrNull" -"toDate32OrZero" -"toDateOrDefault" -"toDateOrNull" -"toDateOrZero" -"toDateTime" -"toDateTime32" -"toDateTime64" -"toDateTime64OrDefault" -"toDateTime64OrNull" -"toDateTime64OrZero" -"toDateTimeOrDefault" -"toDateTimeOrNull" -"toDateTimeOrZero" -"toDayOfMonth" -"toDayOfWeek" -"toDayOfYear" -"toDaysSinceYearZero" -"toDecimal128" -"toDecimal128OrDefault" -"toDecimal128OrNull" -"toDecimal128OrZero" -"toDecimal256" -"toDecimal256OrDefault" -"toDecimal256OrNull" -"toDecimal256OrZero" -"toDecimal32" -"toDecimal32OrDefault" -"toDecimal32OrNull" -"toDecimal32OrZero" -"toDecimal64" -"toDecimal64OrDefault" -"toDecimal64OrNull" -"toDecimal64OrZero" -"toDecimalString" -"toFixedString" -"toFloat32" -"toFloat32OrDefault" -"toFloat32OrNull" -"toFloat32OrZero" -"toFloat64" -"toFloat64OrDefault" -"toFloat64OrNull" -"toFloat64OrZero" -"toHour" -"toIPv4" -"toIPv4OrDefault" -"toIPv4OrNull" -"toIPv4OrZero" -"toIPv6" -"toIPv6OrDefault" -"toIPv6OrNull" -"toIPv6OrZero" -"toISOWeek" -"toISOYear" -"toInt128" -"toInt128OrDefault" -"toInt128OrNull" -"toInt128OrZero" -"toInt16" -"toInt16OrDefault" -"toInt16OrNull" -"toInt16OrZero" -"toInt256" -"toInt256OrDefault" -"toInt256OrNull" -"toInt256OrZero" -"toInt32" -"toInt32OrDefault" -"toInt32OrNull" -"toInt32OrZero" -"toInt64" -"toInt64OrDefault" -"toInt64OrNull" -"toInt64OrZero" -"toInt8" -"toInt8OrDefault" -"toInt8OrNull" -"toInt8OrZero" -"toIntervalDay" -"toIntervalHour" -"toIntervalMicrosecond" -"toIntervalMillisecond" -"toIntervalMinute" -"toIntervalMonth" -"toIntervalNanosecond" -"toIntervalQuarter" -"toIntervalSecond" -"toIntervalWeek" -"toIntervalYear" -"toJSONString" -"toLastDayOfMonth" -"toLastDayOfWeek" -"toLowCardinality" -"toMillisecond" -"toMinute" -"toModifiedJulianDay" -"toModifiedJulianDayOrNull" -"toMonday" -"toMonth" -"toNullable" -"toQuarter" -"toRelativeDayNum" -"toRelativeHourNum" -"toRelativeMinuteNum" -"toRelativeMonthNum" -"toRelativeQuarterNum" 
-"toRelativeSecondNum" -"toRelativeWeekNum" -"toRelativeYearNum" -"toSecond" -"toStartOfDay" -"toStartOfFifteenMinutes" -"toStartOfFiveMinute" -"toStartOfFiveMinutes" -"toStartOfHour" -"toStartOfISOYear" -"toStartOfInterval" -"toStartOfMicrosecond" -"toStartOfMillisecond" -"toStartOfMinute" -"toStartOfMonth" -"toStartOfNanosecond" -"toStartOfQuarter" -"toStartOfSecond" -"toStartOfTenMinutes" -"toStartOfWeek" -"toStartOfYear" -"toString" -"toStringCutToZero" -"toTime" -"toTimeZone" -"toTimezone" -"toTypeName" -"toUInt128" -"toUInt128OrDefault" -"toUInt128OrNull" -"toUInt128OrZero" -"toUInt16" -"toUInt16OrDefault" -"toUInt16OrNull" -"toUInt16OrZero" -"toUInt256" -"toUInt256OrDefault" -"toUInt256OrNull" -"toUInt256OrZero" -"toUInt32" -"toUInt32OrDefault" -"toUInt32OrNull" -"toUInt32OrZero" -"toUInt64" -"toUInt64OrDefault" -"toUInt64OrNull" -"toUInt64OrZero" -"toUInt8" -"toUInt8OrDefault" -"toUInt8OrNull" -"toUInt8OrZero" -"toUTCTimestamp" -"toUUID" -"toUUIDOrDefault" -"toUUIDOrNull" -"toUUIDOrZero" -"toUnixTimestamp" -"toUnixTimestamp64Micro" -"toUnixTimestamp64Milli" -"toUnixTimestamp64Nano" -"toValidUTF8" -"toWeek" -"toYYYYMM" -"toYYYYMMDD" -"toYYYYMMDDhhmmss" -"toYear" -"toYearWeek" -"to_utc_timestamp" -"today" -"tokens" -"topK" -"topKArgMax" -"topKArgMin" -"topKArray" -"topKDistinct" -"topKForEach" -"topKIf" -"topKMap" -"topKMerge" -"topKNull" -"topKOrDefault" -"topKOrNull" -"topKResample" -"topKSimpleState" -"topKState" -"topKWeighted" -"topKWeightedArgMax" -"topKWeightedArgMin" -"topKWeightedArray" -"topKWeightedDistinct" -"topKWeightedForEach" -"topKWeightedIf" -"topKWeightedMap" -"topKWeightedMerge" -"topKWeightedNull" -"topKWeightedOrDefault" -"topKWeightedOrNull" -"topKWeightedResample" -"topKWeightedSimpleState" -"topKWeightedState" -"topLevelDomain" -"topLevelDomainRFC" -"transactionID" -"transactionLatestSnapshot" -"transactionOldestSnapshot" -"transform" -"translate" -"translateUTF8" -"trim" -"trimBoth" -"trimLeft" -"trimRight" -"trunc" -"truncate" -"tryBase58Decode" -"tryBase64Decode" -"tryBase64URLDecode" -"tryDecrypt" -"tryIdnaEncode" -"tryPunycodeDecode" -"tumble" -"tumbleEnd" -"tumbleStart" -"tuple" -"tupleConcat" -"tupleDivide" -"tupleDivideByNumber" -"tupleElement" -"tupleHammingDistance" -"tupleIntDiv" -"tupleIntDivByNumber" -"tupleIntDivOrZero" -"tupleIntDivOrZeroByNumber" -"tupleMinus" -"tupleModulo" -"tupleModuloByNumber" -"tupleMultiply" -"tupleMultiplyByNumber" -"tupleNames" -"tupleNegate" -"tuplePlus" -"tupleToNameValuePairs" -"ucase" -"unbin" -"unhex" -"uniq" -"uniqArgMax" -"uniqArgMin" -"uniqArray" -"uniqCombined" -"uniqCombined64" -"uniqCombined64ArgMax" -"uniqCombined64ArgMin" -"uniqCombined64Array" -"uniqCombined64Distinct" -"uniqCombined64ForEach" -"uniqCombined64If" -"uniqCombined64Map" -"uniqCombined64Merge" -"uniqCombined64Null" -"uniqCombined64OrDefault" -"uniqCombined64OrNull" -"uniqCombined64Resample" -"uniqCombined64SimpleState" -"uniqCombined64State" -"uniqCombinedArgMax" -"uniqCombinedArgMin" -"uniqCombinedArray" -"uniqCombinedDistinct" -"uniqCombinedForEach" -"uniqCombinedIf" -"uniqCombinedMap" -"uniqCombinedMerge" -"uniqCombinedNull" -"uniqCombinedOrDefault" -"uniqCombinedOrNull" -"uniqCombinedResample" -"uniqCombinedSimpleState" -"uniqCombinedState" -"uniqDistinct" -"uniqExact" -"uniqExactArgMax" -"uniqExactArgMin" -"uniqExactArray" -"uniqExactDistinct" -"uniqExactForEach" -"uniqExactIf" -"uniqExactMap" -"uniqExactMerge" -"uniqExactNull" -"uniqExactOrDefault" -"uniqExactOrNull" -"uniqExactResample" -"uniqExactSimpleState" -"uniqExactState" 
-"uniqForEach" -"uniqHLL12" -"uniqHLL12ArgMax" -"uniqHLL12ArgMin" -"uniqHLL12Array" -"uniqHLL12Distinct" -"uniqHLL12ForEach" -"uniqHLL12If" -"uniqHLL12Map" -"uniqHLL12Merge" -"uniqHLL12Null" -"uniqHLL12OrDefault" -"uniqHLL12OrNull" -"uniqHLL12Resample" -"uniqHLL12SimpleState" -"uniqHLL12State" -"uniqIf" -"uniqMap" -"uniqMerge" -"uniqNull" -"uniqOrDefault" -"uniqOrNull" -"uniqResample" -"uniqSimpleState" -"uniqState" -"uniqTheta" -"uniqThetaArgMax" -"uniqThetaArgMin" -"uniqThetaArray" -"uniqThetaDistinct" -"uniqThetaForEach" -"uniqThetaIf" -"uniqThetaIntersect" -"uniqThetaMap" -"uniqThetaMerge" -"uniqThetaNot" -"uniqThetaNull" -"uniqThetaOrDefault" -"uniqThetaOrNull" -"uniqThetaResample" -"uniqThetaSimpleState" -"uniqThetaState" -"uniqThetaUnion" -"uniqUpTo" -"uniqUpToArgMax" -"uniqUpToArgMin" -"uniqUpToArray" -"uniqUpToDistinct" -"uniqUpToForEach" -"uniqUpToIf" -"uniqUpToMap" -"uniqUpToMerge" -"uniqUpToNull" -"uniqUpToOrDefault" -"uniqUpToOrNull" -"uniqUpToResample" -"uniqUpToSimpleState" -"uniqUpToState" -"upper" -"upperUTF8" -"uptime" -"user" -"validateNestedArraySizes" -"varPop" -"varPopArgMax" -"varPopArgMin" -"varPopArray" -"varPopDistinct" -"varPopForEach" -"varPopIf" -"varPopMap" -"varPopMerge" -"varPopNull" -"varPopOrDefault" -"varPopOrNull" -"varPopResample" -"varPopSimpleState" -"varPopStable" -"varPopStableArgMax" -"varPopStableArgMin" -"varPopStableArray" -"varPopStableDistinct" -"varPopStableForEach" -"varPopStableIf" -"varPopStableMap" -"varPopStableMerge" -"varPopStableNull" -"varPopStableOrDefault" -"varPopStableOrNull" -"varPopStableResample" -"varPopStableSimpleState" -"varPopStableState" -"varPopState" -"varSamp" -"varSampArgMax" -"varSampArgMin" -"varSampArray" -"varSampDistinct" -"varSampForEach" -"varSampIf" -"varSampMap" -"varSampMerge" -"varSampNull" -"varSampOrDefault" -"varSampOrNull" -"varSampResample" -"varSampSimpleState" -"varSampStable" -"varSampStableArgMax" -"varSampStableArgMin" -"varSampStableArray" -"varSampStableDistinct" -"varSampStableForEach" -"varSampStableIf" -"varSampStableMap" -"varSampStableMerge" -"varSampStableNull" -"varSampStableOrDefault" -"varSampStableOrNull" -"varSampStableResample" -"varSampStableSimpleState" -"varSampStableState" -"varSampState" -"variantElement" -"variantType" -"vectorDifference" -"vectorSum" -"version" -"visibleWidth" -"visitParamExtractBool" -"visitParamExtractFloat" -"visitParamExtractInt" -"visitParamExtractRaw" -"visitParamExtractString" -"visitParamExtractUInt" -"visitParamHas" -"week" -"welchTTest" -"welchTTestArgMax" -"welchTTestArgMin" -"welchTTestArray" -"welchTTestDistinct" -"welchTTestForEach" -"welchTTestIf" -"welchTTestMap" -"welchTTestMerge" -"welchTTestNull" -"welchTTestOrDefault" -"welchTTestOrNull" -"welchTTestResample" -"welchTTestSimpleState" -"welchTTestState" -"widthBucket" -"width_bucket" -"windowFunnel" -"windowFunnelArgMax" -"windowFunnelArgMin" -"windowFunnelArray" -"windowFunnelDistinct" -"windowFunnelForEach" -"windowFunnelIf" -"windowFunnelMap" -"windowFunnelMerge" -"windowFunnelNull" -"windowFunnelOrDefault" -"windowFunnelOrNull" -"windowFunnelResample" -"windowFunnelSimpleState" -"windowFunnelState" -"windowID" -"wkt" -"wordShingleMinHash" -"wordShingleMinHashArg" -"wordShingleMinHashArgCaseInsensitive" -"wordShingleMinHashArgCaseInsensitiveUTF8" -"wordShingleMinHashArgUTF8" -"wordShingleMinHashCaseInsensitive" -"wordShingleMinHashCaseInsensitiveUTF8" -"wordShingleMinHashUTF8" -"wordShingleSimHash" -"wordShingleSimHashCaseInsensitive" -"wordShingleSimHashCaseInsensitiveUTF8" 
-"wordShingleSimHashUTF8" -"wyHash64" -"xor" -"xxHash32" -"xxHash64" -"xxh3" -"yandexConsistentHash" -"yearweek" -"yesterday" -"zookeeperSessionUptime" diff --git a/tests/fuzz/update_dict.sh b/tests/fuzz/update_dict.sh new file mode 100755 index 00000000000..a83c9167129 --- /dev/null +++ b/tests/fuzz/update_dict.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +set -euo pipefail + +SCRIPT_DIR=$(dirname "$(realpath "$0")") +ROOT_PATH="$(git rev-parse --show-toplevel)" +CLICKHOUSE_BIN="${CLICKHOUSE_BIN:-$ROOT_PATH/build/programs/clickhouse}" +DICTIONARIES_DIR="$SCRIPT_DIR/dictionaries" + +echo "Generating functions dict" +$CLICKHOUSE_BIN local -q "SELECT * FROM (SELECT DISTINCT concat('\"', name, '\"') as res FROM system.functions ORDER BY name UNION ALL SELECT concat('\"', a.name, b.name, '\"') as res FROM system.functions as a CROSS JOIN system.aggregate_function_combinators as b WHERE a.is_aggregate = 1) ORDER BY res" > "$DICTIONARIES_DIR/functions.dict" + +echo "Generating data types dict" +$CLICKHOUSE_BIN local -q "SELECT DISTINCT concat('\"', name, '\"') as res FROM system.data_type_families ORDER BY name" > "$DICTIONARIES_DIR/datatypes.dict" + +echo "Generating keywords dict" +$CLICKHOUSE_BIN local -q "SELECT DISTINCT concat('\"', keyword, '\"') as res FROM system.keywords ORDER BY keyword" > "$DICTIONARIES_DIR/keywords.dict" + +echo "Merging dictionaries into all.dict" +cat "$DICTIONARIES_DIR"/* | LC_ALL=C sort | uniq > "$SCRIPT_DIR/all.dict" \ No newline at end of file From 20e20b97c9bddf3a88b0ba8e3a388fe54496fcb2 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 13 Aug 2024 16:20:27 +0200 Subject: [PATCH 250/265] Fix test storage_join_direct_join --- tests/performance/storage_join_direct_join.xml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/performance/storage_join_direct_join.xml b/tests/performance/storage_join_direct_join.xml index 867108ac2b7..0e67abb275e 100644 --- a/tests/performance/storage_join_direct_join.xml +++ b/tests/performance/storage_join_direct_join.xml @@ -16,4 +16,7 @@ SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null; SELECT keys.key, value1 FROM keys ANY LEFT JOIN dict AS d ON (keys.key = d.key) FORMAT Null SETTINGS allow_experimental_analyzer=1 + + DROP TABLE IF EXISTS keys + DROP TABLE IF EXISTS dict
From 0593650565d717c773fec265056256de98f86f7f Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Tue, 13 Aug 2024 14:30:42 +0000 Subject: [PATCH 251/265] Update dict files The previous commit contained the dict files generated by running the script from the root directory, which returned an error. --- tests/fuzz/dictionaries/functions.dict | 4283 ++++++++++++++++++++++++ 1 file changed, 4283 insertions(+) diff --git a/tests/fuzz/dictionaries/functions.dict b/tests/fuzz/dictionaries/functions.dict index e69de29bb2d..e562595fb67 100644 --- a/tests/fuzz/dictionaries/functions.dict +++ b/tests/fuzz/dictionaries/functions.dict @@ -0,0 +1,4283 @@ +"BIT_AND" +"BIT_ANDArgMax" +"BIT_ANDArgMin" +"BIT_ANDArray" +"BIT_ANDDistinct" +"BIT_ANDForEach" +"BIT_ANDIf" +"BIT_ANDMap" +"BIT_ANDMerge" +"BIT_ANDNull" +"BIT_ANDOrDefault" +"BIT_ANDOrNull" +"BIT_ANDResample" +"BIT_ANDSimpleState" +"BIT_ANDState" +"BIT_OR" +"BIT_ORArgMax" +"BIT_ORArgMin" +"BIT_ORArray" +"BIT_ORDistinct" +"BIT_ORForEach" +"BIT_ORIf" +"BIT_ORMap" +"BIT_ORMerge" +"BIT_ORNull" +"BIT_OROrDefault" +"BIT_OROrNull" +"BIT_ORResample" +"BIT_ORSimpleState" +"BIT_ORState" +"BIT_XOR" +"BIT_XORArgMax" +"BIT_XORArgMin" +"BIT_XORArray" +"BIT_XORDistinct" +"BIT_XORForEach" +"BIT_XORIf" +"BIT_XORMap" +"BIT_XORMerge" +"BIT_XORNull" +"BIT_XOROrDefault" +"BIT_XOROrNull" +"BIT_XORResample" +"BIT_XORSimpleState" +"BIT_XORState" +"BLAKE3" +"CAST" +"CHARACTER_LENGTH" +"CHAR_LENGTH" +"COVAR_POP" +"COVAR_POPArgMax" +"COVAR_POPArgMin" +"COVAR_POPArray" +"COVAR_POPDistinct" +"COVAR_POPForEach" +"COVAR_POPIf" +"COVAR_POPMap" +"COVAR_POPMerge" +"COVAR_POPNull" +"COVAR_POPOrDefault" +"COVAR_POPOrNull" +"COVAR_POPResample" +"COVAR_POPSimpleState" +"COVAR_POPState" +"COVAR_SAMP" +"COVAR_SAMPArgMax" +"COVAR_SAMPArgMin" +"COVAR_SAMPArray" +"COVAR_SAMPDistinct" +"COVAR_SAMPForEach" +"COVAR_SAMPIf" +"COVAR_SAMPMap" +"COVAR_SAMPMerge" +"COVAR_SAMPNull" +"COVAR_SAMPOrDefault" +"COVAR_SAMPOrNull" +"COVAR_SAMPResample" +"COVAR_SAMPSimpleState" +"COVAR_SAMPState" +"CRC32" +"CRC32IEEE" +"CRC64" +"DATABASE" +"DATE" +"DATE_DIFF" +"DATE_FORMAT" +"DATE_TRUNC" +"DAY" +"DAYOFMONTH" +"DAYOFWEEK" +"DAYOFYEAR" +"FORMAT_BYTES" +"FQDN" +"FROM_BASE64" +"FROM_DAYS" +"FROM_UNIXTIME" +"HOUR" +"INET6_ATON" +"INET6_NTOA" +"INET_ATON" +"INET_NTOA" +"IPv4CIDRToRange" +"IPv4NumToString" +"IPv4NumToStringClassC" +"IPv4StringToNum" +"IPv4StringToNumOrDefault" +"IPv4StringToNumOrNull" +"IPv4ToIPv6" +"IPv6CIDRToRange" +"IPv6NumToString" +"IPv6StringToNum" +"IPv6StringToNumOrDefault" +"IPv6StringToNumOrNull" +"JSONArrayLength" +"JSONExtract" +"JSONExtractArrayRaw" +"JSONExtractBool" +"JSONExtractFloat" +"JSONExtractInt" +"JSONExtractKeys" +"JSONExtractKeysAndValues" +"JSONExtractKeysAndValuesRaw" +"JSONExtractRaw" +"JSONExtractString" +"JSONExtractUInt" +"JSONHas" +"JSONKey" +"JSONLength" +"JSONMergePatch" +"JSONType" +"JSON_ARRAY_LENGTH" +"JSON_EXISTS" +"JSON_QUERY" +"JSON_VALUE" +"L1Distance" +"L1Norm" +"L1Normalize" +"L2Distance" +"L2Norm" +"L2Normalize" +"L2SquaredDistance" +"L2SquaredNorm" +"LAST_DAY" +"LinfDistance" +"LinfNorm" +"LinfNormalize" +"LpDistance" +"LpNorm" +"LpNormalize" +"MACNumToString" +"MACStringToNum" +"MACStringToOUI" +"MAP_FROM_ARRAYS" +"MD4" +"MD5" +"MILLISECOND" +"MINUTE" +"MONTH" +"OCTET_LENGTH" +"QUARTER" +"REGEXP_EXTRACT" +"REGEXP_MATCHES" +"REGEXP_REPLACE" +"SCHEMA" +"SECOND" +"SHA1" +"SHA224" +"SHA256" +"SHA384" +"SHA512" +"SHA512_256" +"STD" +"STDArgMax" +"STDArgMin" +"STDArray" +"STDDEV_POP" +"STDDEV_POPArgMax" +"STDDEV_POPArgMin" +"STDDEV_POPArray" +"STDDEV_POPDistinct" +"STDDEV_POPForEach"
+"STDDEV_POPIf" +"STDDEV_POPMap" +"STDDEV_POPMerge" +"STDDEV_POPNull" +"STDDEV_POPOrDefault" +"STDDEV_POPOrNull" +"STDDEV_POPResample" +"STDDEV_POPSimpleState" +"STDDEV_POPState" +"STDDEV_SAMP" +"STDDEV_SAMPArgMax" +"STDDEV_SAMPArgMin" +"STDDEV_SAMPArray" +"STDDEV_SAMPDistinct" +"STDDEV_SAMPForEach" +"STDDEV_SAMPIf" +"STDDEV_SAMPMap" +"STDDEV_SAMPMerge" +"STDDEV_SAMPNull" +"STDDEV_SAMPOrDefault" +"STDDEV_SAMPOrNull" +"STDDEV_SAMPResample" +"STDDEV_SAMPSimpleState" +"STDDEV_SAMPState" +"STDDistinct" +"STDForEach" +"STDIf" +"STDMap" +"STDMerge" +"STDNull" +"STDOrDefault" +"STDOrNull" +"STDResample" +"STDSimpleState" +"STDState" +"SUBSTRING_INDEX" +"SVG" +"TIMESTAMP_DIFF" +"TO_BASE64" +"TO_DAYS" +"TO_UNIXTIME" +"ULIDStringToDateTime" +"URLHash" +"URLHierarchy" +"URLPathHierarchy" +"UTCTimestamp" +"UTC_timestamp" +"UUIDNumToString" +"UUIDStringToNum" +"UUIDToNum" +"UUIDv7ToDateTime" +"VAR_POP" +"VAR_POPArgMax" +"VAR_POPArgMin" +"VAR_POPArray" +"VAR_POPDistinct" +"VAR_POPForEach" +"VAR_POPIf" +"VAR_POPMap" +"VAR_POPMerge" +"VAR_POPNull" +"VAR_POPOrDefault" +"VAR_POPOrNull" +"VAR_POPResample" +"VAR_POPSimpleState" +"VAR_POPState" +"VAR_SAMP" +"VAR_SAMPArgMax" +"VAR_SAMPArgMin" +"VAR_SAMPArray" +"VAR_SAMPDistinct" +"VAR_SAMPForEach" +"VAR_SAMPIf" +"VAR_SAMPMap" +"VAR_SAMPMerge" +"VAR_SAMPNull" +"VAR_SAMPOrDefault" +"VAR_SAMPOrNull" +"VAR_SAMPResample" +"VAR_SAMPSimpleState" +"VAR_SAMPState" +"YEAR" +"YYYYMMDDToDate" +"YYYYMMDDToDate32" +"YYYYMMDDhhmmssToDateTime" +"YYYYMMDDhhmmssToDateTime64" +"_CAST" +"__actionName" +"__bitBoolMaskAnd" +"__bitBoolMaskOr" +"__bitSwapLastTwo" +"__bitWrapperFunc" +"__getScalar" +"__scalarSubqueryResult" +"abs" +"accurateCast" +"accurateCastOrDefault" +"accurateCastOrNull" +"acos" +"acosh" +"addDate" +"addDays" +"addHours" +"addInterval" +"addMicroseconds" +"addMilliseconds" +"addMinutes" +"addMonths" +"addNanoseconds" +"addQuarters" +"addSeconds" +"addTupleOfIntervals" +"addWeeks" +"addYears" +"addressToLine" +"addressToLineWithInlines" +"addressToSymbol" +"aes_decrypt_mysql" +"aes_encrypt_mysql" +"age" +"aggThrow" +"aggThrowArgMax" +"aggThrowArgMin" +"aggThrowArray" +"aggThrowDistinct" +"aggThrowForEach" +"aggThrowIf" +"aggThrowMap" +"aggThrowMerge" +"aggThrowNull" +"aggThrowOrDefault" +"aggThrowOrNull" +"aggThrowResample" +"aggThrowSimpleState" +"aggThrowState" +"alphaTokens" +"analysisOfVariance" +"analysisOfVarianceArgMax" +"analysisOfVarianceArgMin" +"analysisOfVarianceArray" +"analysisOfVarianceDistinct" +"analysisOfVarianceForEach" +"analysisOfVarianceIf" +"analysisOfVarianceMap" +"analysisOfVarianceMerge" +"analysisOfVarianceNull" +"analysisOfVarianceOrDefault" +"analysisOfVarianceOrNull" +"analysisOfVarianceResample" +"analysisOfVarianceSimpleState" +"analysisOfVarianceState" +"and" +"anova" +"anovaArgMax" +"anovaArgMin" +"anovaArray" +"anovaDistinct" +"anovaForEach" +"anovaIf" +"anovaMap" +"anovaMerge" +"anovaNull" +"anovaOrDefault" +"anovaOrNull" +"anovaResample" +"anovaSimpleState" +"anovaState" +"any" +"anyArgMax" +"anyArgMin" +"anyArray" +"anyDistinct" +"anyForEach" +"anyHeavy" +"anyHeavyArgMax" +"anyHeavyArgMin" +"anyHeavyArray" +"anyHeavyDistinct" +"anyHeavyForEach" +"anyHeavyIf" +"anyHeavyMap" +"anyHeavyMerge" +"anyHeavyNull" +"anyHeavyOrDefault" +"anyHeavyOrNull" +"anyHeavyResample" +"anyHeavySimpleState" +"anyHeavyState" +"anyIf" +"anyLast" +"anyLastArgMax" +"anyLastArgMin" +"anyLastArray" +"anyLastDistinct" +"anyLastForEach" +"anyLastIf" +"anyLastMap" +"anyLastMerge" +"anyLastNull" +"anyLastOrDefault" +"anyLastOrNull" +"anyLastResample" 
+"anyLastSimpleState" +"anyLastState" +"anyLast_respect_nulls" +"anyLast_respect_nullsArgMax" +"anyLast_respect_nullsArgMin" +"anyLast_respect_nullsArray" +"anyLast_respect_nullsDistinct" +"anyLast_respect_nullsForEach" +"anyLast_respect_nullsIf" +"anyLast_respect_nullsMap" +"anyLast_respect_nullsMerge" +"anyLast_respect_nullsNull" +"anyLast_respect_nullsOrDefault" +"anyLast_respect_nullsOrNull" +"anyLast_respect_nullsResample" +"anyLast_respect_nullsSimpleState" +"anyLast_respect_nullsState" +"anyMap" +"anyMerge" +"anyNull" +"anyOrDefault" +"anyOrNull" +"anyResample" +"anySimpleState" +"anyState" +"any_respect_nulls" +"any_respect_nullsArgMax" +"any_respect_nullsArgMin" +"any_respect_nullsArray" +"any_respect_nullsDistinct" +"any_respect_nullsForEach" +"any_respect_nullsIf" +"any_respect_nullsMap" +"any_respect_nullsMerge" +"any_respect_nullsNull" +"any_respect_nullsOrDefault" +"any_respect_nullsOrNull" +"any_respect_nullsResample" +"any_respect_nullsSimpleState" +"any_respect_nullsState" +"any_value" +"any_valueArgMax" +"any_valueArgMin" +"any_valueArray" +"any_valueDistinct" +"any_valueForEach" +"any_valueIf" +"any_valueMap" +"any_valueMerge" +"any_valueNull" +"any_valueOrDefault" +"any_valueOrNull" +"any_valueResample" +"any_valueSimpleState" +"any_valueState" +"any_value_respect_nulls" +"any_value_respect_nullsArgMax" +"any_value_respect_nullsArgMin" +"any_value_respect_nullsArray" +"any_value_respect_nullsDistinct" +"any_value_respect_nullsForEach" +"any_value_respect_nullsIf" +"any_value_respect_nullsMap" +"any_value_respect_nullsMerge" +"any_value_respect_nullsNull" +"any_value_respect_nullsOrDefault" +"any_value_respect_nullsOrNull" +"any_value_respect_nullsResample" +"any_value_respect_nullsSimpleState" +"any_value_respect_nullsState" +"appendTrailingCharIfAbsent" +"approx_top_count" +"approx_top_countArgMax" +"approx_top_countArgMin" +"approx_top_countArray" +"approx_top_countDistinct" +"approx_top_countForEach" +"approx_top_countIf" +"approx_top_countMap" +"approx_top_countMerge" +"approx_top_countNull" +"approx_top_countOrDefault" +"approx_top_countOrNull" +"approx_top_countResample" +"approx_top_countSimpleState" +"approx_top_countState" +"approx_top_k" +"approx_top_kArgMax" +"approx_top_kArgMin" +"approx_top_kArray" +"approx_top_kDistinct" +"approx_top_kForEach" +"approx_top_kIf" +"approx_top_kMap" +"approx_top_kMerge" +"approx_top_kNull" +"approx_top_kOrDefault" +"approx_top_kOrNull" +"approx_top_kResample" +"approx_top_kSimpleState" +"approx_top_kState" +"approx_top_sum" +"approx_top_sumArgMax" +"approx_top_sumArgMin" +"approx_top_sumArray" +"approx_top_sumDistinct" +"approx_top_sumForEach" +"approx_top_sumIf" +"approx_top_sumMap" +"approx_top_sumMerge" +"approx_top_sumNull" +"approx_top_sumOrDefault" +"approx_top_sumOrNull" +"approx_top_sumResample" +"approx_top_sumSimpleState" +"approx_top_sumState" +"argMax" +"argMaxArgMax" +"argMaxArgMin" +"argMaxArray" +"argMaxDistinct" +"argMaxForEach" +"argMaxIf" +"argMaxMap" +"argMaxMerge" +"argMaxNull" +"argMaxOrDefault" +"argMaxOrNull" +"argMaxResample" +"argMaxSimpleState" +"argMaxState" +"argMin" +"argMinArgMax" +"argMinArgMin" +"argMinArray" +"argMinDistinct" +"argMinForEach" +"argMinIf" +"argMinMap" +"argMinMerge" +"argMinNull" +"argMinOrDefault" +"argMinOrNull" +"argMinResample" +"argMinSimpleState" +"argMinState" +"array" +"arrayAUC" +"arrayAll" +"arrayAvg" +"arrayCompact" +"arrayConcat" +"arrayCount" +"arrayCumSum" +"arrayCumSumNonNegative" +"arrayDifference" +"arrayDistinct" +"arrayDotProduct" +"arrayElement" 
+"arrayEnumerate" +"arrayEnumerateDense" +"arrayEnumerateDenseRanked" +"arrayEnumerateUniq" +"arrayEnumerateUniqRanked" +"arrayExists" +"arrayFill" +"arrayFilter" +"arrayFirst" +"arrayFirstIndex" +"arrayFirstOrNull" +"arrayFlatten" +"arrayFold" +"arrayIntersect" +"arrayJaccardIndex" +"arrayJoin" +"arrayLast" +"arrayLastIndex" +"arrayLastOrNull" +"arrayMap" +"arrayMax" +"arrayMin" +"arrayPartialReverseSort" +"arrayPartialShuffle" +"arrayPartialSort" +"arrayPopBack" +"arrayPopFront" +"arrayProduct" +"arrayPushBack" +"arrayPushFront" +"arrayRandomSample" +"arrayReduce" +"arrayReduceInRanges" +"arrayResize" +"arrayReverse" +"arrayReverseFill" +"arrayReverseSort" +"arrayReverseSplit" +"arrayRotateLeft" +"arrayRotateRight" +"arrayShiftLeft" +"arrayShiftRight" +"arrayShingles" +"arrayShuffle" +"arraySlice" +"arraySort" +"arraySplit" +"arrayStringConcat" +"arraySum" +"arrayUniq" +"arrayWithConstant" +"arrayZip" +"array_agg" +"array_aggArgMax" +"array_aggArgMin" +"array_aggArray" +"array_aggDistinct" +"array_aggForEach" +"array_aggIf" +"array_aggMap" +"array_aggMerge" +"array_aggNull" +"array_aggOrDefault" +"array_aggOrNull" +"array_aggResample" +"array_aggSimpleState" +"array_aggState" +"array_concat_agg" +"array_concat_aggArgMax" +"array_concat_aggArgMin" +"array_concat_aggArray" +"array_concat_aggDistinct" +"array_concat_aggForEach" +"array_concat_aggIf" +"array_concat_aggMap" +"array_concat_aggMerge" +"array_concat_aggNull" +"array_concat_aggOrDefault" +"array_concat_aggOrNull" +"array_concat_aggResample" +"array_concat_aggSimpleState" +"array_concat_aggState" +"ascii" +"asin" +"asinh" +"assumeNotNull" +"atan" +"atan2" +"atanh" +"avg" +"avgArgMax" +"avgArgMin" +"avgArray" +"avgDistinct" +"avgForEach" +"avgIf" +"avgMap" +"avgMerge" +"avgNull" +"avgOrDefault" +"avgOrNull" +"avgResample" +"avgSimpleState" +"avgState" +"avgWeighted" +"avgWeightedArgMax" +"avgWeightedArgMin" +"avgWeightedArray" +"avgWeightedDistinct" +"avgWeightedForEach" +"avgWeightedIf" +"avgWeightedMap" +"avgWeightedMerge" +"avgWeightedNull" +"avgWeightedOrDefault" +"avgWeightedOrNull" +"avgWeightedResample" +"avgWeightedSimpleState" +"avgWeightedState" +"bar" +"base58Decode" +"base58Encode" +"base64Decode" +"base64Encode" +"base64URLDecode" +"base64URLEncode" +"basename" +"bin" +"bitAnd" +"bitCount" +"bitHammingDistance" +"bitNot" +"bitOr" +"bitPositionsToArray" +"bitRotateLeft" +"bitRotateRight" +"bitShiftLeft" +"bitShiftRight" +"bitSlice" +"bitTest" +"bitTestAll" +"bitTestAny" +"bitXor" +"bitmapAnd" +"bitmapAndCardinality" +"bitmapAndnot" +"bitmapAndnotCardinality" +"bitmapBuild" +"bitmapCardinality" +"bitmapContains" +"bitmapHasAll" +"bitmapHasAny" +"bitmapMax" +"bitmapMin" +"bitmapOr" +"bitmapOrCardinality" +"bitmapSubsetInRange" +"bitmapSubsetLimit" +"bitmapToArray" +"bitmapTransform" +"bitmapXor" +"bitmapXorCardinality" +"bitmaskToArray" +"bitmaskToList" +"blockNumber" +"blockSerializedSize" +"blockSize" +"boundingRatio" +"boundingRatioArgMax" +"boundingRatioArgMin" +"boundingRatioArray" +"boundingRatioDistinct" +"boundingRatioForEach" +"boundingRatioIf" +"boundingRatioMap" +"boundingRatioMerge" +"boundingRatioNull" +"boundingRatioOrDefault" +"boundingRatioOrNull" +"boundingRatioResample" +"boundingRatioSimpleState" +"boundingRatioState" +"buildId" +"byteHammingDistance" +"byteSize" +"byteSlice" +"byteSwap" +"caseWithExpr" +"caseWithExpression" +"caseWithoutExpr" +"caseWithoutExpression" +"catboostEvaluate" +"categoricalInformationValue" +"categoricalInformationValueArgMax" +"categoricalInformationValueArgMin" 
+"categoricalInformationValueArray" +"categoricalInformationValueDistinct" +"categoricalInformationValueForEach" +"categoricalInformationValueIf" +"categoricalInformationValueMap" +"categoricalInformationValueMerge" +"categoricalInformationValueNull" +"categoricalInformationValueOrDefault" +"categoricalInformationValueOrNull" +"categoricalInformationValueResample" +"categoricalInformationValueSimpleState" +"categoricalInformationValueState" +"cbrt" +"ceil" +"ceiling" +"changeDay" +"changeHour" +"changeMinute" +"changeMonth" +"changeSecond" +"changeYear" +"char" +"cityHash64" +"clamp" +"coalesce" +"concat" +"concatAssumeInjective" +"concatWithSeparator" +"concatWithSeparatorAssumeInjective" +"concat_ws" +"connectionId" +"connection_id" +"contingency" +"contingencyArgMax" +"contingencyArgMin" +"contingencyArray" +"contingencyDistinct" +"contingencyForEach" +"contingencyIf" +"contingencyMap" +"contingencyMerge" +"contingencyNull" +"contingencyOrDefault" +"contingencyOrNull" +"contingencyResample" +"contingencySimpleState" +"contingencyState" +"convertCharset" +"corr" +"corrArgMax" +"corrArgMin" +"corrArray" +"corrDistinct" +"corrForEach" +"corrIf" +"corrMap" +"corrMatrix" +"corrMatrixArgMax" +"corrMatrixArgMin" +"corrMatrixArray" +"corrMatrixDistinct" +"corrMatrixForEach" +"corrMatrixIf" +"corrMatrixMap" +"corrMatrixMerge" +"corrMatrixNull" +"corrMatrixOrDefault" +"corrMatrixOrNull" +"corrMatrixResample" +"corrMatrixSimpleState" +"corrMatrixState" +"corrMerge" +"corrNull" +"corrOrDefault" +"corrOrNull" +"corrResample" +"corrSimpleState" +"corrStable" +"corrStableArgMax" +"corrStableArgMin" +"corrStableArray" +"corrStableDistinct" +"corrStableForEach" +"corrStableIf" +"corrStableMap" +"corrStableMerge" +"corrStableNull" +"corrStableOrDefault" +"corrStableOrNull" +"corrStableResample" +"corrStableSimpleState" +"corrStableState" +"corrState" +"cos" +"cosh" +"cosineDistance" +"count" +"countArgMax" +"countArgMin" +"countArray" +"countDigits" +"countDistinct" +"countEqual" +"countForEach" +"countIf" +"countMap" +"countMatches" +"countMatchesCaseInsensitive" +"countMerge" +"countNull" +"countOrDefault" +"countOrNull" +"countResample" +"countSimpleState" +"countState" +"countSubstrings" +"countSubstringsCaseInsensitive" +"countSubstringsCaseInsensitiveUTF8" +"covarPop" +"covarPopArgMax" +"covarPopArgMin" +"covarPopArray" +"covarPopDistinct" +"covarPopForEach" +"covarPopIf" +"covarPopMap" +"covarPopMatrix" +"covarPopMatrixArgMax" +"covarPopMatrixArgMin" +"covarPopMatrixArray" +"covarPopMatrixDistinct" +"covarPopMatrixForEach" +"covarPopMatrixIf" +"covarPopMatrixMap" +"covarPopMatrixMerge" +"covarPopMatrixNull" +"covarPopMatrixOrDefault" +"covarPopMatrixOrNull" +"covarPopMatrixResample" +"covarPopMatrixSimpleState" +"covarPopMatrixState" +"covarPopMerge" +"covarPopNull" +"covarPopOrDefault" +"covarPopOrNull" +"covarPopResample" +"covarPopSimpleState" +"covarPopStable" +"covarPopStableArgMax" +"covarPopStableArgMin" +"covarPopStableArray" +"covarPopStableDistinct" +"covarPopStableForEach" +"covarPopStableIf" +"covarPopStableMap" +"covarPopStableMerge" +"covarPopStableNull" +"covarPopStableOrDefault" +"covarPopStableOrNull" +"covarPopStableResample" +"covarPopStableSimpleState" +"covarPopStableState" +"covarPopState" +"covarSamp" +"covarSampArgMax" +"covarSampArgMin" +"covarSampArray" +"covarSampDistinct" +"covarSampForEach" +"covarSampIf" +"covarSampMap" +"covarSampMatrix" +"covarSampMatrixArgMax" +"covarSampMatrixArgMin" +"covarSampMatrixArray" +"covarSampMatrixDistinct" +"covarSampMatrixForEach" 
+"covarSampMatrixIf" +"covarSampMatrixMap" +"covarSampMatrixMerge" +"covarSampMatrixNull" +"covarSampMatrixOrDefault" +"covarSampMatrixOrNull" +"covarSampMatrixResample" +"covarSampMatrixSimpleState" +"covarSampMatrixState" +"covarSampMerge" +"covarSampNull" +"covarSampOrDefault" +"covarSampOrNull" +"covarSampResample" +"covarSampSimpleState" +"covarSampStable" +"covarSampStableArgMax" +"covarSampStableArgMin" +"covarSampStableArray" +"covarSampStableDistinct" +"covarSampStableForEach" +"covarSampStableIf" +"covarSampStableMap" +"covarSampStableMerge" +"covarSampStableNull" +"covarSampStableOrDefault" +"covarSampStableOrNull" +"covarSampStableResample" +"covarSampStableSimpleState" +"covarSampStableState" +"covarSampState" +"cramersV" +"cramersVArgMax" +"cramersVArgMin" +"cramersVArray" +"cramersVBiasCorrected" +"cramersVBiasCorrectedArgMax" +"cramersVBiasCorrectedArgMin" +"cramersVBiasCorrectedArray" +"cramersVBiasCorrectedDistinct" +"cramersVBiasCorrectedForEach" +"cramersVBiasCorrectedIf" +"cramersVBiasCorrectedMap" +"cramersVBiasCorrectedMerge" +"cramersVBiasCorrectedNull" +"cramersVBiasCorrectedOrDefault" +"cramersVBiasCorrectedOrNull" +"cramersVBiasCorrectedResample" +"cramersVBiasCorrectedSimpleState" +"cramersVBiasCorrectedState" +"cramersVDistinct" +"cramersVForEach" +"cramersVIf" +"cramersVMap" +"cramersVMerge" +"cramersVNull" +"cramersVOrDefault" +"cramersVOrNull" +"cramersVResample" +"cramersVSimpleState" +"cramersVState" +"curdate" +"currentDatabase" +"currentProfiles" +"currentRoles" +"currentSchemas" +"currentUser" +"current_database" +"current_date" +"current_schemas" +"current_timestamp" +"current_user" +"cutFragment" +"cutIPv6" +"cutQueryString" +"cutQueryStringAndFragment" +"cutToFirstSignificantSubdomain" +"cutToFirstSignificantSubdomainCustom" +"cutToFirstSignificantSubdomainCustomRFC" +"cutToFirstSignificantSubdomainCustomWithWWW" +"cutToFirstSignificantSubdomainCustomWithWWWRFC" +"cutToFirstSignificantSubdomainRFC" +"cutToFirstSignificantSubdomainWithWWW" +"cutToFirstSignificantSubdomainWithWWWRFC" +"cutURLParameter" +"cutWWW" +"damerauLevenshteinDistance" +"dateDiff" +"dateName" +"dateTime64ToSnowflake" +"dateTime64ToSnowflakeID" +"dateTimeToSnowflake" +"dateTimeToSnowflakeID" +"dateTrunc" +"date_diff" +"decodeHTMLComponent" +"decodeURLComponent" +"decodeURLFormComponent" +"decodeXMLComponent" +"decrypt" +"defaultProfiles" +"defaultRoles" +"defaultValueOfArgumentType" +"defaultValueOfTypeName" +"degrees" +"deltaSum" +"deltaSumArgMax" +"deltaSumArgMin" +"deltaSumArray" +"deltaSumDistinct" +"deltaSumForEach" +"deltaSumIf" +"deltaSumMap" +"deltaSumMerge" +"deltaSumNull" +"deltaSumOrDefault" +"deltaSumOrNull" +"deltaSumResample" +"deltaSumSimpleState" +"deltaSumState" +"deltaSumTimestamp" +"deltaSumTimestampArgMax" +"deltaSumTimestampArgMin" +"deltaSumTimestampArray" +"deltaSumTimestampDistinct" +"deltaSumTimestampForEach" +"deltaSumTimestampIf" +"deltaSumTimestampMap" +"deltaSumTimestampMerge" +"deltaSumTimestampNull" +"deltaSumTimestampOrDefault" +"deltaSumTimestampOrNull" +"deltaSumTimestampResample" +"deltaSumTimestampSimpleState" +"deltaSumTimestampState" +"demangle" +"denseRank" +"denseRankArgMax" +"denseRankArgMin" +"denseRankArray" +"denseRankDistinct" +"denseRankForEach" +"denseRankIf" +"denseRankMap" +"denseRankMerge" +"denseRankNull" +"denseRankOrDefault" +"denseRankOrNull" +"denseRankResample" +"denseRankSimpleState" +"denseRankState" +"dense_rank" +"dense_rankArgMax" +"dense_rankArgMin" +"dense_rankArray" +"dense_rankDistinct" +"dense_rankForEach" 
+"dense_rankIf" +"dense_rankMap" +"dense_rankMerge" +"dense_rankNull" +"dense_rankOrDefault" +"dense_rankOrNull" +"dense_rankResample" +"dense_rankSimpleState" +"dense_rankState" +"detectCharset" +"detectLanguage" +"detectLanguageMixed" +"detectLanguageUnknown" +"detectProgrammingLanguage" +"detectTonality" +"dictGet" +"dictGetAll" +"dictGetChildren" +"dictGetDate" +"dictGetDateOrDefault" +"dictGetDateTime" +"dictGetDateTimeOrDefault" +"dictGetDescendants" +"dictGetFloat32" +"dictGetFloat32OrDefault" +"dictGetFloat64" +"dictGetFloat64OrDefault" +"dictGetHierarchy" +"dictGetIPv4" +"dictGetIPv4OrDefault" +"dictGetIPv6" +"dictGetIPv6OrDefault" +"dictGetInt16" +"dictGetInt16OrDefault" +"dictGetInt32" +"dictGetInt32OrDefault" +"dictGetInt64" +"dictGetInt64OrDefault" +"dictGetInt8" +"dictGetInt8OrDefault" +"dictGetOrDefault" +"dictGetOrNull" +"dictGetString" +"dictGetStringOrDefault" +"dictGetUInt16" +"dictGetUInt16OrDefault" +"dictGetUInt32" +"dictGetUInt32OrDefault" +"dictGetUInt64" +"dictGetUInt64OrDefault" +"dictGetUInt8" +"dictGetUInt8OrDefault" +"dictGetUUID" +"dictGetUUIDOrDefault" +"dictHas" +"dictIsIn" +"displayName" +"distanceL1" +"distanceL2" +"distanceL2Squared" +"distanceLinf" +"distanceLp" +"divide" +"divideDecimal" +"domain" +"domainRFC" +"domainWithoutWWW" +"domainWithoutWWWRFC" +"dotProduct" +"dumpColumnStructure" +"dynamicElement" +"dynamicType" +"e" +"editDistance" +"editDistanceUTF8" +"empty" +"emptyArrayDate" +"emptyArrayDateTime" +"emptyArrayFloat32" +"emptyArrayFloat64" +"emptyArrayInt16" +"emptyArrayInt32" +"emptyArrayInt64" +"emptyArrayInt8" +"emptyArrayString" +"emptyArrayToSingle" +"emptyArrayUInt16" +"emptyArrayUInt32" +"emptyArrayUInt64" +"emptyArrayUInt8" +"enabledProfiles" +"enabledRoles" +"encodeURLComponent" +"encodeURLFormComponent" +"encodeXMLComponent" +"encrypt" +"endsWith" +"endsWithUTF8" +"entropy" +"entropyArgMax" +"entropyArgMin" +"entropyArray" +"entropyDistinct" +"entropyForEach" +"entropyIf" +"entropyMap" +"entropyMerge" +"entropyNull" +"entropyOrDefault" +"entropyOrNull" +"entropyResample" +"entropySimpleState" +"entropyState" +"equals" +"erf" +"erfc" +"errorCodeToName" +"evalMLMethod" +"exp" +"exp10" +"exp2" +"exponentialMovingAverage" +"exponentialMovingAverageArgMax" +"exponentialMovingAverageArgMin" +"exponentialMovingAverageArray" +"exponentialMovingAverageDistinct" +"exponentialMovingAverageForEach" +"exponentialMovingAverageIf" +"exponentialMovingAverageMap" +"exponentialMovingAverageMerge" +"exponentialMovingAverageNull" +"exponentialMovingAverageOrDefault" +"exponentialMovingAverageOrNull" +"exponentialMovingAverageResample" +"exponentialMovingAverageSimpleState" +"exponentialMovingAverageState" +"exponentialTimeDecayedAvg" +"exponentialTimeDecayedAvgArgMax" +"exponentialTimeDecayedAvgArgMin" +"exponentialTimeDecayedAvgArray" +"exponentialTimeDecayedAvgDistinct" +"exponentialTimeDecayedAvgForEach" +"exponentialTimeDecayedAvgIf" +"exponentialTimeDecayedAvgMap" +"exponentialTimeDecayedAvgMerge" +"exponentialTimeDecayedAvgNull" +"exponentialTimeDecayedAvgOrDefault" +"exponentialTimeDecayedAvgOrNull" +"exponentialTimeDecayedAvgResample" +"exponentialTimeDecayedAvgSimpleState" +"exponentialTimeDecayedAvgState" +"exponentialTimeDecayedCount" +"exponentialTimeDecayedCountArgMax" +"exponentialTimeDecayedCountArgMin" +"exponentialTimeDecayedCountArray" +"exponentialTimeDecayedCountDistinct" +"exponentialTimeDecayedCountForEach" +"exponentialTimeDecayedCountIf" +"exponentialTimeDecayedCountMap" +"exponentialTimeDecayedCountMerge" 
+"exponentialTimeDecayedCountNull" +"exponentialTimeDecayedCountOrDefault" +"exponentialTimeDecayedCountOrNull" +"exponentialTimeDecayedCountResample" +"exponentialTimeDecayedCountSimpleState" +"exponentialTimeDecayedCountState" +"exponentialTimeDecayedMax" +"exponentialTimeDecayedMaxArgMax" +"exponentialTimeDecayedMaxArgMin" +"exponentialTimeDecayedMaxArray" +"exponentialTimeDecayedMaxDistinct" +"exponentialTimeDecayedMaxForEach" +"exponentialTimeDecayedMaxIf" +"exponentialTimeDecayedMaxMap" +"exponentialTimeDecayedMaxMerge" +"exponentialTimeDecayedMaxNull" +"exponentialTimeDecayedMaxOrDefault" +"exponentialTimeDecayedMaxOrNull" +"exponentialTimeDecayedMaxResample" +"exponentialTimeDecayedMaxSimpleState" +"exponentialTimeDecayedMaxState" +"exponentialTimeDecayedSum" +"exponentialTimeDecayedSumArgMax" +"exponentialTimeDecayedSumArgMin" +"exponentialTimeDecayedSumArray" +"exponentialTimeDecayedSumDistinct" +"exponentialTimeDecayedSumForEach" +"exponentialTimeDecayedSumIf" +"exponentialTimeDecayedSumMap" +"exponentialTimeDecayedSumMerge" +"exponentialTimeDecayedSumNull" +"exponentialTimeDecayedSumOrDefault" +"exponentialTimeDecayedSumOrNull" +"exponentialTimeDecayedSumResample" +"exponentialTimeDecayedSumSimpleState" +"exponentialTimeDecayedSumState" +"extract" +"extractAll" +"extractAllGroups" +"extractAllGroupsHorizontal" +"extractAllGroupsVertical" +"extractGroups" +"extractKeyValuePairs" +"extractKeyValuePairsWithEscaping" +"extractTextFromHTML" +"extractURLParameter" +"extractURLParameterNames" +"extractURLParameters" +"factorial" +"farmFingerprint64" +"farmHash64" +"file" +"filesystemAvailable" +"filesystemCapacity" +"filesystemUnreserved" +"finalizeAggregation" +"firstLine" +"firstSignificantSubdomain" +"firstSignificantSubdomainCustom" +"firstSignificantSubdomainCustomRFC" +"firstSignificantSubdomainRFC" +"first_value" +"first_valueArgMax" +"first_valueArgMin" +"first_valueArray" +"first_valueDistinct" +"first_valueForEach" +"first_valueIf" +"first_valueMap" +"first_valueMerge" +"first_valueNull" +"first_valueOrDefault" +"first_valueOrNull" +"first_valueResample" +"first_valueSimpleState" +"first_valueState" +"first_value_respect_nulls" +"first_value_respect_nullsArgMax" +"first_value_respect_nullsArgMin" +"first_value_respect_nullsArray" +"first_value_respect_nullsDistinct" +"first_value_respect_nullsForEach" +"first_value_respect_nullsIf" +"first_value_respect_nullsMap" +"first_value_respect_nullsMerge" +"first_value_respect_nullsNull" +"first_value_respect_nullsOrDefault" +"first_value_respect_nullsOrNull" +"first_value_respect_nullsResample" +"first_value_respect_nullsSimpleState" +"first_value_respect_nullsState" +"flameGraph" +"flameGraphArgMax" +"flameGraphArgMin" +"flameGraphArray" +"flameGraphDistinct" +"flameGraphForEach" +"flameGraphIf" +"flameGraphMap" +"flameGraphMerge" +"flameGraphNull" +"flameGraphOrDefault" +"flameGraphOrNull" +"flameGraphResample" +"flameGraphSimpleState" +"flameGraphState" +"flatten" +"flattenTuple" +"floor" +"format" +"formatDateTime" +"formatDateTimeInJodaSyntax" +"formatQuery" +"formatQueryOrNull" +"formatQuerySingleLine" +"formatQuerySingleLineOrNull" +"formatReadableDecimalSize" +"formatReadableQuantity" +"formatReadableSize" +"formatReadableTimeDelta" +"formatRow" +"formatRowNoNewline" +"fragment" +"fromDaysSinceYearZero" +"fromDaysSinceYearZero32" +"fromModifiedJulianDay" +"fromModifiedJulianDayOrNull" +"fromUTCTimestamp" +"fromUnixTimestamp" +"fromUnixTimestamp64Micro" +"fromUnixTimestamp64Milli" +"fromUnixTimestamp64Nano" 
+"fromUnixTimestampInJodaSyntax" +"from_utc_timestamp" +"fullHostName" +"fuzzBits" +"gccMurmurHash" +"gcd" +"generateRandomStructure" +"generateSnowflakeID" +"generateULID" +"generateUUIDv4" +"generateUUIDv7" +"geoDistance" +"geoToH3" +"geoToS2" +"geohashDecode" +"geohashEncode" +"geohashesInBox" +"getClientHTTPHeader" +"getMacro" +"getOSKernelVersion" +"getServerPort" +"getSetting" +"getSizeOfEnumType" +"getSubcolumn" +"getTypeSerializationStreams" +"globalIn" +"globalInIgnoreSet" +"globalNotIn" +"globalNotInIgnoreSet" +"globalNotNullIn" +"globalNotNullInIgnoreSet" +"globalNullIn" +"globalNullInIgnoreSet" +"globalVariable" +"greatCircleAngle" +"greatCircleDistance" +"greater" +"greaterOrEquals" +"greatest" +"groupArray" +"groupArrayArgMax" +"groupArrayArgMin" +"groupArrayArray" +"groupArrayDistinct" +"groupArrayForEach" +"groupArrayIf" +"groupArrayInsertAt" +"groupArrayInsertAtArgMax" +"groupArrayInsertAtArgMin" +"groupArrayInsertAtArray" +"groupArrayInsertAtDistinct" +"groupArrayInsertAtForEach" +"groupArrayInsertAtIf" +"groupArrayInsertAtMap" +"groupArrayInsertAtMerge" +"groupArrayInsertAtNull" +"groupArrayInsertAtOrDefault" +"groupArrayInsertAtOrNull" +"groupArrayInsertAtResample" +"groupArrayInsertAtSimpleState" +"groupArrayInsertAtState" +"groupArrayIntersect" +"groupArrayIntersectArgMax" +"groupArrayIntersectArgMin" +"groupArrayIntersectArray" +"groupArrayIntersectDistinct" +"groupArrayIntersectForEach" +"groupArrayIntersectIf" +"groupArrayIntersectMap" +"groupArrayIntersectMerge" +"groupArrayIntersectNull" +"groupArrayIntersectOrDefault" +"groupArrayIntersectOrNull" +"groupArrayIntersectResample" +"groupArrayIntersectSimpleState" +"groupArrayIntersectState" +"groupArrayLast" +"groupArrayLastArgMax" +"groupArrayLastArgMin" +"groupArrayLastArray" +"groupArrayLastDistinct" +"groupArrayLastForEach" +"groupArrayLastIf" +"groupArrayLastMap" +"groupArrayLastMerge" +"groupArrayLastNull" +"groupArrayLastOrDefault" +"groupArrayLastOrNull" +"groupArrayLastResample" +"groupArrayLastSimpleState" +"groupArrayLastState" +"groupArrayMap" +"groupArrayMerge" +"groupArrayMovingAvg" +"groupArrayMovingAvgArgMax" +"groupArrayMovingAvgArgMin" +"groupArrayMovingAvgArray" +"groupArrayMovingAvgDistinct" +"groupArrayMovingAvgForEach" +"groupArrayMovingAvgIf" +"groupArrayMovingAvgMap" +"groupArrayMovingAvgMerge" +"groupArrayMovingAvgNull" +"groupArrayMovingAvgOrDefault" +"groupArrayMovingAvgOrNull" +"groupArrayMovingAvgResample" +"groupArrayMovingAvgSimpleState" +"groupArrayMovingAvgState" +"groupArrayMovingSum" +"groupArrayMovingSumArgMax" +"groupArrayMovingSumArgMin" +"groupArrayMovingSumArray" +"groupArrayMovingSumDistinct" +"groupArrayMovingSumForEach" +"groupArrayMovingSumIf" +"groupArrayMovingSumMap" +"groupArrayMovingSumMerge" +"groupArrayMovingSumNull" +"groupArrayMovingSumOrDefault" +"groupArrayMovingSumOrNull" +"groupArrayMovingSumResample" +"groupArrayMovingSumSimpleState" +"groupArrayMovingSumState" +"groupArrayNull" +"groupArrayOrDefault" +"groupArrayOrNull" +"groupArrayResample" +"groupArraySample" +"groupArraySampleArgMax" +"groupArraySampleArgMin" +"groupArraySampleArray" +"groupArraySampleDistinct" +"groupArraySampleForEach" +"groupArraySampleIf" +"groupArraySampleMap" +"groupArraySampleMerge" +"groupArraySampleNull" +"groupArraySampleOrDefault" +"groupArraySampleOrNull" +"groupArraySampleResample" +"groupArraySampleSimpleState" +"groupArraySampleState" +"groupArraySimpleState" +"groupArraySorted" +"groupArraySortedArgMax" +"groupArraySortedArgMin" +"groupArraySortedArray" 
+"groupArraySortedDistinct" +"groupArraySortedForEach" +"groupArraySortedIf" +"groupArraySortedMap" +"groupArraySortedMerge" +"groupArraySortedNull" +"groupArraySortedOrDefault" +"groupArraySortedOrNull" +"groupArraySortedResample" +"groupArraySortedSimpleState" +"groupArraySortedState" +"groupArrayState" +"groupBitAnd" +"groupBitAndArgMax" +"groupBitAndArgMin" +"groupBitAndArray" +"groupBitAndDistinct" +"groupBitAndForEach" +"groupBitAndIf" +"groupBitAndMap" +"groupBitAndMerge" +"groupBitAndNull" +"groupBitAndOrDefault" +"groupBitAndOrNull" +"groupBitAndResample" +"groupBitAndSimpleState" +"groupBitAndState" +"groupBitOr" +"groupBitOrArgMax" +"groupBitOrArgMin" +"groupBitOrArray" +"groupBitOrDistinct" +"groupBitOrForEach" +"groupBitOrIf" +"groupBitOrMap" +"groupBitOrMerge" +"groupBitOrNull" +"groupBitOrOrDefault" +"groupBitOrOrNull" +"groupBitOrResample" +"groupBitOrSimpleState" +"groupBitOrState" +"groupBitXor" +"groupBitXorArgMax" +"groupBitXorArgMin" +"groupBitXorArray" +"groupBitXorDistinct" +"groupBitXorForEach" +"groupBitXorIf" +"groupBitXorMap" +"groupBitXorMerge" +"groupBitXorNull" +"groupBitXorOrDefault" +"groupBitXorOrNull" +"groupBitXorResample" +"groupBitXorSimpleState" +"groupBitXorState" +"groupBitmap" +"groupBitmapAnd" +"groupBitmapAndArgMax" +"groupBitmapAndArgMin" +"groupBitmapAndArray" +"groupBitmapAndDistinct" +"groupBitmapAndForEach" +"groupBitmapAndIf" +"groupBitmapAndMap" +"groupBitmapAndMerge" +"groupBitmapAndNull" +"groupBitmapAndOrDefault" +"groupBitmapAndOrNull" +"groupBitmapAndResample" +"groupBitmapAndSimpleState" +"groupBitmapAndState" +"groupBitmapArgMax" +"groupBitmapArgMin" +"groupBitmapArray" +"groupBitmapDistinct" +"groupBitmapForEach" +"groupBitmapIf" +"groupBitmapMap" +"groupBitmapMerge" +"groupBitmapNull" +"groupBitmapOr" +"groupBitmapOrArgMax" +"groupBitmapOrArgMin" +"groupBitmapOrArray" +"groupBitmapOrDefault" +"groupBitmapOrDistinct" +"groupBitmapOrForEach" +"groupBitmapOrIf" +"groupBitmapOrMap" +"groupBitmapOrMerge" +"groupBitmapOrNull" +"groupBitmapOrNull" +"groupBitmapOrOrDefault" +"groupBitmapOrOrNull" +"groupBitmapOrResample" +"groupBitmapOrSimpleState" +"groupBitmapOrState" +"groupBitmapResample" +"groupBitmapSimpleState" +"groupBitmapState" +"groupBitmapXor" +"groupBitmapXorArgMax" +"groupBitmapXorArgMin" +"groupBitmapXorArray" +"groupBitmapXorDistinct" +"groupBitmapXorForEach" +"groupBitmapXorIf" +"groupBitmapXorMap" +"groupBitmapXorMerge" +"groupBitmapXorNull" +"groupBitmapXorOrDefault" +"groupBitmapXorOrNull" +"groupBitmapXorResample" +"groupBitmapXorSimpleState" +"groupBitmapXorState" +"groupConcat" +"groupConcatArgMax" +"groupConcatArgMin" +"groupConcatArray" +"groupConcatDistinct" +"groupConcatForEach" +"groupConcatIf" +"groupConcatMap" +"groupConcatMerge" +"groupConcatNull" +"groupConcatOrDefault" +"groupConcatOrNull" +"groupConcatResample" +"groupConcatSimpleState" +"groupConcatState" +"groupUniqArray" +"groupUniqArrayArgMax" +"groupUniqArrayArgMin" +"groupUniqArrayArray" +"groupUniqArrayDistinct" +"groupUniqArrayForEach" +"groupUniqArrayIf" +"groupUniqArrayMap" +"groupUniqArrayMerge" +"groupUniqArrayNull" +"groupUniqArrayOrDefault" +"groupUniqArrayOrNull" +"groupUniqArrayResample" +"groupUniqArraySimpleState" +"groupUniqArrayState" +"group_concat" +"group_concatArgMax" +"group_concatArgMin" +"group_concatArray" +"group_concatDistinct" +"group_concatForEach" +"group_concatIf" +"group_concatMap" +"group_concatMerge" +"group_concatNull" +"group_concatOrDefault" +"group_concatOrNull" +"group_concatResample" +"group_concatSimpleState" 
+"group_concatState" +"h3CellAreaM2" +"h3CellAreaRads2" +"h3Distance" +"h3EdgeAngle" +"h3EdgeLengthKm" +"h3EdgeLengthM" +"h3ExactEdgeLengthKm" +"h3ExactEdgeLengthM" +"h3ExactEdgeLengthRads" +"h3GetBaseCell" +"h3GetDestinationIndexFromUnidirectionalEdge" +"h3GetFaces" +"h3GetIndexesFromUnidirectionalEdge" +"h3GetOriginIndexFromUnidirectionalEdge" +"h3GetPentagonIndexes" +"h3GetRes0Indexes" +"h3GetResolution" +"h3GetUnidirectionalEdge" +"h3GetUnidirectionalEdgeBoundary" +"h3GetUnidirectionalEdgesFromHexagon" +"h3HexAreaKm2" +"h3HexAreaM2" +"h3HexRing" +"h3IndexesAreNeighbors" +"h3IsPentagon" +"h3IsResClassIII" +"h3IsValid" +"h3Line" +"h3NumHexagons" +"h3PointDistKm" +"h3PointDistM" +"h3PointDistRads" +"h3ToCenterChild" +"h3ToChildren" +"h3ToGeo" +"h3ToGeoBoundary" +"h3ToParent" +"h3ToString" +"h3UnidirectionalEdgeIsValid" +"h3kRing" +"halfMD5" +"has" +"hasAll" +"hasAny" +"hasColumnInTable" +"hasSubsequence" +"hasSubsequenceCaseInsensitive" +"hasSubsequenceCaseInsensitiveUTF8" +"hasSubsequenceUTF8" +"hasSubstr" +"hasThreadFuzzer" +"hasToken" +"hasTokenCaseInsensitive" +"hasTokenCaseInsensitiveOrNull" +"hasTokenOrNull" +"hex" +"hilbertDecode" +"hilbertEncode" +"histogram" +"histogramArgMax" +"histogramArgMin" +"histogramArray" +"histogramDistinct" +"histogramForEach" +"histogramIf" +"histogramMap" +"histogramMerge" +"histogramNull" +"histogramOrDefault" +"histogramOrNull" +"histogramResample" +"histogramSimpleState" +"histogramState" +"hiveHash" +"hop" +"hopEnd" +"hopStart" +"hostName" +"hostname" +"hypot" +"identity" +"idnaDecode" +"idnaEncode" +"if" +"ifNotFinite" +"ifNull" +"ignore" +"ilike" +"in" +"inIgnoreSet" +"indexHint" +"indexOf" +"initcap" +"initcapUTF8" +"initialQueryID" +"initial_query_id" +"initializeAggregation" +"instr" +"intDiv" +"intDivOrZero" +"intExp10" +"intExp2" +"intHash32" +"intHash64" +"intervalLengthSum" +"intervalLengthSumArgMax" +"intervalLengthSumArgMin" +"intervalLengthSumArray" +"intervalLengthSumDistinct" +"intervalLengthSumForEach" +"intervalLengthSumIf" +"intervalLengthSumMap" +"intervalLengthSumMerge" +"intervalLengthSumNull" +"intervalLengthSumOrDefault" +"intervalLengthSumOrNull" +"intervalLengthSumResample" +"intervalLengthSumSimpleState" +"intervalLengthSumState" +"isConstant" +"isDecimalOverflow" +"isFinite" +"isIPAddressInRange" +"isIPv4String" +"isIPv6String" +"isInfinite" +"isNaN" +"isNotDistinctFrom" +"isNotNull" +"isNull" +"isNullable" +"isValidJSON" +"isValidUTF8" +"isZeroOrNull" +"jaroSimilarity" +"jaroWinklerSimilarity" +"javaHash" +"javaHashUTF16LE" +"joinGet" +"joinGetOrNull" +"jsonMergePatch" +"jumpConsistentHash" +"kafkaMurmurHash" +"kolmogorovSmirnovTest" +"kolmogorovSmirnovTestArgMax" +"kolmogorovSmirnovTestArgMin" +"kolmogorovSmirnovTestArray" +"kolmogorovSmirnovTestDistinct" +"kolmogorovSmirnovTestForEach" +"kolmogorovSmirnovTestIf" +"kolmogorovSmirnovTestMap" +"kolmogorovSmirnovTestMerge" +"kolmogorovSmirnovTestNull" +"kolmogorovSmirnovTestOrDefault" +"kolmogorovSmirnovTestOrNull" +"kolmogorovSmirnovTestResample" +"kolmogorovSmirnovTestSimpleState" +"kolmogorovSmirnovTestState" +"kostikConsistentHash" +"kql_array_sort_asc" +"kql_array_sort_desc" +"kurtPop" +"kurtPopArgMax" +"kurtPopArgMin" +"kurtPopArray" +"kurtPopDistinct" +"kurtPopForEach" +"kurtPopIf" +"kurtPopMap" +"kurtPopMerge" +"kurtPopNull" +"kurtPopOrDefault" +"kurtPopOrNull" +"kurtPopResample" +"kurtPopSimpleState" +"kurtPopState" +"kurtSamp" +"kurtSampArgMax" +"kurtSampArgMin" +"kurtSampArray" +"kurtSampDistinct" +"kurtSampForEach" +"kurtSampIf" +"kurtSampMap" +"kurtSampMerge" 
+"kurtSampNull" +"kurtSampOrDefault" +"kurtSampOrNull" +"kurtSampResample" +"kurtSampSimpleState" +"kurtSampState" +"lagInFrame" +"lagInFrameArgMax" +"lagInFrameArgMin" +"lagInFrameArray" +"lagInFrameDistinct" +"lagInFrameForEach" +"lagInFrameIf" +"lagInFrameMap" +"lagInFrameMerge" +"lagInFrameNull" +"lagInFrameOrDefault" +"lagInFrameOrNull" +"lagInFrameResample" +"lagInFrameSimpleState" +"lagInFrameState" +"largestTriangleThreeBuckets" +"largestTriangleThreeBucketsArgMax" +"largestTriangleThreeBucketsArgMin" +"largestTriangleThreeBucketsArray" +"largestTriangleThreeBucketsDistinct" +"largestTriangleThreeBucketsForEach" +"largestTriangleThreeBucketsIf" +"largestTriangleThreeBucketsMap" +"largestTriangleThreeBucketsMerge" +"largestTriangleThreeBucketsNull" +"largestTriangleThreeBucketsOrDefault" +"largestTriangleThreeBucketsOrNull" +"largestTriangleThreeBucketsResample" +"largestTriangleThreeBucketsSimpleState" +"largestTriangleThreeBucketsState" +"last_value" +"last_valueArgMax" +"last_valueArgMin" +"last_valueArray" +"last_valueDistinct" +"last_valueForEach" +"last_valueIf" +"last_valueMap" +"last_valueMerge" +"last_valueNull" +"last_valueOrDefault" +"last_valueOrNull" +"last_valueResample" +"last_valueSimpleState" +"last_valueState" +"last_value_respect_nulls" +"last_value_respect_nullsArgMax" +"last_value_respect_nullsArgMin" +"last_value_respect_nullsArray" +"last_value_respect_nullsDistinct" +"last_value_respect_nullsForEach" +"last_value_respect_nullsIf" +"last_value_respect_nullsMap" +"last_value_respect_nullsMerge" +"last_value_respect_nullsNull" +"last_value_respect_nullsOrDefault" +"last_value_respect_nullsOrNull" +"last_value_respect_nullsResample" +"last_value_respect_nullsSimpleState" +"last_value_respect_nullsState" +"lcase" +"lcm" +"leadInFrame" +"leadInFrameArgMax" +"leadInFrameArgMin" +"leadInFrameArray" +"leadInFrameDistinct" +"leadInFrameForEach" +"leadInFrameIf" +"leadInFrameMap" +"leadInFrameMerge" +"leadInFrameNull" +"leadInFrameOrDefault" +"leadInFrameOrNull" +"leadInFrameResample" +"leadInFrameSimpleState" +"leadInFrameState" +"least" +"left" +"leftPad" +"leftPadUTF8" +"leftUTF8" +"lemmatize" +"length" +"lengthUTF8" +"less" +"lessOrEquals" +"levenshteinDistance" +"levenshteinDistanceUTF8" +"lgamma" +"like" +"ln" +"locate" +"log" +"log10" +"log1p" +"log2" +"logTrace" +"lowCardinalityIndices" +"lowCardinalityKeys" +"lower" +"lowerUTF8" +"lpad" +"ltrim" +"lttb" +"lttbArgMax" +"lttbArgMin" +"lttbArray" +"lttbDistinct" +"lttbForEach" +"lttbIf" +"lttbMap" +"lttbMerge" +"lttbNull" +"lttbOrDefault" +"lttbOrNull" +"lttbResample" +"lttbSimpleState" +"lttbState" +"makeDate" +"makeDate32" +"makeDateTime" +"makeDateTime64" +"mannWhitneyUTest" +"mannWhitneyUTestArgMax" +"mannWhitneyUTestArgMin" +"mannWhitneyUTestArray" +"mannWhitneyUTestDistinct" +"mannWhitneyUTestForEach" +"mannWhitneyUTestIf" +"mannWhitneyUTestMap" +"mannWhitneyUTestMerge" +"mannWhitneyUTestNull" +"mannWhitneyUTestOrDefault" +"mannWhitneyUTestOrNull" +"mannWhitneyUTestResample" +"mannWhitneyUTestSimpleState" +"mannWhitneyUTestState" +"map" +"mapAdd" +"mapAll" +"mapApply" +"mapConcat" +"mapContains" +"mapContainsKeyLike" +"mapExists" +"mapExtractKeyLike" +"mapFilter" +"mapFromArrays" +"mapFromString" +"mapKeys" +"mapPartialReverseSort" +"mapPartialSort" +"mapPopulateSeries" +"mapReverseSort" +"mapSort" +"mapSubtract" +"mapUpdate" +"mapValues" +"match" +"materialize" +"max" +"max2" +"maxArgMax" +"maxArgMin" +"maxArray" +"maxDistinct" +"maxForEach" +"maxIf" +"maxIntersections" +"maxIntersectionsArgMax" 
+"maxIntersectionsArgMin" +"maxIntersectionsArray" +"maxIntersectionsDistinct" +"maxIntersectionsForEach" +"maxIntersectionsIf" +"maxIntersectionsMap" +"maxIntersectionsMerge" +"maxIntersectionsNull" +"maxIntersectionsOrDefault" +"maxIntersectionsOrNull" +"maxIntersectionsPosition" +"maxIntersectionsPositionArgMax" +"maxIntersectionsPositionArgMin" +"maxIntersectionsPositionArray" +"maxIntersectionsPositionDistinct" +"maxIntersectionsPositionForEach" +"maxIntersectionsPositionIf" +"maxIntersectionsPositionMap" +"maxIntersectionsPositionMerge" +"maxIntersectionsPositionNull" +"maxIntersectionsPositionOrDefault" +"maxIntersectionsPositionOrNull" +"maxIntersectionsPositionResample" +"maxIntersectionsPositionSimpleState" +"maxIntersectionsPositionState" +"maxIntersectionsResample" +"maxIntersectionsSimpleState" +"maxIntersectionsState" +"maxMap" +"maxMappedArrays" +"maxMappedArraysArgMax" +"maxMappedArraysArgMin" +"maxMappedArraysArray" +"maxMappedArraysDistinct" +"maxMappedArraysForEach" +"maxMappedArraysIf" +"maxMappedArraysMap" +"maxMappedArraysMerge" +"maxMappedArraysNull" +"maxMappedArraysOrDefault" +"maxMappedArraysOrNull" +"maxMappedArraysResample" +"maxMappedArraysSimpleState" +"maxMappedArraysState" +"maxMerge" +"maxNull" +"maxOrDefault" +"maxOrNull" +"maxResample" +"maxSimpleState" +"maxState" +"meanZTest" +"meanZTestArgMax" +"meanZTestArgMin" +"meanZTestArray" +"meanZTestDistinct" +"meanZTestForEach" +"meanZTestIf" +"meanZTestMap" +"meanZTestMerge" +"meanZTestNull" +"meanZTestOrDefault" +"meanZTestOrNull" +"meanZTestResample" +"meanZTestSimpleState" +"meanZTestState" +"median" +"medianArgMax" +"medianArgMin" +"medianArray" +"medianBFloat16" +"medianBFloat16ArgMax" +"medianBFloat16ArgMin" +"medianBFloat16Array" +"medianBFloat16Distinct" +"medianBFloat16ForEach" +"medianBFloat16If" +"medianBFloat16Map" +"medianBFloat16Merge" +"medianBFloat16Null" +"medianBFloat16OrDefault" +"medianBFloat16OrNull" +"medianBFloat16Resample" +"medianBFloat16SimpleState" +"medianBFloat16State" +"medianBFloat16Weighted" +"medianBFloat16WeightedArgMax" +"medianBFloat16WeightedArgMin" +"medianBFloat16WeightedArray" +"medianBFloat16WeightedDistinct" +"medianBFloat16WeightedForEach" +"medianBFloat16WeightedIf" +"medianBFloat16WeightedMap" +"medianBFloat16WeightedMerge" +"medianBFloat16WeightedNull" +"medianBFloat16WeightedOrDefault" +"medianBFloat16WeightedOrNull" +"medianBFloat16WeightedResample" +"medianBFloat16WeightedSimpleState" +"medianBFloat16WeightedState" +"medianDD" +"medianDDArgMax" +"medianDDArgMin" +"medianDDArray" +"medianDDDistinct" +"medianDDForEach" +"medianDDIf" +"medianDDMap" +"medianDDMerge" +"medianDDNull" +"medianDDOrDefault" +"medianDDOrNull" +"medianDDResample" +"medianDDSimpleState" +"medianDDState" +"medianDeterministic" +"medianDeterministicArgMax" +"medianDeterministicArgMin" +"medianDeterministicArray" +"medianDeterministicDistinct" +"medianDeterministicForEach" +"medianDeterministicIf" +"medianDeterministicMap" +"medianDeterministicMerge" +"medianDeterministicNull" +"medianDeterministicOrDefault" +"medianDeterministicOrNull" +"medianDeterministicResample" +"medianDeterministicSimpleState" +"medianDeterministicState" +"medianDistinct" +"medianExact" +"medianExactArgMax" +"medianExactArgMin" +"medianExactArray" +"medianExactDistinct" +"medianExactForEach" +"medianExactHigh" +"medianExactHighArgMax" +"medianExactHighArgMin" +"medianExactHighArray" +"medianExactHighDistinct" +"medianExactHighForEach" +"medianExactHighIf" +"medianExactHighMap" +"medianExactHighMerge" 
+"medianExactHighNull" +"medianExactHighOrDefault" +"medianExactHighOrNull" +"medianExactHighResample" +"medianExactHighSimpleState" +"medianExactHighState" +"medianExactIf" +"medianExactLow" +"medianExactLowArgMax" +"medianExactLowArgMin" +"medianExactLowArray" +"medianExactLowDistinct" +"medianExactLowForEach" +"medianExactLowIf" +"medianExactLowMap" +"medianExactLowMerge" +"medianExactLowNull" +"medianExactLowOrDefault" +"medianExactLowOrNull" +"medianExactLowResample" +"medianExactLowSimpleState" +"medianExactLowState" +"medianExactMap" +"medianExactMerge" +"medianExactNull" +"medianExactOrDefault" +"medianExactOrNull" +"medianExactResample" +"medianExactSimpleState" +"medianExactState" +"medianExactWeighted" +"medianExactWeightedArgMax" +"medianExactWeightedArgMin" +"medianExactWeightedArray" +"medianExactWeightedDistinct" +"medianExactWeightedForEach" +"medianExactWeightedIf" +"medianExactWeightedMap" +"medianExactWeightedMerge" +"medianExactWeightedNull" +"medianExactWeightedOrDefault" +"medianExactWeightedOrNull" +"medianExactWeightedResample" +"medianExactWeightedSimpleState" +"medianExactWeightedState" +"medianForEach" +"medianGK" +"medianGKArgMax" +"medianGKArgMin" +"medianGKArray" +"medianGKDistinct" +"medianGKForEach" +"medianGKIf" +"medianGKMap" +"medianGKMerge" +"medianGKNull" +"medianGKOrDefault" +"medianGKOrNull" +"medianGKResample" +"medianGKSimpleState" +"medianGKState" +"medianIf" +"medianInterpolatedWeighted" +"medianInterpolatedWeightedArgMax" +"medianInterpolatedWeightedArgMin" +"medianInterpolatedWeightedArray" +"medianInterpolatedWeightedDistinct" +"medianInterpolatedWeightedForEach" +"medianInterpolatedWeightedIf" +"medianInterpolatedWeightedMap" +"medianInterpolatedWeightedMerge" +"medianInterpolatedWeightedNull" +"medianInterpolatedWeightedOrDefault" +"medianInterpolatedWeightedOrNull" +"medianInterpolatedWeightedResample" +"medianInterpolatedWeightedSimpleState" +"medianInterpolatedWeightedState" +"medianMap" +"medianMerge" +"medianNull" +"medianOrDefault" +"medianOrNull" +"medianResample" +"medianSimpleState" +"medianState" +"medianTDigest" +"medianTDigestArgMax" +"medianTDigestArgMin" +"medianTDigestArray" +"medianTDigestDistinct" +"medianTDigestForEach" +"medianTDigestIf" +"medianTDigestMap" +"medianTDigestMerge" +"medianTDigestNull" +"medianTDigestOrDefault" +"medianTDigestOrNull" +"medianTDigestResample" +"medianTDigestSimpleState" +"medianTDigestState" +"medianTDigestWeighted" +"medianTDigestWeightedArgMax" +"medianTDigestWeightedArgMin" +"medianTDigestWeightedArray" +"medianTDigestWeightedDistinct" +"medianTDigestWeightedForEach" +"medianTDigestWeightedIf" +"medianTDigestWeightedMap" +"medianTDigestWeightedMerge" +"medianTDigestWeightedNull" +"medianTDigestWeightedOrDefault" +"medianTDigestWeightedOrNull" +"medianTDigestWeightedResample" +"medianTDigestWeightedSimpleState" +"medianTDigestWeightedState" +"medianTiming" +"medianTimingArgMax" +"medianTimingArgMin" +"medianTimingArray" +"medianTimingDistinct" +"medianTimingForEach" +"medianTimingIf" +"medianTimingMap" +"medianTimingMerge" +"medianTimingNull" +"medianTimingOrDefault" +"medianTimingOrNull" +"medianTimingResample" +"medianTimingSimpleState" +"medianTimingState" +"medianTimingWeighted" +"medianTimingWeightedArgMax" +"medianTimingWeightedArgMin" +"medianTimingWeightedArray" +"medianTimingWeightedDistinct" +"medianTimingWeightedForEach" +"medianTimingWeightedIf" +"medianTimingWeightedMap" +"medianTimingWeightedMerge" +"medianTimingWeightedNull" +"medianTimingWeightedOrDefault" 
+"medianTimingWeightedOrNull" +"medianTimingWeightedResample" +"medianTimingWeightedSimpleState" +"medianTimingWeightedState" +"metroHash64" +"mid" +"min" +"min2" +"minArgMax" +"minArgMin" +"minArray" +"minDistinct" +"minForEach" +"minIf" +"minMap" +"minMappedArrays" +"minMappedArraysArgMax" +"minMappedArraysArgMin" +"minMappedArraysArray" +"minMappedArraysDistinct" +"minMappedArraysForEach" +"minMappedArraysIf" +"minMappedArraysMap" +"minMappedArraysMerge" +"minMappedArraysNull" +"minMappedArraysOrDefault" +"minMappedArraysOrNull" +"minMappedArraysResample" +"minMappedArraysSimpleState" +"minMappedArraysState" +"minMerge" +"minNull" +"minOrDefault" +"minOrNull" +"minResample" +"minSampleSizeContinous" +"minSampleSizeContinuous" +"minSampleSizeConversion" +"minSimpleState" +"minState" +"minus" +"mismatches" +"mod" +"modulo" +"moduloLegacy" +"moduloOrZero" +"monthName" +"mortonDecode" +"mortonEncode" +"multiFuzzyMatchAllIndices" +"multiFuzzyMatchAny" +"multiFuzzyMatchAnyIndex" +"multiIf" +"multiMatchAllIndices" +"multiMatchAny" +"multiMatchAnyIndex" +"multiSearchAllPositions" +"multiSearchAllPositionsCaseInsensitive" +"multiSearchAllPositionsCaseInsensitiveUTF8" +"multiSearchAllPositionsUTF8" +"multiSearchAny" +"multiSearchAnyCaseInsensitive" +"multiSearchAnyCaseInsensitiveUTF8" +"multiSearchAnyUTF8" +"multiSearchFirstIndex" +"multiSearchFirstIndexCaseInsensitive" +"multiSearchFirstIndexCaseInsensitiveUTF8" +"multiSearchFirstIndexUTF8" +"multiSearchFirstPosition" +"multiSearchFirstPositionCaseInsensitive" +"multiSearchFirstPositionCaseInsensitiveUTF8" +"multiSearchFirstPositionUTF8" +"multiply" +"multiplyDecimal" +"murmurHash2_32" +"murmurHash2_64" +"murmurHash3_128" +"murmurHash3_32" +"murmurHash3_64" +"negate" +"neighbor" +"nested" +"netloc" +"ngramDistance" +"ngramDistanceCaseInsensitive" +"ngramDistanceCaseInsensitiveUTF8" +"ngramDistanceUTF8" +"ngramMinHash" +"ngramMinHashArg" +"ngramMinHashArgCaseInsensitive" +"ngramMinHashArgCaseInsensitiveUTF8" +"ngramMinHashArgUTF8" +"ngramMinHashCaseInsensitive" +"ngramMinHashCaseInsensitiveUTF8" +"ngramMinHashUTF8" +"ngramSearch" +"ngramSearchCaseInsensitive" +"ngramSearchCaseInsensitiveUTF8" +"ngramSearchUTF8" +"ngramSimHash" +"ngramSimHashCaseInsensitive" +"ngramSimHashCaseInsensitiveUTF8" +"ngramSimHashUTF8" +"ngrams" +"nonNegativeDerivative" +"nonNegativeDerivativeArgMax" +"nonNegativeDerivativeArgMin" +"nonNegativeDerivativeArray" +"nonNegativeDerivativeDistinct" +"nonNegativeDerivativeForEach" +"nonNegativeDerivativeIf" +"nonNegativeDerivativeMap" +"nonNegativeDerivativeMerge" +"nonNegativeDerivativeNull" +"nonNegativeDerivativeOrDefault" +"nonNegativeDerivativeOrNull" +"nonNegativeDerivativeResample" +"nonNegativeDerivativeSimpleState" +"nonNegativeDerivativeState" +"normL1" +"normL2" +"normL2Squared" +"normLinf" +"normLp" +"normalizeL1" +"normalizeL2" +"normalizeLinf" +"normalizeLp" +"normalizeQuery" +"normalizeQueryKeepNames" +"normalizeUTF8NFC" +"normalizeUTF8NFD" +"normalizeUTF8NFKC" +"normalizeUTF8NFKD" +"normalizedQueryHash" +"normalizedQueryHashKeepNames" +"not" +"notEmpty" +"notEquals" +"notILike" +"notIn" +"notInIgnoreSet" +"notLike" +"notNullIn" +"notNullInIgnoreSet" +"nothing" +"nothingArgMax" +"nothingArgMin" +"nothingArray" +"nothingDistinct" +"nothingForEach" +"nothingIf" +"nothingMap" +"nothingMerge" +"nothingNull" +"nothingNull" +"nothingNullArgMax" +"nothingNullArgMin" +"nothingNullArray" +"nothingNullDistinct" +"nothingNullForEach" +"nothingNullIf" +"nothingNullMap" +"nothingNullMerge" +"nothingNullNull" 
+"nothingNullOrDefault" +"nothingNullOrNull" +"nothingNullResample" +"nothingNullSimpleState" +"nothingNullState" +"nothingOrDefault" +"nothingOrNull" +"nothingResample" +"nothingSimpleState" +"nothingState" +"nothingUInt64" +"nothingUInt64ArgMax" +"nothingUInt64ArgMin" +"nothingUInt64Array" +"nothingUInt64Distinct" +"nothingUInt64ForEach" +"nothingUInt64If" +"nothingUInt64Map" +"nothingUInt64Merge" +"nothingUInt64Null" +"nothingUInt64OrDefault" +"nothingUInt64OrNull" +"nothingUInt64Resample" +"nothingUInt64SimpleState" +"nothingUInt64State" +"now" +"now64" +"nowInBlock" +"nth_value" +"nth_valueArgMax" +"nth_valueArgMin" +"nth_valueArray" +"nth_valueDistinct" +"nth_valueForEach" +"nth_valueIf" +"nth_valueMap" +"nth_valueMerge" +"nth_valueNull" +"nth_valueOrDefault" +"nth_valueOrNull" +"nth_valueResample" +"nth_valueSimpleState" +"nth_valueState" +"ntile" +"ntileArgMax" +"ntileArgMin" +"ntileArray" +"ntileDistinct" +"ntileForEach" +"ntileIf" +"ntileMap" +"ntileMerge" +"ntileNull" +"ntileOrDefault" +"ntileOrNull" +"ntileResample" +"ntileSimpleState" +"ntileState" +"nullIf" +"nullIn" +"nullInIgnoreSet" +"or" +"parseDateTime" +"parseDateTime32BestEffort" +"parseDateTime32BestEffortOrNull" +"parseDateTime32BestEffortOrZero" +"parseDateTime64BestEffort" +"parseDateTime64BestEffortOrNull" +"parseDateTime64BestEffortOrZero" +"parseDateTime64BestEffortUS" +"parseDateTime64BestEffortUSOrNull" +"parseDateTime64BestEffortUSOrZero" +"parseDateTimeBestEffort" +"parseDateTimeBestEffortOrNull" +"parseDateTimeBestEffortOrZero" +"parseDateTimeBestEffortUS" +"parseDateTimeBestEffortUSOrNull" +"parseDateTimeBestEffortUSOrZero" +"parseDateTimeInJodaSyntax" +"parseDateTimeInJodaSyntaxOrNull" +"parseDateTimeInJodaSyntaxOrZero" +"parseDateTimeOrNull" +"parseDateTimeOrZero" +"parseReadableSize" +"parseReadableSizeOrNull" +"parseReadableSizeOrZero" +"parseTimeDelta" +"partitionID" +"partitionId" +"path" +"pathFull" +"percentRank" +"percentRankArgMax" +"percentRankArgMin" +"percentRankArray" +"percentRankDistinct" +"percentRankForEach" +"percentRankIf" +"percentRankMap" +"percentRankMerge" +"percentRankNull" +"percentRankOrDefault" +"percentRankOrNull" +"percentRankResample" +"percentRankSimpleState" +"percentRankState" +"percent_rank" +"percent_rankArgMax" +"percent_rankArgMin" +"percent_rankArray" +"percent_rankDistinct" +"percent_rankForEach" +"percent_rankIf" +"percent_rankMap" +"percent_rankMerge" +"percent_rankNull" +"percent_rankOrDefault" +"percent_rankOrNull" +"percent_rankResample" +"percent_rankSimpleState" +"percent_rankState" +"pi" +"plus" +"pmod" +"pointInEllipses" +"pointInPolygon" +"polygonAreaCartesian" +"polygonAreaSpherical" +"polygonConvexHullCartesian" +"polygonPerimeterCartesian" +"polygonPerimeterSpherical" +"polygonsDistanceCartesian" +"polygonsDistanceSpherical" +"polygonsEqualsCartesian" +"polygonsIntersectionCartesian" +"polygonsIntersectionSpherical" +"polygonsSymDifferenceCartesian" +"polygonsSymDifferenceSpherical" +"polygonsUnionCartesian" +"polygonsUnionSpherical" +"polygonsWithinCartesian" +"polygonsWithinSpherical" +"port" +"portRFC" +"position" +"positionCaseInsensitive" +"positionCaseInsensitiveUTF8" +"positionUTF8" +"positiveModulo" +"positive_modulo" +"pow" +"power" +"printf" +"proportionsZTest" +"protocol" +"punycodeDecode" +"punycodeEncode" +"quantile" +"quantileArgMax" +"quantileArgMin" +"quantileArray" +"quantileBFloat16" +"quantileBFloat16ArgMax" +"quantileBFloat16ArgMin" +"quantileBFloat16Array" +"quantileBFloat16Distinct" +"quantileBFloat16ForEach" +"quantileBFloat16If" 
+"quantileBFloat16Map" +"quantileBFloat16Merge" +"quantileBFloat16Null" +"quantileBFloat16OrDefault" +"quantileBFloat16OrNull" +"quantileBFloat16Resample" +"quantileBFloat16SimpleState" +"quantileBFloat16State" +"quantileBFloat16Weighted" +"quantileBFloat16WeightedArgMax" +"quantileBFloat16WeightedArgMin" +"quantileBFloat16WeightedArray" +"quantileBFloat16WeightedDistinct" +"quantileBFloat16WeightedForEach" +"quantileBFloat16WeightedIf" +"quantileBFloat16WeightedMap" +"quantileBFloat16WeightedMerge" +"quantileBFloat16WeightedNull" +"quantileBFloat16WeightedOrDefault" +"quantileBFloat16WeightedOrNull" +"quantileBFloat16WeightedResample" +"quantileBFloat16WeightedSimpleState" +"quantileBFloat16WeightedState" +"quantileDD" +"quantileDDArgMax" +"quantileDDArgMin" +"quantileDDArray" +"quantileDDDistinct" +"quantileDDForEach" +"quantileDDIf" +"quantileDDMap" +"quantileDDMerge" +"quantileDDNull" +"quantileDDOrDefault" +"quantileDDOrNull" +"quantileDDResample" +"quantileDDSimpleState" +"quantileDDState" +"quantileDeterministic" +"quantileDeterministicArgMax" +"quantileDeterministicArgMin" +"quantileDeterministicArray" +"quantileDeterministicDistinct" +"quantileDeterministicForEach" +"quantileDeterministicIf" +"quantileDeterministicMap" +"quantileDeterministicMerge" +"quantileDeterministicNull" +"quantileDeterministicOrDefault" +"quantileDeterministicOrNull" +"quantileDeterministicResample" +"quantileDeterministicSimpleState" +"quantileDeterministicState" +"quantileDistinct" +"quantileExact" +"quantileExactArgMax" +"quantileExactArgMin" +"quantileExactArray" +"quantileExactDistinct" +"quantileExactExclusive" +"quantileExactExclusiveArgMax" +"quantileExactExclusiveArgMin" +"quantileExactExclusiveArray" +"quantileExactExclusiveDistinct" +"quantileExactExclusiveForEach" +"quantileExactExclusiveIf" +"quantileExactExclusiveMap" +"quantileExactExclusiveMerge" +"quantileExactExclusiveNull" +"quantileExactExclusiveOrDefault" +"quantileExactExclusiveOrNull" +"quantileExactExclusiveResample" +"quantileExactExclusiveSimpleState" +"quantileExactExclusiveState" +"quantileExactForEach" +"quantileExactHigh" +"quantileExactHighArgMax" +"quantileExactHighArgMin" +"quantileExactHighArray" +"quantileExactHighDistinct" +"quantileExactHighForEach" +"quantileExactHighIf" +"quantileExactHighMap" +"quantileExactHighMerge" +"quantileExactHighNull" +"quantileExactHighOrDefault" +"quantileExactHighOrNull" +"quantileExactHighResample" +"quantileExactHighSimpleState" +"quantileExactHighState" +"quantileExactIf" +"quantileExactInclusive" +"quantileExactInclusiveArgMax" +"quantileExactInclusiveArgMin" +"quantileExactInclusiveArray" +"quantileExactInclusiveDistinct" +"quantileExactInclusiveForEach" +"quantileExactInclusiveIf" +"quantileExactInclusiveMap" +"quantileExactInclusiveMerge" +"quantileExactInclusiveNull" +"quantileExactInclusiveOrDefault" +"quantileExactInclusiveOrNull" +"quantileExactInclusiveResample" +"quantileExactInclusiveSimpleState" +"quantileExactInclusiveState" +"quantileExactLow" +"quantileExactLowArgMax" +"quantileExactLowArgMin" +"quantileExactLowArray" +"quantileExactLowDistinct" +"quantileExactLowForEach" +"quantileExactLowIf" +"quantileExactLowMap" +"quantileExactLowMerge" +"quantileExactLowNull" +"quantileExactLowOrDefault" +"quantileExactLowOrNull" +"quantileExactLowResample" +"quantileExactLowSimpleState" +"quantileExactLowState" +"quantileExactMap" +"quantileExactMerge" +"quantileExactNull" +"quantileExactOrDefault" +"quantileExactOrNull" +"quantileExactResample" +"quantileExactSimpleState" 
+"quantileExactState" +"quantileExactWeighted" +"quantileExactWeightedArgMax" +"quantileExactWeightedArgMin" +"quantileExactWeightedArray" +"quantileExactWeightedDistinct" +"quantileExactWeightedForEach" +"quantileExactWeightedIf" +"quantileExactWeightedMap" +"quantileExactWeightedMerge" +"quantileExactWeightedNull" +"quantileExactWeightedOrDefault" +"quantileExactWeightedOrNull" +"quantileExactWeightedResample" +"quantileExactWeightedSimpleState" +"quantileExactWeightedState" +"quantileForEach" +"quantileGK" +"quantileGKArgMax" +"quantileGKArgMin" +"quantileGKArray" +"quantileGKDistinct" +"quantileGKForEach" +"quantileGKIf" +"quantileGKMap" +"quantileGKMerge" +"quantileGKNull" +"quantileGKOrDefault" +"quantileGKOrNull" +"quantileGKResample" +"quantileGKSimpleState" +"quantileGKState" +"quantileIf" +"quantileInterpolatedWeighted" +"quantileInterpolatedWeightedArgMax" +"quantileInterpolatedWeightedArgMin" +"quantileInterpolatedWeightedArray" +"quantileInterpolatedWeightedDistinct" +"quantileInterpolatedWeightedForEach" +"quantileInterpolatedWeightedIf" +"quantileInterpolatedWeightedMap" +"quantileInterpolatedWeightedMerge" +"quantileInterpolatedWeightedNull" +"quantileInterpolatedWeightedOrDefault" +"quantileInterpolatedWeightedOrNull" +"quantileInterpolatedWeightedResample" +"quantileInterpolatedWeightedSimpleState" +"quantileInterpolatedWeightedState" +"quantileMap" +"quantileMerge" +"quantileNull" +"quantileOrDefault" +"quantileOrNull" +"quantileResample" +"quantileSimpleState" +"quantileState" +"quantileTDigest" +"quantileTDigestArgMax" +"quantileTDigestArgMin" +"quantileTDigestArray" +"quantileTDigestDistinct" +"quantileTDigestForEach" +"quantileTDigestIf" +"quantileTDigestMap" +"quantileTDigestMerge" +"quantileTDigestNull" +"quantileTDigestOrDefault" +"quantileTDigestOrNull" +"quantileTDigestResample" +"quantileTDigestSimpleState" +"quantileTDigestState" +"quantileTDigestWeighted" +"quantileTDigestWeightedArgMax" +"quantileTDigestWeightedArgMin" +"quantileTDigestWeightedArray" +"quantileTDigestWeightedDistinct" +"quantileTDigestWeightedForEach" +"quantileTDigestWeightedIf" +"quantileTDigestWeightedMap" +"quantileTDigestWeightedMerge" +"quantileTDigestWeightedNull" +"quantileTDigestWeightedOrDefault" +"quantileTDigestWeightedOrNull" +"quantileTDigestWeightedResample" +"quantileTDigestWeightedSimpleState" +"quantileTDigestWeightedState" +"quantileTiming" +"quantileTimingArgMax" +"quantileTimingArgMin" +"quantileTimingArray" +"quantileTimingDistinct" +"quantileTimingForEach" +"quantileTimingIf" +"quantileTimingMap" +"quantileTimingMerge" +"quantileTimingNull" +"quantileTimingOrDefault" +"quantileTimingOrNull" +"quantileTimingResample" +"quantileTimingSimpleState" +"quantileTimingState" +"quantileTimingWeighted" +"quantileTimingWeightedArgMax" +"quantileTimingWeightedArgMin" +"quantileTimingWeightedArray" +"quantileTimingWeightedDistinct" +"quantileTimingWeightedForEach" +"quantileTimingWeightedIf" +"quantileTimingWeightedMap" +"quantileTimingWeightedMerge" +"quantileTimingWeightedNull" +"quantileTimingWeightedOrDefault" +"quantileTimingWeightedOrNull" +"quantileTimingWeightedResample" +"quantileTimingWeightedSimpleState" +"quantileTimingWeightedState" +"quantiles" +"quantilesArgMax" +"quantilesArgMin" +"quantilesArray" +"quantilesBFloat16" +"quantilesBFloat16ArgMax" +"quantilesBFloat16ArgMin" +"quantilesBFloat16Array" +"quantilesBFloat16Distinct" +"quantilesBFloat16ForEach" +"quantilesBFloat16If" +"quantilesBFloat16Map" +"quantilesBFloat16Merge" +"quantilesBFloat16Null" 
+"quantilesBFloat16OrDefault" +"quantilesBFloat16OrNull" +"quantilesBFloat16Resample" +"quantilesBFloat16SimpleState" +"quantilesBFloat16State" +"quantilesBFloat16Weighted" +"quantilesBFloat16WeightedArgMax" +"quantilesBFloat16WeightedArgMin" +"quantilesBFloat16WeightedArray" +"quantilesBFloat16WeightedDistinct" +"quantilesBFloat16WeightedForEach" +"quantilesBFloat16WeightedIf" +"quantilesBFloat16WeightedMap" +"quantilesBFloat16WeightedMerge" +"quantilesBFloat16WeightedNull" +"quantilesBFloat16WeightedOrDefault" +"quantilesBFloat16WeightedOrNull" +"quantilesBFloat16WeightedResample" +"quantilesBFloat16WeightedSimpleState" +"quantilesBFloat16WeightedState" +"quantilesDD" +"quantilesDDArgMax" +"quantilesDDArgMin" +"quantilesDDArray" +"quantilesDDDistinct" +"quantilesDDForEach" +"quantilesDDIf" +"quantilesDDMap" +"quantilesDDMerge" +"quantilesDDNull" +"quantilesDDOrDefault" +"quantilesDDOrNull" +"quantilesDDResample" +"quantilesDDSimpleState" +"quantilesDDState" +"quantilesDeterministic" +"quantilesDeterministicArgMax" +"quantilesDeterministicArgMin" +"quantilesDeterministicArray" +"quantilesDeterministicDistinct" +"quantilesDeterministicForEach" +"quantilesDeterministicIf" +"quantilesDeterministicMap" +"quantilesDeterministicMerge" +"quantilesDeterministicNull" +"quantilesDeterministicOrDefault" +"quantilesDeterministicOrNull" +"quantilesDeterministicResample" +"quantilesDeterministicSimpleState" +"quantilesDeterministicState" +"quantilesDistinct" +"quantilesExact" +"quantilesExactArgMax" +"quantilesExactArgMin" +"quantilesExactArray" +"quantilesExactDistinct" +"quantilesExactExclusive" +"quantilesExactExclusiveArgMax" +"quantilesExactExclusiveArgMin" +"quantilesExactExclusiveArray" +"quantilesExactExclusiveDistinct" +"quantilesExactExclusiveForEach" +"quantilesExactExclusiveIf" +"quantilesExactExclusiveMap" +"quantilesExactExclusiveMerge" +"quantilesExactExclusiveNull" +"quantilesExactExclusiveOrDefault" +"quantilesExactExclusiveOrNull" +"quantilesExactExclusiveResample" +"quantilesExactExclusiveSimpleState" +"quantilesExactExclusiveState" +"quantilesExactForEach" +"quantilesExactHigh" +"quantilesExactHighArgMax" +"quantilesExactHighArgMin" +"quantilesExactHighArray" +"quantilesExactHighDistinct" +"quantilesExactHighForEach" +"quantilesExactHighIf" +"quantilesExactHighMap" +"quantilesExactHighMerge" +"quantilesExactHighNull" +"quantilesExactHighOrDefault" +"quantilesExactHighOrNull" +"quantilesExactHighResample" +"quantilesExactHighSimpleState" +"quantilesExactHighState" +"quantilesExactIf" +"quantilesExactInclusive" +"quantilesExactInclusiveArgMax" +"quantilesExactInclusiveArgMin" +"quantilesExactInclusiveArray" +"quantilesExactInclusiveDistinct" +"quantilesExactInclusiveForEach" +"quantilesExactInclusiveIf" +"quantilesExactInclusiveMap" +"quantilesExactInclusiveMerge" +"quantilesExactInclusiveNull" +"quantilesExactInclusiveOrDefault" +"quantilesExactInclusiveOrNull" +"quantilesExactInclusiveResample" +"quantilesExactInclusiveSimpleState" +"quantilesExactInclusiveState" +"quantilesExactLow" +"quantilesExactLowArgMax" +"quantilesExactLowArgMin" +"quantilesExactLowArray" +"quantilesExactLowDistinct" +"quantilesExactLowForEach" +"quantilesExactLowIf" +"quantilesExactLowMap" +"quantilesExactLowMerge" +"quantilesExactLowNull" +"quantilesExactLowOrDefault" +"quantilesExactLowOrNull" +"quantilesExactLowResample" +"quantilesExactLowSimpleState" +"quantilesExactLowState" +"quantilesExactMap" +"quantilesExactMerge" +"quantilesExactNull" +"quantilesExactOrDefault" +"quantilesExactOrNull" 
+"quantilesExactResample" +"quantilesExactSimpleState" +"quantilesExactState" +"quantilesExactWeighted" +"quantilesExactWeightedArgMax" +"quantilesExactWeightedArgMin" +"quantilesExactWeightedArray" +"quantilesExactWeightedDistinct" +"quantilesExactWeightedForEach" +"quantilesExactWeightedIf" +"quantilesExactWeightedMap" +"quantilesExactWeightedMerge" +"quantilesExactWeightedNull" +"quantilesExactWeightedOrDefault" +"quantilesExactWeightedOrNull" +"quantilesExactWeightedResample" +"quantilesExactWeightedSimpleState" +"quantilesExactWeightedState" +"quantilesForEach" +"quantilesGK" +"quantilesGKArgMax" +"quantilesGKArgMin" +"quantilesGKArray" +"quantilesGKDistinct" +"quantilesGKForEach" +"quantilesGKIf" +"quantilesGKMap" +"quantilesGKMerge" +"quantilesGKNull" +"quantilesGKOrDefault" +"quantilesGKOrNull" +"quantilesGKResample" +"quantilesGKSimpleState" +"quantilesGKState" +"quantilesIf" +"quantilesInterpolatedWeighted" +"quantilesInterpolatedWeightedArgMax" +"quantilesInterpolatedWeightedArgMin" +"quantilesInterpolatedWeightedArray" +"quantilesInterpolatedWeightedDistinct" +"quantilesInterpolatedWeightedForEach" +"quantilesInterpolatedWeightedIf" +"quantilesInterpolatedWeightedMap" +"quantilesInterpolatedWeightedMerge" +"quantilesInterpolatedWeightedNull" +"quantilesInterpolatedWeightedOrDefault" +"quantilesInterpolatedWeightedOrNull" +"quantilesInterpolatedWeightedResample" +"quantilesInterpolatedWeightedSimpleState" +"quantilesInterpolatedWeightedState" +"quantilesMap" +"quantilesMerge" +"quantilesNull" +"quantilesOrDefault" +"quantilesOrNull" +"quantilesResample" +"quantilesSimpleState" +"quantilesState" +"quantilesTDigest" +"quantilesTDigestArgMax" +"quantilesTDigestArgMin" +"quantilesTDigestArray" +"quantilesTDigestDistinct" +"quantilesTDigestForEach" +"quantilesTDigestIf" +"quantilesTDigestMap" +"quantilesTDigestMerge" +"quantilesTDigestNull" +"quantilesTDigestOrDefault" +"quantilesTDigestOrNull" +"quantilesTDigestResample" +"quantilesTDigestSimpleState" +"quantilesTDigestState" +"quantilesTDigestWeighted" +"quantilesTDigestWeightedArgMax" +"quantilesTDigestWeightedArgMin" +"quantilesTDigestWeightedArray" +"quantilesTDigestWeightedDistinct" +"quantilesTDigestWeightedForEach" +"quantilesTDigestWeightedIf" +"quantilesTDigestWeightedMap" +"quantilesTDigestWeightedMerge" +"quantilesTDigestWeightedNull" +"quantilesTDigestWeightedOrDefault" +"quantilesTDigestWeightedOrNull" +"quantilesTDigestWeightedResample" +"quantilesTDigestWeightedSimpleState" +"quantilesTDigestWeightedState" +"quantilesTiming" +"quantilesTimingArgMax" +"quantilesTimingArgMin" +"quantilesTimingArray" +"quantilesTimingDistinct" +"quantilesTimingForEach" +"quantilesTimingIf" +"quantilesTimingMap" +"quantilesTimingMerge" +"quantilesTimingNull" +"quantilesTimingOrDefault" +"quantilesTimingOrNull" +"quantilesTimingResample" +"quantilesTimingSimpleState" +"quantilesTimingState" +"quantilesTimingWeighted" +"quantilesTimingWeightedArgMax" +"quantilesTimingWeightedArgMin" +"quantilesTimingWeightedArray" +"quantilesTimingWeightedDistinct" +"quantilesTimingWeightedForEach" +"quantilesTimingWeightedIf" +"quantilesTimingWeightedMap" +"quantilesTimingWeightedMerge" +"quantilesTimingWeightedNull" +"quantilesTimingWeightedOrDefault" +"quantilesTimingWeightedOrNull" +"quantilesTimingWeightedResample" +"quantilesTimingWeightedSimpleState" +"quantilesTimingWeightedState" +"queryID" +"queryString" +"queryStringAndFragment" +"query_id" +"radians" +"rand" +"rand32" +"rand64" +"randBernoulli" +"randBinomial" +"randCanonical" +"randChiSquared" 
+"randConstant" +"randExponential" +"randFisherF" +"randLogNormal" +"randNegativeBinomial" +"randNormal" +"randPoisson" +"randStudentT" +"randUniform" +"randomFixedString" +"randomPrintableASCII" +"randomString" +"randomStringUTF8" +"range" +"rank" +"rankArgMax" +"rankArgMin" +"rankArray" +"rankCorr" +"rankCorrArgMax" +"rankCorrArgMin" +"rankCorrArray" +"rankCorrDistinct" +"rankCorrForEach" +"rankCorrIf" +"rankCorrMap" +"rankCorrMerge" +"rankCorrNull" +"rankCorrOrDefault" +"rankCorrOrNull" +"rankCorrResample" +"rankCorrSimpleState" +"rankCorrState" +"rankDistinct" +"rankForEach" +"rankIf" +"rankMap" +"rankMerge" +"rankNull" +"rankOrDefault" +"rankOrNull" +"rankResample" +"rankSimpleState" +"rankState" +"readWKTLineString" +"readWKTMultiLineString" +"readWKTMultiPolygon" +"readWKTPoint" +"readWKTPolygon" +"readWKTRing" +"regexpExtract" +"regexpQuoteMeta" +"regionHierarchy" +"regionIn" +"regionToArea" +"regionToCity" +"regionToContinent" +"regionToCountry" +"regionToDistrict" +"regionToName" +"regionToPopulation" +"regionToTopContinent" +"reinterpret" +"reinterpretAsDate" +"reinterpretAsDateTime" +"reinterpretAsFixedString" +"reinterpretAsFloat32" +"reinterpretAsFloat64" +"reinterpretAsInt128" +"reinterpretAsInt16" +"reinterpretAsInt256" +"reinterpretAsInt32" +"reinterpretAsInt64" +"reinterpretAsInt8" +"reinterpretAsString" +"reinterpretAsUInt128" +"reinterpretAsUInt16" +"reinterpretAsUInt256" +"reinterpretAsUInt32" +"reinterpretAsUInt64" +"reinterpretAsUInt8" +"reinterpretAsUUID" +"repeat" +"replace" +"replaceAll" +"replaceOne" +"replaceRegexpAll" +"replaceRegexpOne" +"replicate" +"retention" +"retentionArgMax" +"retentionArgMin" +"retentionArray" +"retentionDistinct" +"retentionForEach" +"retentionIf" +"retentionMap" +"retentionMerge" +"retentionNull" +"retentionOrDefault" +"retentionOrNull" +"retentionResample" +"retentionSimpleState" +"retentionState" +"reverse" +"reverseUTF8" +"revision" +"right" +"rightPad" +"rightPadUTF8" +"rightUTF8" +"round" +"roundAge" +"roundBankers" +"roundDown" +"roundDuration" +"roundToExp2" +"rowNumberInAllBlocks" +"rowNumberInBlock" +"row_number" +"row_numberArgMax" +"row_numberArgMin" +"row_numberArray" +"row_numberDistinct" +"row_numberForEach" +"row_numberIf" +"row_numberMap" +"row_numberMerge" +"row_numberNull" +"row_numberOrDefault" +"row_numberOrNull" +"row_numberResample" +"row_numberSimpleState" +"row_numberState" +"rpad" +"rtrim" +"runningAccumulate" +"runningConcurrency" +"runningDifference" +"runningDifferenceStartingWithFirstValue" +"s2CapContains" +"s2CapUnion" +"s2CellsIntersect" +"s2GetNeighbors" +"s2RectAdd" +"s2RectContains" +"s2RectIntersection" +"s2RectUnion" +"s2ToGeo" +"scalarProduct" +"sequenceCount" +"sequenceCountArgMax" +"sequenceCountArgMin" +"sequenceCountArray" +"sequenceCountDistinct" +"sequenceCountForEach" +"sequenceCountIf" +"sequenceCountMap" +"sequenceCountMerge" +"sequenceCountNull" +"sequenceCountOrDefault" +"sequenceCountOrNull" +"sequenceCountResample" +"sequenceCountSimpleState" +"sequenceCountState" +"sequenceMatch" +"sequenceMatchArgMax" +"sequenceMatchArgMin" +"sequenceMatchArray" +"sequenceMatchDistinct" +"sequenceMatchForEach" +"sequenceMatchIf" +"sequenceMatchMap" +"sequenceMatchMerge" +"sequenceMatchNull" +"sequenceMatchOrDefault" +"sequenceMatchOrNull" +"sequenceMatchResample" +"sequenceMatchSimpleState" +"sequenceMatchState" +"sequenceNextNode" +"sequenceNextNodeArgMax" +"sequenceNextNodeArgMin" +"sequenceNextNodeArray" +"sequenceNextNodeDistinct" +"sequenceNextNodeForEach" +"sequenceNextNodeIf" 
+"sequenceNextNodeMap" +"sequenceNextNodeMerge" +"sequenceNextNodeNull" +"sequenceNextNodeOrDefault" +"sequenceNextNodeOrNull" +"sequenceNextNodeResample" +"sequenceNextNodeSimpleState" +"sequenceNextNodeState" +"seriesDecomposeSTL" +"seriesOutliersDetectTukey" +"seriesPeriodDetectFFT" +"serverTimeZone" +"serverTimezone" +"serverUUID" +"shardCount" +"shardNum" +"showCertificate" +"sigmoid" +"sign" +"simpleJSONExtractBool" +"simpleJSONExtractFloat" +"simpleJSONExtractInt" +"simpleJSONExtractRaw" +"simpleJSONExtractString" +"simpleJSONExtractUInt" +"simpleJSONHas" +"simpleLinearRegression" +"simpleLinearRegressionArgMax" +"simpleLinearRegressionArgMin" +"simpleLinearRegressionArray" +"simpleLinearRegressionDistinct" +"simpleLinearRegressionForEach" +"simpleLinearRegressionIf" +"simpleLinearRegressionMap" +"simpleLinearRegressionMerge" +"simpleLinearRegressionNull" +"simpleLinearRegressionOrDefault" +"simpleLinearRegressionOrNull" +"simpleLinearRegressionResample" +"simpleLinearRegressionSimpleState" +"simpleLinearRegressionState" +"sin" +"singleValueOrNull" +"singleValueOrNullArgMax" +"singleValueOrNullArgMin" +"singleValueOrNullArray" +"singleValueOrNullDistinct" +"singleValueOrNullForEach" +"singleValueOrNullIf" +"singleValueOrNullMap" +"singleValueOrNullMerge" +"singleValueOrNullNull" +"singleValueOrNullOrDefault" +"singleValueOrNullOrNull" +"singleValueOrNullResample" +"singleValueOrNullSimpleState" +"singleValueOrNullState" +"sinh" +"sipHash128" +"sipHash128Keyed" +"sipHash128Reference" +"sipHash128ReferenceKeyed" +"sipHash64" +"sipHash64Keyed" +"skewPop" +"skewPopArgMax" +"skewPopArgMin" +"skewPopArray" +"skewPopDistinct" +"skewPopForEach" +"skewPopIf" +"skewPopMap" +"skewPopMerge" +"skewPopNull" +"skewPopOrDefault" +"skewPopOrNull" +"skewPopResample" +"skewPopSimpleState" +"skewPopState" +"skewSamp" +"skewSampArgMax" +"skewSampArgMin" +"skewSampArray" +"skewSampDistinct" +"skewSampForEach" +"skewSampIf" +"skewSampMap" +"skewSampMerge" +"skewSampNull" +"skewSampOrDefault" +"skewSampOrNull" +"skewSampResample" +"skewSampSimpleState" +"skewSampState" +"sleep" +"sleepEachRow" +"snowflakeIDToDateTime" +"snowflakeIDToDateTime64" +"snowflakeToDateTime" +"snowflakeToDateTime64" +"soundex" +"space" +"sparkBar" +"sparkBarArgMax" +"sparkBarArgMin" +"sparkBarArray" +"sparkBarDistinct" +"sparkBarForEach" +"sparkBarIf" +"sparkBarMap" +"sparkBarMerge" +"sparkBarNull" +"sparkBarOrDefault" +"sparkBarOrNull" +"sparkBarResample" +"sparkBarSimpleState" +"sparkBarState" +"sparkbar" +"sparkbarArgMax" +"sparkbarArgMin" +"sparkbarArray" +"sparkbarDistinct" +"sparkbarForEach" +"sparkbarIf" +"sparkbarMap" +"sparkbarMerge" +"sparkbarNull" +"sparkbarOrDefault" +"sparkbarOrNull" +"sparkbarResample" +"sparkbarSimpleState" +"sparkbarState" +"splitByAlpha" +"splitByChar" +"splitByNonAlpha" +"splitByRegexp" +"splitByString" +"splitByWhitespace" +"sqid" +"sqidDecode" +"sqidEncode" +"sqrt" +"startsWith" +"startsWithUTF8" +"stddevPop" +"stddevPopArgMax" +"stddevPopArgMin" +"stddevPopArray" +"stddevPopDistinct" +"stddevPopForEach" +"stddevPopIf" +"stddevPopMap" +"stddevPopMerge" +"stddevPopNull" +"stddevPopOrDefault" +"stddevPopOrNull" +"stddevPopResample" +"stddevPopSimpleState" +"stddevPopStable" +"stddevPopStableArgMax" +"stddevPopStableArgMin" +"stddevPopStableArray" +"stddevPopStableDistinct" +"stddevPopStableForEach" +"stddevPopStableIf" +"stddevPopStableMap" +"stddevPopStableMerge" +"stddevPopStableNull" +"stddevPopStableOrDefault" +"stddevPopStableOrNull" +"stddevPopStableResample" +"stddevPopStableSimpleState" 
+"stddevPopStableState" +"stddevPopState" +"stddevSamp" +"stddevSampArgMax" +"stddevSampArgMin" +"stddevSampArray" +"stddevSampDistinct" +"stddevSampForEach" +"stddevSampIf" +"stddevSampMap" +"stddevSampMerge" +"stddevSampNull" +"stddevSampOrDefault" +"stddevSampOrNull" +"stddevSampResample" +"stddevSampSimpleState" +"stddevSampStable" +"stddevSampStableArgMax" +"stddevSampStableArgMin" +"stddevSampStableArray" +"stddevSampStableDistinct" +"stddevSampStableForEach" +"stddevSampStableIf" +"stddevSampStableMap" +"stddevSampStableMerge" +"stddevSampStableNull" +"stddevSampStableOrDefault" +"stddevSampStableOrNull" +"stddevSampStableResample" +"stddevSampStableSimpleState" +"stddevSampStableState" +"stddevSampState" +"stem" +"stochasticLinearRegression" +"stochasticLinearRegressionArgMax" +"stochasticLinearRegressionArgMin" +"stochasticLinearRegressionArray" +"stochasticLinearRegressionDistinct" +"stochasticLinearRegressionForEach" +"stochasticLinearRegressionIf" +"stochasticLinearRegressionMap" +"stochasticLinearRegressionMerge" +"stochasticLinearRegressionNull" +"stochasticLinearRegressionOrDefault" +"stochasticLinearRegressionOrNull" +"stochasticLinearRegressionResample" +"stochasticLinearRegressionSimpleState" +"stochasticLinearRegressionState" +"stochasticLogisticRegression" +"stochasticLogisticRegressionArgMax" +"stochasticLogisticRegressionArgMin" +"stochasticLogisticRegressionArray" +"stochasticLogisticRegressionDistinct" +"stochasticLogisticRegressionForEach" +"stochasticLogisticRegressionIf" +"stochasticLogisticRegressionMap" +"stochasticLogisticRegressionMerge" +"stochasticLogisticRegressionNull" +"stochasticLogisticRegressionOrDefault" +"stochasticLogisticRegressionOrNull" +"stochasticLogisticRegressionResample" +"stochasticLogisticRegressionSimpleState" +"stochasticLogisticRegressionState" +"str_to_date" +"str_to_map" +"stringJaccardIndex" +"stringJaccardIndexUTF8" +"stringToH3" +"structureToCapnProtoSchema" +"structureToProtobufSchema" +"studentTTest" +"studentTTestArgMax" +"studentTTestArgMin" +"studentTTestArray" +"studentTTestDistinct" +"studentTTestForEach" +"studentTTestIf" +"studentTTestMap" +"studentTTestMerge" +"studentTTestNull" +"studentTTestOrDefault" +"studentTTestOrNull" +"studentTTestResample" +"studentTTestSimpleState" +"studentTTestState" +"subBitmap" +"subDate" +"substr" +"substring" +"substringIndex" +"substringIndexUTF8" +"substringUTF8" +"subtractDays" +"subtractHours" +"subtractInterval" +"subtractMicroseconds" +"subtractMilliseconds" +"subtractMinutes" +"subtractMonths" +"subtractNanoseconds" +"subtractQuarters" +"subtractSeconds" +"subtractTupleOfIntervals" +"subtractWeeks" +"subtractYears" +"sum" +"sumArgMax" +"sumArgMin" +"sumArray" +"sumCount" +"sumCountArgMax" +"sumCountArgMin" +"sumCountArray" +"sumCountDistinct" +"sumCountForEach" +"sumCountIf" +"sumCountMap" +"sumCountMerge" +"sumCountNull" +"sumCountOrDefault" +"sumCountOrNull" +"sumCountResample" +"sumCountSimpleState" +"sumCountState" +"sumDistinct" +"sumForEach" +"sumIf" +"sumKahan" +"sumKahanArgMax" +"sumKahanArgMin" +"sumKahanArray" +"sumKahanDistinct" +"sumKahanForEach" +"sumKahanIf" +"sumKahanMap" +"sumKahanMerge" +"sumKahanNull" +"sumKahanOrDefault" +"sumKahanOrNull" +"sumKahanResample" +"sumKahanSimpleState" +"sumKahanState" +"sumMap" +"sumMapFiltered" +"sumMapFilteredArgMax" +"sumMapFilteredArgMin" +"sumMapFilteredArray" +"sumMapFilteredDistinct" +"sumMapFilteredForEach" +"sumMapFilteredIf" +"sumMapFilteredMap" +"sumMapFilteredMerge" +"sumMapFilteredNull" +"sumMapFilteredOrDefault" 
+"sumMapFilteredOrNull" +"sumMapFilteredResample" +"sumMapFilteredSimpleState" +"sumMapFilteredState" +"sumMapFilteredWithOverflow" +"sumMapFilteredWithOverflowArgMax" +"sumMapFilteredWithOverflowArgMin" +"sumMapFilteredWithOverflowArray" +"sumMapFilteredWithOverflowDistinct" +"sumMapFilteredWithOverflowForEach" +"sumMapFilteredWithOverflowIf" +"sumMapFilteredWithOverflowMap" +"sumMapFilteredWithOverflowMerge" +"sumMapFilteredWithOverflowNull" +"sumMapFilteredWithOverflowOrDefault" +"sumMapFilteredWithOverflowOrNull" +"sumMapFilteredWithOverflowResample" +"sumMapFilteredWithOverflowSimpleState" +"sumMapFilteredWithOverflowState" +"sumMapWithOverflow" +"sumMapWithOverflowArgMax" +"sumMapWithOverflowArgMin" +"sumMapWithOverflowArray" +"sumMapWithOverflowDistinct" +"sumMapWithOverflowForEach" +"sumMapWithOverflowIf" +"sumMapWithOverflowMap" +"sumMapWithOverflowMerge" +"sumMapWithOverflowNull" +"sumMapWithOverflowOrDefault" +"sumMapWithOverflowOrNull" +"sumMapWithOverflowResample" +"sumMapWithOverflowSimpleState" +"sumMapWithOverflowState" +"sumMappedArrays" +"sumMappedArraysArgMax" +"sumMappedArraysArgMin" +"sumMappedArraysArray" +"sumMappedArraysDistinct" +"sumMappedArraysForEach" +"sumMappedArraysIf" +"sumMappedArraysMap" +"sumMappedArraysMerge" +"sumMappedArraysNull" +"sumMappedArraysOrDefault" +"sumMappedArraysOrNull" +"sumMappedArraysResample" +"sumMappedArraysSimpleState" +"sumMappedArraysState" +"sumMerge" +"sumNull" +"sumOrDefault" +"sumOrNull" +"sumResample" +"sumSimpleState" +"sumState" +"sumWithOverflow" +"sumWithOverflowArgMax" +"sumWithOverflowArgMin" +"sumWithOverflowArray" +"sumWithOverflowDistinct" +"sumWithOverflowForEach" +"sumWithOverflowIf" +"sumWithOverflowMap" +"sumWithOverflowMerge" +"sumWithOverflowNull" +"sumWithOverflowOrDefault" +"sumWithOverflowOrNull" +"sumWithOverflowResample" +"sumWithOverflowSimpleState" +"sumWithOverflowState" +"svg" +"synonyms" +"tan" +"tanh" +"tcpPort" +"tgamma" +"theilsU" +"theilsUArgMax" +"theilsUArgMin" +"theilsUArray" +"theilsUDistinct" +"theilsUForEach" +"theilsUIf" +"theilsUMap" +"theilsUMerge" +"theilsUNull" +"theilsUOrDefault" +"theilsUOrNull" +"theilsUResample" +"theilsUSimpleState" +"theilsUState" +"throwIf" +"tid" +"timeDiff" +"timeSlot" +"timeSlots" +"timeZone" +"timeZoneOf" +"timeZoneOffset" +"timestamp" +"timestampDiff" +"timestamp_diff" +"timezone" +"timezoneOf" +"timezoneOffset" +"toBool" +"toColumnTypeName" +"toDate" +"toDate32" +"toDate32OrDefault" +"toDate32OrNull" +"toDate32OrZero" +"toDateOrDefault" +"toDateOrNull" +"toDateOrZero" +"toDateTime" +"toDateTime32" +"toDateTime64" +"toDateTime64OrDefault" +"toDateTime64OrNull" +"toDateTime64OrZero" +"toDateTimeOrDefault" +"toDateTimeOrNull" +"toDateTimeOrZero" +"toDayOfMonth" +"toDayOfWeek" +"toDayOfYear" +"toDaysSinceYearZero" +"toDecimal128" +"toDecimal128OrDefault" +"toDecimal128OrNull" +"toDecimal128OrZero" +"toDecimal256" +"toDecimal256OrDefault" +"toDecimal256OrNull" +"toDecimal256OrZero" +"toDecimal32" +"toDecimal32OrDefault" +"toDecimal32OrNull" +"toDecimal32OrZero" +"toDecimal64" +"toDecimal64OrDefault" +"toDecimal64OrNull" +"toDecimal64OrZero" +"toDecimalString" +"toFixedString" +"toFloat32" +"toFloat32OrDefault" +"toFloat32OrNull" +"toFloat32OrZero" +"toFloat64" +"toFloat64OrDefault" +"toFloat64OrNull" +"toFloat64OrZero" +"toHour" +"toIPv4" +"toIPv4OrDefault" +"toIPv4OrNull" +"toIPv4OrZero" +"toIPv6" +"toIPv6OrDefault" +"toIPv6OrNull" +"toIPv6OrZero" +"toISOWeek" +"toISOYear" +"toInt128" +"toInt128OrDefault" +"toInt128OrNull" +"toInt128OrZero" +"toInt16" 
+"toInt16OrDefault" +"toInt16OrNull" +"toInt16OrZero" +"toInt256" +"toInt256OrDefault" +"toInt256OrNull" +"toInt256OrZero" +"toInt32" +"toInt32OrDefault" +"toInt32OrNull" +"toInt32OrZero" +"toInt64" +"toInt64OrDefault" +"toInt64OrNull" +"toInt64OrZero" +"toInt8" +"toInt8OrDefault" +"toInt8OrNull" +"toInt8OrZero" +"toIntervalDay" +"toIntervalHour" +"toIntervalMicrosecond" +"toIntervalMillisecond" +"toIntervalMinute" +"toIntervalMonth" +"toIntervalNanosecond" +"toIntervalQuarter" +"toIntervalSecond" +"toIntervalWeek" +"toIntervalYear" +"toJSONString" +"toLastDayOfMonth" +"toLastDayOfWeek" +"toLowCardinality" +"toMillisecond" +"toMinute" +"toModifiedJulianDay" +"toModifiedJulianDayOrNull" +"toMonday" +"toMonth" +"toNullable" +"toQuarter" +"toRelativeDayNum" +"toRelativeHourNum" +"toRelativeMinuteNum" +"toRelativeMonthNum" +"toRelativeQuarterNum" +"toRelativeSecondNum" +"toRelativeWeekNum" +"toRelativeYearNum" +"toSecond" +"toStartOfDay" +"toStartOfFifteenMinutes" +"toStartOfFiveMinute" +"toStartOfFiveMinutes" +"toStartOfHour" +"toStartOfISOYear" +"toStartOfInterval" +"toStartOfMicrosecond" +"toStartOfMillisecond" +"toStartOfMinute" +"toStartOfMonth" +"toStartOfNanosecond" +"toStartOfQuarter" +"toStartOfSecond" +"toStartOfTenMinutes" +"toStartOfWeek" +"toStartOfYear" +"toString" +"toStringCutToZero" +"toTime" +"toTimeZone" +"toTimezone" +"toTypeName" +"toUInt128" +"toUInt128OrDefault" +"toUInt128OrNull" +"toUInt128OrZero" +"toUInt16" +"toUInt16OrDefault" +"toUInt16OrNull" +"toUInt16OrZero" +"toUInt256" +"toUInt256OrDefault" +"toUInt256OrNull" +"toUInt256OrZero" +"toUInt32" +"toUInt32OrDefault" +"toUInt32OrNull" +"toUInt32OrZero" +"toUInt64" +"toUInt64OrDefault" +"toUInt64OrNull" +"toUInt64OrZero" +"toUInt8" +"toUInt8OrDefault" +"toUInt8OrNull" +"toUInt8OrZero" +"toUTCTimestamp" +"toUUID" +"toUUIDOrDefault" +"toUUIDOrNull" +"toUUIDOrZero" +"toUnixTimestamp" +"toUnixTimestamp64Micro" +"toUnixTimestamp64Milli" +"toUnixTimestamp64Nano" +"toValidUTF8" +"toWeek" +"toYYYYMM" +"toYYYYMMDD" +"toYYYYMMDDhhmmss" +"toYear" +"toYearWeek" +"to_utc_timestamp" +"today" +"tokens" +"topK" +"topKArgMax" +"topKArgMin" +"topKArray" +"topKDistinct" +"topKForEach" +"topKIf" +"topKMap" +"topKMerge" +"topKNull" +"topKOrDefault" +"topKOrNull" +"topKResample" +"topKSimpleState" +"topKState" +"topKWeighted" +"topKWeightedArgMax" +"topKWeightedArgMin" +"topKWeightedArray" +"topKWeightedDistinct" +"topKWeightedForEach" +"topKWeightedIf" +"topKWeightedMap" +"topKWeightedMerge" +"topKWeightedNull" +"topKWeightedOrDefault" +"topKWeightedOrNull" +"topKWeightedResample" +"topKWeightedSimpleState" +"topKWeightedState" +"topLevelDomain" +"topLevelDomainRFC" +"transactionID" +"transactionLatestSnapshot" +"transactionOldestSnapshot" +"transform" +"translate" +"translateUTF8" +"trim" +"trimBoth" +"trimLeft" +"trimRight" +"trunc" +"truncate" +"tryBase58Decode" +"tryBase64Decode" +"tryBase64URLDecode" +"tryDecrypt" +"tryIdnaEncode" +"tryPunycodeDecode" +"tumble" +"tumbleEnd" +"tumbleStart" +"tuple" +"tupleConcat" +"tupleDivide" +"tupleDivideByNumber" +"tupleElement" +"tupleHammingDistance" +"tupleIntDiv" +"tupleIntDivByNumber" +"tupleIntDivOrZero" +"tupleIntDivOrZeroByNumber" +"tupleMinus" +"tupleModulo" +"tupleModuloByNumber" +"tupleMultiply" +"tupleMultiplyByNumber" +"tupleNames" +"tupleNegate" +"tuplePlus" +"tupleToNameValuePairs" +"ucase" +"unbin" +"unhex" +"uniq" +"uniqArgMax" +"uniqArgMin" +"uniqArray" +"uniqCombined" +"uniqCombined64" +"uniqCombined64ArgMax" +"uniqCombined64ArgMin" +"uniqCombined64Array" 
+"uniqCombined64Distinct" +"uniqCombined64ForEach" +"uniqCombined64If" +"uniqCombined64Map" +"uniqCombined64Merge" +"uniqCombined64Null" +"uniqCombined64OrDefault" +"uniqCombined64OrNull" +"uniqCombined64Resample" +"uniqCombined64SimpleState" +"uniqCombined64State" +"uniqCombinedArgMax" +"uniqCombinedArgMin" +"uniqCombinedArray" +"uniqCombinedDistinct" +"uniqCombinedForEach" +"uniqCombinedIf" +"uniqCombinedMap" +"uniqCombinedMerge" +"uniqCombinedNull" +"uniqCombinedOrDefault" +"uniqCombinedOrNull" +"uniqCombinedResample" +"uniqCombinedSimpleState" +"uniqCombinedState" +"uniqDistinct" +"uniqExact" +"uniqExactArgMax" +"uniqExactArgMin" +"uniqExactArray" +"uniqExactDistinct" +"uniqExactForEach" +"uniqExactIf" +"uniqExactMap" +"uniqExactMerge" +"uniqExactNull" +"uniqExactOrDefault" +"uniqExactOrNull" +"uniqExactResample" +"uniqExactSimpleState" +"uniqExactState" +"uniqForEach" +"uniqHLL12" +"uniqHLL12ArgMax" +"uniqHLL12ArgMin" +"uniqHLL12Array" +"uniqHLL12Distinct" +"uniqHLL12ForEach" +"uniqHLL12If" +"uniqHLL12Map" +"uniqHLL12Merge" +"uniqHLL12Null" +"uniqHLL12OrDefault" +"uniqHLL12OrNull" +"uniqHLL12Resample" +"uniqHLL12SimpleState" +"uniqHLL12State" +"uniqIf" +"uniqMap" +"uniqMerge" +"uniqNull" +"uniqOrDefault" +"uniqOrNull" +"uniqResample" +"uniqSimpleState" +"uniqState" +"uniqTheta" +"uniqThetaArgMax" +"uniqThetaArgMin" +"uniqThetaArray" +"uniqThetaDistinct" +"uniqThetaForEach" +"uniqThetaIf" +"uniqThetaIntersect" +"uniqThetaMap" +"uniqThetaMerge" +"uniqThetaNot" +"uniqThetaNull" +"uniqThetaOrDefault" +"uniqThetaOrNull" +"uniqThetaResample" +"uniqThetaSimpleState" +"uniqThetaState" +"uniqThetaUnion" +"uniqUpTo" +"uniqUpToArgMax" +"uniqUpToArgMin" +"uniqUpToArray" +"uniqUpToDistinct" +"uniqUpToForEach" +"uniqUpToIf" +"uniqUpToMap" +"uniqUpToMerge" +"uniqUpToNull" +"uniqUpToOrDefault" +"uniqUpToOrNull" +"uniqUpToResample" +"uniqUpToSimpleState" +"uniqUpToState" +"upper" +"upperUTF8" +"uptime" +"user" +"validateNestedArraySizes" +"varPop" +"varPopArgMax" +"varPopArgMin" +"varPopArray" +"varPopDistinct" +"varPopForEach" +"varPopIf" +"varPopMap" +"varPopMerge" +"varPopNull" +"varPopOrDefault" +"varPopOrNull" +"varPopResample" +"varPopSimpleState" +"varPopStable" +"varPopStableArgMax" +"varPopStableArgMin" +"varPopStableArray" +"varPopStableDistinct" +"varPopStableForEach" +"varPopStableIf" +"varPopStableMap" +"varPopStableMerge" +"varPopStableNull" +"varPopStableOrDefault" +"varPopStableOrNull" +"varPopStableResample" +"varPopStableSimpleState" +"varPopStableState" +"varPopState" +"varSamp" +"varSampArgMax" +"varSampArgMin" +"varSampArray" +"varSampDistinct" +"varSampForEach" +"varSampIf" +"varSampMap" +"varSampMerge" +"varSampNull" +"varSampOrDefault" +"varSampOrNull" +"varSampResample" +"varSampSimpleState" +"varSampStable" +"varSampStableArgMax" +"varSampStableArgMin" +"varSampStableArray" +"varSampStableDistinct" +"varSampStableForEach" +"varSampStableIf" +"varSampStableMap" +"varSampStableMerge" +"varSampStableNull" +"varSampStableOrDefault" +"varSampStableOrNull" +"varSampStableResample" +"varSampStableSimpleState" +"varSampStableState" +"varSampState" +"variantElement" +"variantType" +"vectorDifference" +"vectorSum" +"version" +"visibleWidth" +"visitParamExtractBool" +"visitParamExtractFloat" +"visitParamExtractInt" +"visitParamExtractRaw" +"visitParamExtractString" +"visitParamExtractUInt" +"visitParamHas" +"week" +"welchTTest" +"welchTTestArgMax" +"welchTTestArgMin" +"welchTTestArray" +"welchTTestDistinct" +"welchTTestForEach" +"welchTTestIf" +"welchTTestMap" +"welchTTestMerge" 
+"welchTTestNull" +"welchTTestOrDefault" +"welchTTestOrNull" +"welchTTestResample" +"welchTTestSimpleState" +"welchTTestState" +"widthBucket" +"width_bucket" +"windowFunnel" +"windowFunnelArgMax" +"windowFunnelArgMin" +"windowFunnelArray" +"windowFunnelDistinct" +"windowFunnelForEach" +"windowFunnelIf" +"windowFunnelMap" +"windowFunnelMerge" +"windowFunnelNull" +"windowFunnelOrDefault" +"windowFunnelOrNull" +"windowFunnelResample" +"windowFunnelSimpleState" +"windowFunnelState" +"windowID" +"wkt" +"wordShingleMinHash" +"wordShingleMinHashArg" +"wordShingleMinHashArgCaseInsensitive" +"wordShingleMinHashArgCaseInsensitiveUTF8" +"wordShingleMinHashArgUTF8" +"wordShingleMinHashCaseInsensitive" +"wordShingleMinHashCaseInsensitiveUTF8" +"wordShingleMinHashUTF8" +"wordShingleSimHash" +"wordShingleSimHashCaseInsensitive" +"wordShingleSimHashCaseInsensitiveUTF8" +"wordShingleSimHashUTF8" +"wyHash64" +"xor" +"xxHash32" +"xxHash64" +"xxh3" +"yandexConsistentHash" +"yearweek" +"yesterday" +"zookeeperSessionUptime" From 5a6090ad05117c76a4b37071a6362f30f395b235 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 13 Aug 2024 16:25:07 +0200 Subject: [PATCH 252/265] Fix --- src/Processors/Sources/PostgreSQLSource.cpp | 12 ++++++------ src/Processors/Sources/PostgreSQLSource.h | 14 +++++++++----- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/Processors/Sources/PostgreSQLSource.cpp b/src/Processors/Sources/PostgreSQLSource.cpp index a3d6fd691d8..b9bda46bd10 100644 --- a/src/Processors/Sources/PostgreSQLSource.cpp +++ b/src/Processors/Sources/PostgreSQLSource.cpp @@ -35,9 +35,9 @@ PostgreSQLSource::PostgreSQLSource( const Block & sample_block, UInt64 max_block_size_) : ISource(sample_block.cloneEmpty()) - , query_str(query_str_) , max_block_size(max_block_size_) , connection_holder(std::move(connection_holder_)) + , query_str(query_str_) { init(sample_block); } @@ -51,10 +51,10 @@ PostgreSQLSource::PostgreSQLSource( UInt64 max_block_size_, bool auto_commit_) : ISource(sample_block.cloneEmpty()) - , query_str(query_str_) - , tx(std::move(tx_)) , max_block_size(max_block_size_) , auto_commit(auto_commit_) + , query_str(query_str_) + , tx(std::move(tx_)) { init(sample_block); } @@ -204,15 +204,15 @@ PostgreSQLSource::~PostgreSQLSource() */ stream->close(); } - - stream.reset(); - tx.reset(); } catch (...) { tryLogCurrentException(__PRETTY_FUNCTION__); } + stream.reset(); + tx.reset(); + if (connection_holder) connection_holder->setBroken(); } diff --git a/src/Processors/Sources/PostgreSQLSource.h b/src/Processors/Sources/PostgreSQLSource.h index 8a648ae8bb5..319c5d8d7c2 100644 --- a/src/Processors/Sources/PostgreSQLSource.h +++ b/src/Processors/Sources/PostgreSQLSource.h @@ -38,14 +38,12 @@ protected: UInt64 max_block_size_, bool auto_commit_); - String query_str; - std::shared_ptr tx; - std::unique_ptr stream; - Status prepare() override; - void onStart(); Chunk generate() override; + + void onStart(); + void onFinish(); private: @@ -61,6 +59,12 @@ private: postgres::ConnectionHolderPtr connection_holder; std::unordered_map array_info; + +protected: + String query_str; + /// tx and stream must be destroyed before connection_holder. 
+ std::shared_ptr tx; + std::unique_ptr stream; }; From 7ebb6efb2e413c44a82f2ac25d6dcb8e0da2f59a Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 13 Aug 2024 15:10:56 +0000 Subject: [PATCH 253/265] Style check --- tests/queries/0_stateless/00652_mergetree_mutations.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00652_mergetree_mutations.sh b/tests/queries/0_stateless/00652_mergetree_mutations.sh index 6be0ebf882f..edb306d3883 100755 --- a/tests/queries/0_stateless/00652_mergetree_mutations.sh +++ b/tests/queries/0_stateless/00652_mergetree_mutations.sh @@ -73,7 +73,7 @@ sleep 0.1 for i in {1..10} do - if [ $(${CLICKHOUSE_CLIENT} --query="SELECT count() FROM system.mutations WHERE database = '$CLICKHOUSE_DATABASE' and table = 'mutations_cleaner'") -eq 2 ]; then + if [ "$(${CLICKHOUSE_CLIENT} --query="SELECT count() FROM system.mutations WHERE database = '$CLICKHOUSE_DATABASE' and table = 'mutations_cleaner'")" -eq 2 ]; then break fi From ae614648a3397c4738b85ab8d138419387c562ed Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 13 Aug 2024 15:13:42 +0000 Subject: [PATCH 254/265] trigger sync From 7e209ebdf686e374fdf764cb6acb3e7de83e927f Mon Sep 17 00:00:00 2001 From: Austin Bruch Date: Tue, 13 Aug 2024 12:30:17 -0400 Subject: [PATCH 255/265] Remove trailing colon in header for consistency --- docs/en/sql-reference/table-functions/file.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 7908a3cb934..3243e6cf569 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -103,7 +103,7 @@ LIMIT 2; └─────────┴─────────┴─────────┘ ``` -### Inserting data from a file into a table: +### Inserting data from a file into a table ``` sql INSERT INTO FUNCTION From 9833ef0bed218afdc1927181c11bac306fe21dda Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 13 Aug 2024 17:50:13 +0000 Subject: [PATCH 256/265] slightly better --- .../Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h | 2 +- src/Processors/Merges/IMergingTransform.h | 6 +++--- src/Storages/MergeTree/MergeTask.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h index 39171c5a978..c34028b1cba 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h @@ -50,7 +50,7 @@ public: void consume(Input & input, size_t source_num) override; Status merge() override; - MergedStats getMergedStats() const override { return {.bytes = accumulated_bytes, .rows = accumulated_rows, .blocks = chunk_num}; } + MergedStats getMergedStats() const override { return {.bytes = accumulated_bytes, .rows = accumulated_rows, .blocks = chunk_num}; } private: Chunk prepareToMerge(); diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index fba5b038618..e5cd3bdde46 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -113,7 +113,7 @@ public: void work() override { - Stopwatch watch; + Stopwatch watch{CLOCK_MONOTONIC_COARSE}; if (!state.init_chunks.empty()) algorithm.initialize(std::move(state.init_chunks)); @@ -180,12 +180,12 @@ protected: if (seconds == 0.0) { - LOG_DEBUG(log, "{}: {} blocks, 
{} rows, {} bytes in 0 sec.", + LOG_DEBUG(log, "{}, {} blocks, {} rows, {} bytes in 0 sec.", transform_message, stats.blocks, stats.rows, stats.bytes); } else { - LOG_DEBUG(log, "{}: {} blocks, {} rows, {} bytes in {} sec., {} rows/sec., {}/sec.", + LOG_DEBUG(log, "{}, {} blocks, {} rows, {} bytes in {} sec., {} rows/sec., {}/sec.", transform_message, stats.blocks, stats.rows, stats.bytes, seconds, stats.rows / seconds, ReadableSize(stats.bytes / seconds)); } diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 3aa4d764685..95e00773bae 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -945,7 +945,7 @@ bool MergeTask::MergeProjectionsStage::finalizeProjectionsAndWholeMerge() const MergeTask::StageRuntimeContextPtr MergeTask::MergeProjectionsStage::getContextForNextStage() { /// Do not increment for projection stage because time is already accounted in main task. - /// The projection stage has its own empty projection stage which may add a drift of severals milliseconds. + /// The projection stage has its own empty projection stage which may add a drift of several milliseconds. if (global_ctx->parent_part == nullptr) { ProfileEvents::increment(ProfileEvents::MergeExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL); From 6af5fedf420c667e2a7866c89dfe0bd1d2ff37dd Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 13 Aug 2024 19:26:35 +0000 Subject: [PATCH 257/265] Update autogenerated version to 24.9.1.1 and contributors --- cmake/autogenerated_versions.txt | 10 +++++----- .../StorageSystemContributors.generated.cpp | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index d69646d3694..c82038804fe 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54489) +SET(VERSION_REVISION 54490) SET(VERSION_MAJOR 24) -SET(VERSION_MINOR 8) +SET(VERSION_MINOR 9) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 3f8b27d7accd2b5ec4afe7d0dd459115323304af) -SET(VERSION_DESCRIBE v24.8.1.1-testing) -SET(VERSION_STRING 24.8.1.1) +SET(VERSION_GITHASH e02b434d2fc0c4fbee29ca675deab7474d274608) +SET(VERSION_DESCRIBE v24.9.1.1-testing) +SET(VERSION_STRING 24.9.1.1) # end of autochange diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 35b9c0008c6..eb6f0382d15 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -457,6 +457,7 @@ const char * auto_contributors[] { "Gleb-Tretyakov", "GoGoWen2021", "Gosha Letov", + "Graham Campbell", "Gregory", "Grigorii Sokolik", "Grigory", @@ -472,6 +473,7 @@ const char * auto_contributors[] { "Habibullah Oladepo", "HaiBo Li", "Hakob Saghatelyan", + "Halersson Paris", "Hamoon", "Han Fei", "Han Shukai", @@ -541,6 +543,7 @@ const char * auto_contributors[] { "JackyWoo", "Jacob Hayes", "Jacob Herrington", + "Jacob Reckhard", "Jai Jhala", "Jake Bamrah", "Jake Liu", @@ -661,6 +664,7 @@ const char * auto_contributors[] { "LaurieLY", "Lee sungju", "Lemore", + "Lennard Eijsackers", "Leonardo Cecchi", "Leonardo Maciel", "Leonid Krylov", @@ -804,6 +808,7 @@ const char * auto_contributors[] { "Mingliang Pan", "Misko Lee", "Misz606", + "Miсhael Stetsyuk", "MochiXu", "Mohamad Fadhil", "Mohammad Arab Anvari", @@ -922,6 +927,7 @@ const char * auto_contributors[] { "Pervakov Grigorii", "Pervakov Grigory", "Peter", + "Peter Nguyen", "Petr Vasilev", "Pham Anh Tuan", "Philip Hallstrom", @@ -981,6 +987,7 @@ const char * auto_contributors[] { "Ronald Bradford", "Rory Crispin", "Roy Bellingan", + "Ruihang Xia", "Ruslan", "Ruslan Mardugalliamov", "Ruslan Savchenko", @@ -1000,9 +1007,11 @@ const char * auto_contributors[] { "Sami Kerola", "Samuel Chou", "Samuel Colvin", + "Samuele Guerrini", "San", "Sanjam Panda", "Sariel", + "Sasha Sheikin", "Saulius Valatka", "Sean Haynes", "Sean Lafferty", @@ -1202,6 +1211,7 @@ const char * auto_contributors[] { "Vladimir Makarov", "Vladimir Mihailenco", "Vladimir Smirnov", + "Vladimir Varankin", "Vladislav Rassokhin", "Vladislav Smirnov", "Vladislav V", @@ -1275,6 +1285,7 @@ const char * auto_contributors[] { "Zhichun Wu", "Zhiguo Zhou", "Zhipeng", + "Zhukova, Maria", "Zhuo Qiu", "Zijie Lu", "Zimu Li", @@ -1502,6 +1513,7 @@ const char * auto_contributors[] { "hchen9", "hcz", "hdhoang", + "heguangnan", "heleihelei", "helifu", "hendrik-m", @@ -1572,6 +1584,7 @@ const char * auto_contributors[] { "kevinyhzou", "kgurjev", "khamadiev", + "khodyrevyurii", "kigerzhang", "kirillikoff", "kmeaw", @@ -1787,6 +1800,7 @@ const char * auto_contributors[] { "ruslandoga", "ryzuo", "s-kat", + "sakulali", "sanjam", "santaux", "santrancisco", @@ -1804,6 +1818,7 @@ const char * auto_contributors[] { "shabroo", "shangshujie", "shedx", + "shiyer7474", "shuai-xu", "shuchaome", "shuyang", @@ -1901,6 +1916,7 @@ const char * auto_contributors[] { "wzl", "xPoSx", "xbthink", + "xc0derx", "xiao", "xiaolei565", "xiebin", @@ -1964,6 +1980,7 @@ const char * auto_contributors[] { "zkun", "zlx19950903", "zombee0", + "zoomxi", "zvonand", "zvrr", "zvvr", From a9226f49e7e052d2c392214afe32f4d6de1d6d62 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Tue, 13 Aug 2024 20:24:40 +0000 Subject: [PATCH 258/265] remove name with cyrillic letter --- 
src/Storages/System/StorageSystemContributors.generated.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index eb6f0382d15..67dfe3bfe86 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -808,7 +808,6 @@ const char * auto_contributors[] { "Mingliang Pan", "Misko Lee", "Misz606", - "Miсhael Stetsyuk", "MochiXu", "Mohamad Fadhil", "Mohammad Arab Anvari", From 0c9e1a061f825e5b9c5d623d90d4d898cd05e44c Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Tue, 13 Aug 2024 18:49:18 +0200 Subject: [PATCH 259/265] CI: Create new release branch workflow updates --- .github/workflows/create_release.yml | 2 +- tests/ci/ci_utils.py | 5 + tests/ci/create_release.py | 240 ++++++++++++++++----------- tests/ci/docker_server.py | 2 +- tests/ci/version_helper.py | 17 +- 5 files changed, 161 insertions(+), 105 deletions(-) diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index d4993b373df..73613c65266 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -129,9 +129,9 @@ jobs: if: ${{ inputs.type == 'patch' && ! inputs.only-repo }} shell: bash run: | - python3 ./tests/ci/create_release.py --set-progress-completed git reset --hard HEAD git checkout "$GITHUB_REF_NAME" + python3 ./tests/ci/create_release.py --set-progress-completed - name: Create GH Release if: ${{ inputs.type == 'patch' && ! inputs.only-repo }} shell: bash diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index d807f5be09f..b8778e0cc50 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -167,6 +167,11 @@ class GH: latest_branch = Shell.get_output( 'gh pr list --label release --repo ClickHouse/ClickHouse --search "sort:created" -L1 --json headRefName' ) + if latest_branch: + latest_branch = json.loads(latest_branch)[0]["headRefName"] + print( + f"Latest branch [{latest_branch}], release branch [{branch}], release latest [{latest_branch == branch}]" + ) return latest_branch == branch diff --git a/tests/ci/create_release.py b/tests/ci/create_release.py index 27eba273ce0..b5ea61e1952 100755 --- a/tests/ci/create_release.py +++ b/tests/ci/create_release.py @@ -61,6 +61,7 @@ class ReleaseContextManager: # create initial release info self.release_info = ReleaseInfo( release_branch="NA", + release_type="NA", commit_sha=args.ref, release_tag="NA", version="NA", @@ -93,6 +94,7 @@ class ReleaseContextManager: @dataclasses.dataclass class ReleaseInfo: version: str + release_type: str release_tag: str release_branch: str commit_sha: str @@ -131,7 +133,7 @@ class ReleaseInfo: return self def prepare( - self, commit_ref: str, release_type: str, skip_tag_check: bool + self, commit_ref: str, release_type: str, _skip_tag_check: bool ) -> "ReleaseInfo": version = None release_branch = None @@ -143,17 +145,18 @@ class ReleaseInfo: assert release_type in ("patch", "new") if release_type == "new": # check commit_ref is right and on a right branch - Shell.check( - f"git merge-base --is-ancestor {commit_ref} origin/master", - strict=True, - verbose=True, - ) + if commit_ref != "master": + Shell.check( + f"git merge-base --is-ancestor {commit_ref} origin/master", + strict=True, + verbose=True, + ) with checkout(commit_ref): commit_sha = Shell.get_output_or_raise(f"git rev-list -n1 {commit_ref}") # Git() must be inside "with checkout" contextmanager git = Git() 
version = get_version_from_repo(git=git) - release_branch = "master" + release_branch = f"{version.major}.{version.minor}" expected_prev_tag = f"v{version.major}.{version.minor}.1.1-new" version.bump().with_description(VersionType.NEW) assert ( @@ -204,10 +207,11 @@ class ReleaseInfo: expected_tag_prefix ) and git.latest_tag.endswith(expected_tag_suffix): pass - elif not skip_tag_check: - assert ( - False - ), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]. Already Released?" + # TODO: uncomment and check with dry-run + # elif not skip_tag_check: + # assert ( + # False + # ), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]. Already Released?" previous_release_sha = Shell.get_output_or_raise( f"git rev-list -n1 {previous_release_tag}" @@ -238,6 +242,7 @@ class ReleaseInfo: self.release_progress = ReleaseProgress.STARTED self.progress_status = ReleaseProgressDescription.OK self.latest = latest_release + self.release_type = release_type return self def push_release_tag(self, dry_run: bool) -> None: @@ -262,16 +267,15 @@ class ReleaseInfo: @staticmethod def _create_gh_label(label: str, color_hex: str, dry_run: bool) -> None: cmd = f"gh api repos/{CI.Envs.GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}" - Shell.check(cmd, dry_run=dry_run, strict=True) + res = Shell.check(cmd, dry_run=dry_run, verbose=True) + if not res: + # not a critical error - do not fail. branch might be created already (recovery case) + print("WARNING: failed to create backport labels for the new branch") def push_new_release_branch(self, dry_run: bool) -> None: - assert ( - self.release_branch == "master" - ), "New release branch can be created only for release type [new]" git = Git() version = get_version_from_repo(git=git) - new_release_branch = f"{version.major}.{version.minor}" - stable_release_type = version.get_stable_release_type() + new_release_branch = self.release_branch version_after_release = copy(version) version_after_release.bump() assert ( @@ -285,11 +289,8 @@ class ReleaseInfo: print( f"Create and push new release branch [{new_release_branch}], commit [{self.commit_sha}]" ) - with checkout(self.release_branch): + with checkout("master"): with checkout_new(new_release_branch): - pr_labels = f"--label {CI.Labels.RELEASE}" - if stable_release_type == VersionType.LTS: - pr_labels += f" --label {CI.Labels.RELEASE_LTS}" cmd_push_branch = ( f"{GIT_PREFIX} push --set-upstream origin {new_release_branch}" ) @@ -302,67 +303,108 @@ class ReleaseInfo: ReleaseInfo._create_gh_label( f"v{new_release_branch}-affected", "c2bfff", dry_run=dry_run ) - Shell.check( - f"""gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Release pull request for branch {new_release_branch}' - --head {new_release_branch} {pr_labels} - --body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.' 
- """, - dry_run=dry_run, - strict=True, - verbose=True, - ) def get_version_bump_branch(self): return f"bump_version_{self.version}" def update_version_and_contributors_list(self, dry_run: bool) -> None: - # Bump version, update contributors list, create PR - branch_upd_version_contributors = self.get_version_bump_branch() + # Bump version, update contributors list, create on release branch with checkout(self.commit_sha): git = Git() version = get_version_from_repo(git=git) - if self.release_branch == "master": + if self.release_type == "patch": + assert ( + version.string == self.version + ), f"BUG: version in release info does not match version in git commit, expected [{self.version}], got [{version.string}]" + version.bump_patch() + else: + version.reset_tweak() + version.with_description(version.get_stable_release_type()) + + with checkout(self.release_branch): + update_cmake_version(version) + update_contributors(raise_error=True) + cmd_commit_version_upd = f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'" + cmd_push_branch = f"{GIT_PREFIX} push" + Shell.check( + cmd_commit_version_upd, strict=True, dry_run=dry_run, verbose=True + ) + Shell.check(cmd_push_branch, strict=True, dry_run=dry_run, verbose=True) + if dry_run: + Shell.check( + f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", + verbose=True, + ) + Shell.check( + f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", + verbose=True, + ) + + # TODO: move to new GH step? + if self.release_type == "new": + print("Update version on master branch") + branch_upd_version_contributors = self.get_version_bump_branch() + with checkout(self.commit_sha): + git = Git() + version = get_version_from_repo(git=git) version.bump() version.with_description(VersionType.TESTING) - else: - version.with_description(version.get_stable_release_type()) - assert ( - version.string == self.version - ), f"BUG: version in release info does not match version in git commit, expected [{self.version}], got [{version.string}]" - with checkout(self.release_branch): - with checkout_new(branch_upd_version_contributors): - update_cmake_version(version) - update_contributors(raise_error=True) - cmd_commit_version_upd = f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'" - cmd_push_branch = f"{GIT_PREFIX} push --set-upstream origin {branch_upd_version_contributors}" - actor = os.getenv("GITHUB_ACTOR", "") or "me" - body = f"Automatic version bump after release {self.release_tag}\n### Changelog category (leave one):\n- Not for changelog (changelog entry is not required)\n" - cmd_create_pr = f"gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body \"{body}\" --assignee {actor}" + with checkout("master"): + with checkout_new(branch_upd_version_contributors): + update_cmake_version(version) + update_contributors(raise_error=True) + cmd_commit_version_upd = f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'" + cmd_push_branch = f"{GIT_PREFIX} push --set-upstream origin {branch_upd_version_contributors}" + actor = os.getenv("GITHUB_ACTOR", "") or "me" + body = f"Automatic version bump after release {self.release_tag}\n### Changelog category (leave one):\n- Not for changelog (changelog entry is not required)\n" + 
cmd_create_pr = f"gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base master --body \"{body}\" --assignee {actor}" + Shell.check( + cmd_commit_version_upd, + strict=True, + dry_run=dry_run, + verbose=True, + ) + Shell.check( + cmd_push_branch, strict=True, dry_run=dry_run, verbose=True + ) + Shell.check( + cmd_create_pr, strict=True, dry_run=dry_run, verbose=True + ) + if dry_run: + Shell.check( + f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", + verbose=True, + ) + Shell.check( + f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", + verbose=True, + ) + self.version_bump_pr = "dry-run" + else: + self.version_bump_pr = GH.get_pr_url_by_branch( + branch=branch_upd_version_contributors + ) + + # TODO: move to new GH step? + print("Create Release PR") + with checkout(self.release_branch): + pr_labels = f"--label {CI.Labels.RELEASE}" + if version.get_stable_release_type() == VersionType.LTS: + pr_labels += f" --label {CI.Labels.RELEASE_LTS}" Shell.check( - cmd_commit_version_upd, strict=True, dry_run=dry_run, verbose=True + f"""gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Release pull request for branch {self.release_branch}' \ + --head {self.release_branch} {pr_labels} \ + --body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.'""", + dry_run=dry_run, + strict=True, + verbose=True, ) - Shell.check(cmd_push_branch, strict=True, dry_run=dry_run, verbose=True) - Shell.check(cmd_create_pr, strict=True, dry_run=dry_run, verbose=True) - if dry_run: - Shell.check( - f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", - verbose=True, - ) - Shell.check( - f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'", - verbose=True, - ) - self.version_bump_pr = "dry-run" - else: - self.version_bump_pr = GH.get_pr_url_by_branch( - branch=branch_upd_version_contributors - ) def get_change_log_branch(self): return f"auto/{self.release_tag}" def update_release_info(self, dry_run: bool) -> "ReleaseInfo": - if self.release_branch != "master": + if self.release_type == "patch": if not self.changelog_pr: branch = self.get_change_log_branch() if not dry_run: @@ -371,21 +413,22 @@ class ReleaseInfo: url = "dry-run" print(f"ChangeLog PR url [{url}]") self.changelog_pr = url - - if not self.version_bump_pr: - branch = self.get_version_bump_branch() - if not dry_run: - url = GH.get_pr_url_by_branch(branch=branch) - else: - url = "dry-run" - print(f"Version bump PR url [{url}]") - self.version_bump_pr = url - - self.release_url = f"https://github.com/{CI.Envs.GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" - print(f"Release url [{self.release_url}]") - self.docker = f"docker run --rm clickhouse/clickhouse:{self.version} clickhouse --version" + else: + # new release branch - find version bump pr on a master branch + branch = self.get_version_bump_branch() + if not dry_run: + url = GH.get_pr_url_by_branch(branch=branch) + else: + url = "dry-run" + print(f"Version bump PR url [{url}]") + self.version_bump_pr = url + + self.release_url = f"https://github.com/{CI.Envs.GITHUB_REPOSITORY}/releases/tag/{self.release_tag}" + print(f"Release url [{self.release_url}]") + self.dump() + return self def create_gh_release(self, packages_files: List[str], dry_run: bool) -> None: @@ -410,35 +453,40 @@ class ReleaseInfo: def merge_prs(self, dry_run: bool) -> None: repo = CI.Envs.GITHUB_REPOSITORY - assert self.version_bump_pr - if 
dry_run: - version_bump_pr_num = 12345 - else: - version_bump_pr_num = int(self.version_bump_pr.split("/")[-1]) - print("Merging Version bump PR") - res_1 = Shell.check( - f"gh pr merge {version_bump_pr_num} --repo {repo} --merge --auto", - verbose=True, - dry_run=dry_run, - ) - - res_2 = True - if not self.release_tag.endswith("-new"): + if self.release_type == "patch": assert self.changelog_pr print("Merging ChangeLog PR") if dry_run: changelog_pr_num = 23456 else: changelog_pr_num = int(self.changelog_pr.split("/")[-1]) - res_2 = Shell.check( + res = Shell.check( f"gh pr merge {changelog_pr_num} --repo {repo} --merge --auto", verbose=True, dry_run=dry_run, ) else: - assert not self.changelog_pr + if not dry_run: + assert not self.changelog_pr + res = True - self.prs_merged = res_1 and res_2 + if self.release_type == "new": + assert self.version_bump_pr + print("Merging Version Bump PR") + if dry_run: + version_bump_pr = 23456 + else: + version_bump_pr = int(self.version_bump_pr.split("/")[-1]) + res = res and Shell.check( + f"gh pr merge {version_bump_pr} --repo {repo} --merge --auto", + verbose=True, + dry_run=dry_run, + ) + else: + if not dry_run: + assert not self.changelog_pr + + self.prs_merged = res class RepoTypes: @@ -759,7 +807,7 @@ if __name__ == "__main__": release_info.prepare( commit_ref=args.ref, release_type=args.release_type, - skip_tag_check=args.skip_tag_check, + _skip_tag_check=args.skip_tag_check, ) if args.download_packages: diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 3251ec5644e..34439c19f0a 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -70,7 +70,7 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--tag-type", type=str, - choices=("head", "release", "latest-release"), + choices=("head", "release", "release-latest"), default="head", help="defines required tags for resulting docker image. 
" "head - for master image (tag: head) " diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 07a7a9601c0..b20b2bb25cf 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -85,6 +85,16 @@ class ClickHouseVersion: self._tweak = 1 return self + def bump_patch(self) -> "ClickHouseVersion": + self._revision += 1 + self._patch += 1 + self._tweak = 1 + return self + + def reset_tweak(self) -> "ClickHouseVersion": + self._tweak = 1 + return self + def major_update(self) -> "ClickHouseVersion": if self._git is not None: self._git.update() @@ -104,13 +114,6 @@ class ClickHouseVersion: self.major, self.minor, self.patch + 1, self.revision, self._git ) - def reset_tweak(self) -> "ClickHouseVersion": - if self._git is not None: - self._git.update() - return ClickHouseVersion( - self.major, self.minor, self.patch, self.revision, self._git, 1 - ) - @property def major(self) -> int: return self._major From b5134fd4903b91250bb6db16a8d52ff0b2469686 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 13 Aug 2024 16:13:25 +0100 Subject: [PATCH 260/265] fix build --- base/base/cgroupsv2.cpp | 6 +++++- base/base/cgroupsv2.h | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/base/base/cgroupsv2.cpp b/base/base/cgroupsv2.cpp index b4ca8271d64..e0e37c8729b 100644 --- a/base/base/cgroupsv2.cpp +++ b/base/base/cgroupsv2.cpp @@ -51,8 +51,9 @@ fs::path cgroupV2PathOfProcess() #endif } -std::optional getCgroupsV2PathContainingFile(std::string_view file_name) +std::optional getCgroupsV2PathContainingFile([[maybe_unused]] std::string_view file_name) { +#if defined(OS_LINUX) if (!cgroupsV2Enabled()) return {}; @@ -70,4 +71,7 @@ std::optional getCgroupsV2PathContainingFile(std::string_view file_ current_cgroup = current_cgroup.parent_path(); } return {}; +#else + return {}; +#endif } diff --git a/base/base/cgroupsv2.h b/base/base/cgroupsv2.h index 925a399471e..a6276474254 100644 --- a/base/base/cgroupsv2.h +++ b/base/base/cgroupsv2.h @@ -19,4 +19,4 @@ std::filesystem::path cgroupV2PathOfProcess(); /// Returns the most nested cgroup dir containing the specified file. /// If cgroups v2 is not enabled - returns an empty optional. -std::optional getCgroupsV2PathContainingFile(std::string_view file_name); +std::optional getCgroupsV2PathContainingFile([[maybe_unused]] std::string_view file_name); From f6f79e188d6c5a16bb327f9e62dce506ea8e8a19 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 14 Aug 2024 00:23:18 +0000 Subject: [PATCH 261/265] Apply libunwind changes needed for musl --- contrib/libunwind | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libunwind b/contrib/libunwind index a89d904befe..601db0b0e03 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit a89d904befea07814628c6ce0b44083c4e149c62 +Subproject commit 601db0b0e03018c01710470a37703b618f9cf08b From 56d6ef5c4a015f5851923f2c420538456564e790 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 14 Aug 2024 10:53:07 +0000 Subject: [PATCH 262/265] Fix 02995_index_10 timeout --- tests/queries/0_stateless/02995_index_10.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02995_index_10.sh b/tests/queries/0_stateless/02995_index_10.sh index 813cc49cbd8..e7e7d3c3b42 100755 --- a/tests/queries/0_stateless/02995_index_10.sh +++ b/tests/queries/0_stateless/02995_index_10.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} " +${CLICKHOUSE_CLIENT} -q " DROP TABLE IF EXISTS test; CREATE TABLE test (a String, b String, c String) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 11; @@ -37,8 +37,9 @@ WHERE a >= (round(pow(sipHash64(1, try), 1 / (3 + sipHash64(2, try) % 8))) AS a1 AND b <= (b1 + round(pow(sipHash64(7, try), 1 / (3 + sipHash64(8, try) % 8))))::String AND c >= (round(pow(sipHash64(9, try), 1 / (3 + sipHash64(10, try) % 8))) AS c1)::String AND c <= (c1 + round(pow(sipHash64(11, try), 1 / (3 + sipHash64(12, try) % 8))))::String -HAVING count() > 0; -" +HAVING count() > 0 +SETTINGS trace_profile_events=0 -- test is too slow with profiling +;" done | ${CLICKHOUSE_CLIENT} -${CLICKHOUSE_CLIENT} "DROP TABLE test" +${CLICKHOUSE_CLIENT} -q "DROP TABLE test" From 962bf1d821a498aaeb6f16e5d4205272cfd00001 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Wed, 14 Aug 2024 13:37:14 +0200 Subject: [PATCH 263/265] CI: Fix for critical bug fix regex --- tests/ci/changelog.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/ci/changelog.py b/tests/ci/changelog.py index 39e426945d3..b7f73f22016 100755 --- a/tests/ci/changelog.py +++ b/tests/ci/changelog.py @@ -115,7 +115,6 @@ def get_descriptions(prs: PullRequests) -> Dict[str, List[Description]]: # pylint: enable=protected-access if repo_name not in repos: repos[repo_name] = pr.base.repo - in_changelog = False merge_commit = pr.merge_commit_sha if merge_commit is None: logging.warning("PR %s does not have merge-commit, skipping", pr.number) @@ -291,7 +290,7 @@ def generate_description(item: PullRequest, repo: Repository) -> Optional[Descri # Normalize bug fixes if ( re.match( - r"(?i)bug\Wfix", + r".*(?i)bug\Wfix", category, ) # Map "Critical Bug Fix" to "Bug fix" category for changelog From f11478398ec563218644eb3d8c16ae6f223c1a13 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 14 Aug 2024 16:43:26 +0000 Subject: [PATCH 264/265] Update version_date.tsv and changelogs after v24.3.7.30-lts --- docs/changelogs/v24.3.7.30-lts.md | 29 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 2 files changed, 30 insertions(+) create mode 100644 docs/changelogs/v24.3.7.30-lts.md diff --git a/docs/changelogs/v24.3.7.30-lts.md b/docs/changelogs/v24.3.7.30-lts.md new file mode 100644 index 00000000000..f945a54840f --- /dev/null +++ b/docs/changelogs/v24.3.7.30-lts.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.3.7.30-lts (c8a28cf4331) FIXME as compared to v24.3.6.48-lts (b2d33c3c45d) + +#### Improvement +* Backported in [#68103](https://github.com/ClickHouse/ClickHouse/issues/68103): Distinguish booleans and integers while parsing values for custom settings: ``` SET custom_a = true; SET custom_b = 1; ```. [#62206](https://github.com/ClickHouse/ClickHouse/pull/62206) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#67931](https://github.com/ClickHouse/ClickHouse/issues/67931): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68062](https://github.com/ClickHouse/ClickHouse/issues/68062): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). 
From f11478398ec563218644eb3d8c16ae6f223c1a13 Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Wed, 14 Aug 2024 16:43:26 +0000
Subject: [PATCH 264/265] Update version_date.tsv and changelogs after v24.3.7.30-lts

---
 docs/changelogs/v24.3.7.30-lts.md    | 29 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  1 +
 2 files changed, 30 insertions(+)
 create mode 100644 docs/changelogs/v24.3.7.30-lts.md

diff --git a/docs/changelogs/v24.3.7.30-lts.md b/docs/changelogs/v24.3.7.30-lts.md
new file mode 100644
index 00000000000..f945a54840f
--- /dev/null
+++ b/docs/changelogs/v24.3.7.30-lts.md
@@ -0,0 +1,29 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.3.7.30-lts (c8a28cf4331) FIXME as compared to v24.3.6.48-lts (b2d33c3c45d)
+
+#### Improvement
+* Backported in [#68103](https://github.com/ClickHouse/ClickHouse/issues/68103): Distinguish booleans and integers while parsing values for custom settings: ``` SET custom_a = true; SET custom_b = 1; ```. [#62206](https://github.com/ClickHouse/ClickHouse/pull/62206) ([Vitaly Baranov](https://github.com/vitlibar)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#67931](https://github.com/ClickHouse/ClickHouse/issues/67931): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#68062](https://github.com/ClickHouse/ClickHouse/issues/68062): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)).
+* Backported in [#67812](https://github.com/ClickHouse/ClickHouse/issues/67812): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#67848](https://github.com/ClickHouse/ClickHouse/issues/67848): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
+* Backported in [#68271](https://github.com/ClickHouse/ClickHouse/issues/68271): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#67806](https://github.com/ClickHouse/ClickHouse/issues/67806): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)).
+* Backported in [#67834](https://github.com/ClickHouse/ClickHouse/issues/67834): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#68206](https://github.com/ClickHouse/ClickHouse/issues/68206): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#68089](https://github.com/ClickHouse/ClickHouse/issues/68089): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)).
+* Backported in [#68120](https://github.com/ClickHouse/ClickHouse/issues/68120): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Update version after release. [#67676](https://github.com/ClickHouse/ClickHouse/pull/67676) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Backported in [#68074](https://github.com/ClickHouse/ClickHouse/issues/68074): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index f46353277e2..71a4a722a36 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -13,6 +13,7 @@ v24.4.4.113-stable 2024-08-02
 v24.4.3.25-stable 2024-06-14
 v24.4.2.141-stable 2024-06-07
 v24.4.1.2088-stable 2024-05-01
+v24.3.7.30-lts 2024-08-14
 v24.3.6.48-lts 2024-08-02
 v24.3.5.46-lts 2024-07-03
 v24.3.4.147-lts 2024-06-13
From 2e5f45a7ad4924affb1ff8b0e5a40b59b6549621 Mon Sep 17 00:00:00 2001
From: Michael Stetsyuk
Date: Wed, 14 Aug 2024 17:53:33 +0000
Subject: [PATCH 265/265] rename: S3DiskNoKeyErrors -> DiskS3NoSuchKeyErrors

---
 src/Common/CurrentMetrics.cpp                             | 2 +-
 src/IO/S3/Client.cpp                                      | 4 ++--
 tests/integration/test_checking_s3_blobs_paranoid/test.py | 2 +-
 tests/integration/test_storage_delta/test.py              | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp
index b6dd14d292c..67890568941 100644
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@@ -307,7 +307,7 @@
     M(FilteringMarksWithPrimaryKey, "Number of threads currently doing filtering of mark ranges by the primary key") \
     M(FilteringMarksWithSecondaryKeys, "Number of threads currently doing filtering of mark ranges by secondary keys") \
     \
-    M(S3DiskNoKeyErrors, "The number of `NoSuchKey` errors that occur when reading data from S3 cloud storage through ClickHouse disks.") \
+    M(DiskS3NoSuchKeyErrors, "The number of `NoSuchKey` errors that occur when reading data from S3 cloud storage through ClickHouse disks.") \
 
 #ifdef APPLY_FOR_EXTERNAL_METRICS
 #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M)
diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp
index a966e370ca1..8338a235387 100644
--- a/src/IO/S3/Client.cpp
+++ b/src/IO/S3/Client.cpp
@@ -46,7 +46,7 @@ namespace ProfileEvents
 
 namespace CurrentMetrics
 {
-    extern const Metric S3DiskNoKeyErrors;
+    extern const Metric DiskS3NoSuchKeyErrors;
 }
 
 namespace DB
@@ -701,7 +701,7 @@ RequestResult Client::processRequestResult(RequestResult && outcome) const
         return std::forward<RequestResult>(outcome);
 
     if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY)
-        CurrentMetrics::add(CurrentMetrics::S3DiskNoKeyErrors);
+        CurrentMetrics::add(CurrentMetrics::DiskS3NoSuchKeyErrors);
 
     String enriched_message = fmt::format(
         "{} {}",
diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py
index 73f2888ce00..76a0f30f82e 100644
--- a/tests/integration/test_checking_s3_blobs_paranoid/test.py
+++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py
@@ -708,7 +708,7 @@ def test_no_key_found_disk(cluster, broken_s3):
             """
             SELECT value
             FROM system.metrics
-            WHERE metric = 'S3DiskNoKeyErrors'
+            WHERE metric = 'DiskS3NoSuchKeyErrors'
             """
         ).strip()
     )
diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py
index 054b79ff6fe..a595d01e6b3 100644
--- a/tests/integration/test_storage_delta/test.py
+++ b/tests/integration/test_storage_delta/test.py
@@ -464,7 +464,7 @@ def test_restart_broken(started_cluster):
            """
            SELECT value
            FROM system.metrics
-           WHERE metric = 'S3DiskNoKeyErrors'
+           WHERE metric = 'DiskS3NoSuchKeyErrors'
            """
        ).strip()
    )
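Rows in `system.metrics` are keyed by name, so any dashboard or test still filtering on the old `S3DiskNoKeyErrors` name will silently match nothing after this rename. A small sanity check in the style of the updated integration tests; `node` is assumed to be a `ClickHouseInstance` from the test harness:

```python
# The renamed counter exists exactly once (its value may simply be zero)...
assert node.query(
    "SELECT count() FROM system.metrics WHERE metric = 'DiskS3NoSuchKeyErrors'"
).strip() == "1"

# ...while a filter on the old name now returns no rows at all.
assert node.query(
    "SELECT count() FROM system.metrics WHERE metric = 'S3DiskNoKeyErrors'"
).strip() == "0"
```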