From e7959d74bceed8a6c753e91b7ab7b29ed0ecd107 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Thu, 1 Sep 2022 22:06:54 +0000
Subject: [PATCH 001/252] add composable factory and protocol

---
 src/Server/TCPProtocolStack.h | 87 +++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100644 src/Server/TCPProtocolStack.h

diff --git a/src/Server/TCPProtocolStack.h b/src/Server/TCPProtocolStack.h
new file mode 100644
index 00000000000..c7c3e73acab
--- /dev/null
+++ b/src/Server/TCPProtocolStack.h
@@ -0,0 +1,87 @@
+#pragma once
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "base/types.h"
+
+
+namespace DB
+{
+
+class TCPProtocolStack : public Poco::Net::TCPServerConnection
+{
+    using StreamSocket = Poco::Net::StreamSocket;
+    using TCPServerConnection = Poco::Net::TCPServerConnection;
+private:
+    TCPServer & tcp_server;
+
+public:
+    TCPProtocolStack(TCPServer & tcp_server_, const StreamSocket & socket) : TCPServerConnection(socket), tcp_server(tcp_server_) {}
+
+    void append(std::unique_ptr<TCPServerConnectionFactory> factory)
+    {
+        stack.emplace_back(std::move(factory));
+    }
+
+    void run() override
+    {
+        for (auto & factory : stack)
+        {
+            std::unique_ptr<Poco::Net::TCPServerConnection> connection(factory->createConnection(socket(), tcp_server));
+            connection->run();
+        }
+    }
+
+private:
+    std::list<std::unique_ptr<TCPServerConnectionFactory>> stack;
+};
+
+
+class TCPProtocolStackFactory : public TCPServerConnectionFactory
+{
+private:
+    IServer & server;
+    Poco::Logger * log;
+    std::string server_display_name;
+
+    class DummyTCPHandler : public Poco::Net::TCPServerConnection
+    {
+    public:
+        using Poco::Net::TCPServerConnection::TCPServerConnection;
+        void run() override {}
+    };
+
+public:
+    explicit TCPProtocolStackFactory(IServer & server_) :
+        server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory"))
+    {
+        server_display_name = server.config().getString("display_name", getFQDNOrHostName());
+    }
+
+    Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override
+    {
+        try
+        {
+            LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString());
+            return new TCPProtocolStack(tcp_server, socket);
+        }
+        catch (const Poco::Net::NetException &)
+        {
+            LOG_TRACE(log, "TCP Request. Client is not connected (most likely RST packet was sent).");
+            return new DummyTCPHandler(socket);
+        }
+    }
+};
+
+
+}
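[Editor's note — PATCH 001] Two pieces are introduced: TCPProtocolStack, a connection whose run() drives a list of factories in order, and TCPProtocolStackFactory, which produces such connections. Each appended factory contributes one protocol layer over the same accepted socket. A minimal usage sketch (illustrative only; FirstLayerFactory/SecondLayerFactory stand in for any TCPServerConnectionFactory implementations, and tcp_server/socket come from the surrounding Poco server):

    auto * stack = new TCPProtocolStack(tcp_server, socket);
    stack->append(std::make_unique<FirstLayerFactory>());   // e.g. a handshake layer
    stack->append(std::make_unique<SecondLayerFactory>());  // e.g. the application protocol
    stack->run(); // each layer's connection runs over the same socket, in order

Note that at this stage a layer that replaces the socket (as TLS must) has no way to hand the new socket to the next layer; the following patches address exactly that.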
From b6e3680d60a3a94b377ae1c65cda01feffcd30e1 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Fri, 2 Sep 2022 04:47:04 +0000
Subject: [PATCH 002/252] test with TLS

---
 programs/server/Server.cpp    | 22 ++++++++-
 src/Server/TCPProtocolStack.h | 92 ++++++++++++++++++++++++++++++-----
 2 files changed, 99 insertions(+), 15 deletions(-)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index b621a897035..1633dec6865 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -88,6 +88,8 @@
 #include
 #include
 
+#include
+
 #include "config_core.h"
 #include "Common/config_version.h"
 
@@ -1952,19 +1954,35 @@ void Server::createServers(
         createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
         {
 #if USE_SSL
-            Poco::Net::SecureServerSocket socket;
+            //Poco::Net::SecureServerSocket socket;
+            Poco::Net::ServerSocket socket;
             auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true);
             socket.setReceiveTimeout(settings.receive_timeout);
             socket.setSendTimeout(settings.send_timeout);
+
+            TCPProtocolStackFactory *stack = new TCPProtocolStackFactory(*this);
+            stack->append(new TLSHandlerFactory(*this));
+            stack->append(new TCPHandlerFactory(*this, false, false));
+
             return ProtocolServerAdapter(
                 listen_host,
                 port_name,
                 "secure native protocol (tcp_secure): " + address.toString(),
                 std::make_unique<TCPServer>(
-                    new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false),
+                    stack,
                     server_pool,
                     socket,
                     new Poco::Net::TCPServerParams));
+/*
+            return ProtocolServerAdapter(
+                listen_host,
+                port_name,
+                "secure native protocol (tcp_secure): " + address.toString(),
+                std::make_unique<TCPServer>(
+                    new TCPHandlerFactory(*this, true, false),
+                    server_pool,
+                    socket,
+                    new Poco::Net::TCPServerParams));
+*/
 #else
         UNUSED(port);
         throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",
diff --git a/src/Server/TCPProtocolStack.h b/src/Server/TCPProtocolStack.h
index c7c3e73acab..e1c39fbf8eb 100644
--- a/src/Server/TCPProtocolStack.h
+++ b/src/Server/TCPProtocolStack.h
@@ -11,6 +11,9 @@
 #include
 #include
 #include
+#include
+
+#include "Poco/Net/SSLManager.h"
 
 #include "base/types.h"
 
@@ -18,20 +21,25 @@
 namespace DB
 {
 
+class TCPConnectionAccessor : public Poco::Net::TCPServerConnection
+{
+public:
+    using Poco::Net::TCPServerConnection::socket;
+    explicit TCPConnectionAccessor(const Poco::Net::StreamSocket & socket) : Poco::Net::TCPServerConnection(socket) {}
+};
+
 class TCPProtocolStack : public Poco::Net::TCPServerConnection
 {
     using StreamSocket = Poco::Net::StreamSocket;
     using TCPServerConnection = Poco::Net::TCPServerConnection;
 private:
     TCPServer & tcp_server;
+    std::list<TCPServerConnectionFactory::Ptr> stack;
 
 public:
-    TCPProtocolStack(TCPServer & tcp_server_, const StreamSocket & socket) : TCPServerConnection(socket), tcp_server(tcp_server_) {}
-
-    void append(std::unique_ptr<TCPServerConnectionFactory> factory)
-    {
-        stack.emplace_back(std::move(factory));
-    }
+    TCPProtocolStack(TCPServer & tcp_server_, const StreamSocket & socket, const std::list<TCPServerConnectionFactory::Ptr> & stack_)
+        : TCPServerConnection(socket), tcp_server(tcp_server_), stack(stack_)
+    {}
 
     void run() override
     {
@@ -39,16 +47,74 @@ public:
         {
             std::unique_ptr<Poco::Net::TCPServerConnection> connection(factory->createConnection(socket(), tcp_server));
             connection->run();
+            if (auto * accessor = dynamic_cast<TCPConnectionAccessor *>(connection.get()); accessor)
+                socket() = accessor->socket();
         }
     }
-
-private:
-    std::list<std::unique_ptr<TCPServerConnectionFactory>> stack;
 };
 
 
 class TCPProtocolStackFactory : public TCPServerConnectionFactory
 {
+private:
+    IServer & server;
+    Poco::Logger * log;
+    std::string server_display_name;
+    std::list<TCPServerConnectionFactory::Ptr> stack;
+
+    class DummyTCPHandler : public Poco::Net::TCPServerConnection
+    {
+    public:
+        using Poco::Net::TCPServerConnection::TCPServerConnection;
+        void run() override {}
+    };
+
+public:
+    explicit TCPProtocolStackFactory(IServer & server_)
+        : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory"))
+    {
+        server_display_name = server.config().getString("display_name", getFQDNOrHostName());
+    }
+
+    Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override
+    {
+        try
+        {
+            LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString());
+            return new TCPProtocolStack(tcp_server, socket, stack);
+        }
+        catch (const Poco::Net::NetException &)
+        {
+            LOG_TRACE(log, "TCP Request. Client is not connected (most likely RST packet was sent).");
+            return new DummyTCPHandler(socket);
+        }
+    }
+
+    void append(TCPServerConnectionFactory::Ptr factory)
+    {
+        stack.push_back(factory);
+    }
+};
+
+
+
+class TLSHandler : public TCPConnectionAccessor
+{
+    using StreamSocket = Poco::Net::StreamSocket;
+    using SecureStreamSocket = Poco::Net::SecureStreamSocket;
+    using TCPServerConnection = Poco::Net::TCPServerConnection;
+public:
+    explicit TLSHandler(const StreamSocket & socket) : TCPConnectionAccessor(socket) {}
+
+    void run() override
+    {
+        socket() = SecureStreamSocket::attach(socket(), Poco::Net::SSLManager::instance().defaultServerContext());
+    }
+};
+
+
+class TLSHandlerFactory : public TCPServerConnectionFactory
+{
 private:
     IServer & server;
     Poco::Logger * log;
@@ -62,18 +128,18 @@ private:
     };
 
 public:
-    explicit TCPProtocolStackFactory(IServer & server_) :
-        server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory"))
+    explicit TLSHandlerFactory(IServer & server_)
+        : server(server_), log(&Poco::Logger::get("TLSHandlerFactory"))
     {
         server_display_name = server.config().getString("display_name", getFQDNOrHostName());
     }
 
-    Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override
+    Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/* tcp_server*/) override
     {
         try
        {
             LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString());
-            return new TCPProtocolStack(tcp_server, socket);
+            return new TLSHandler(socket);
         }
         catch (const Poco::Net::NetException &)
         {
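[Editor's note — PATCH 002] The TLS layer works by upgrading the already-accepted plaintext socket in place: Poco's SecureStreamSocket::attach() wraps an existing socket in a TLS session without reconnecting, and TCPConnectionAccessor exposes the protected socket() so the stack can copy the upgraded socket back before running the next layer. The core of the trick, isolated (a sketch; it assumes Poco's SSLManager was initialized with a server context elsewhere, as ClickHouse does at startup):

    Poco::Net::StreamSocket & s = socket();
    s = Poco::Net::SecureStreamSocket::attach(
        s, Poco::Net::SSLManager::instance().defaultServerContext());
    // from here on, reads and writes on s go through TLS

Also note the stack is still assembled by hand in Server.cpp and the old single-factory code is kept commented out — this patch is explicitly an experiment ("test with TLS").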
From 5727517713455d28ae554e2cc44ecff3f5144eb3 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Fri, 2 Sep 2022 16:22:57 +0000
Subject: [PATCH 003/252] add variadic constructor

---
 programs/server/Server.cpp    | 14 ++++++++++++++
 src/Server/TCPProtocolStack.h |  6 +++---
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 1633dec6865..4b74d724b85 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1960,9 +1960,11 @@ void Server::createServers(
             socket.setReceiveTimeout(settings.receive_timeout);
             socket.setSendTimeout(settings.send_timeout);
 
+/*
             TCPProtocolStackFactory *stack = new TCPProtocolStackFactory(*this);
             stack->append(new TLSHandlerFactory(*this));
             stack->append(new TCPHandlerFactory(*this, false, false));
+
             return ProtocolServerAdapter(
                 listen_host,
                 port_name,
                 "secure native protocol (tcp_secure): " + address.toString(),
                 std::make_unique<TCPServer>(
                     stack,
                     server_pool,
                     socket,
                     new Poco::Net::TCPServerParams));
+*/
+            return ProtocolServerAdapter(
+                listen_host,
+                port_name,
+                "secure native protocol (tcp_secure): " + address.toString(),
+                std::make_unique<TCPServer>(
+                    new TCPProtocolStackFactory(*this, new TLSHandlerFactory(*this), new TCPHandlerFactory(*this, false, false)),
+                    server_pool,
+                    socket,
+                    new Poco::Net::TCPServerParams));
+
+
 /*
             return ProtocolServerAdapter(
                 listen_host,
diff --git a/src/Server/TCPProtocolStack.h b/src/Server/TCPProtocolStack.h
index e1c39fbf8eb..c72dfd98f53 100644
--- a/src/Server/TCPProtocolStack.h
+++ b/src/Server/TCPProtocolStack.h
@@ -70,8 +70,9 @@ private:
     };
 
 public:
-    explicit TCPProtocolStackFactory(IServer & server_)
-        : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory"))
+    template <typename... T>
+    explicit TCPProtocolStackFactory(IServer & server_, T... factory)
+        : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")), stack({factory...})
     {
         server_display_name = server.config().getString("display_name", getFQDNOrHostName());
     }
@@ -102,7 +103,6 @@ class TLSHandler : public TCPConnectionAccessor
 {
     using StreamSocket = Poco::Net::StreamSocket;
     using SecureStreamSocket = Poco::Net::SecureStreamSocket;
-    using TCPServerConnection = Poco::Net::TCPServerConnection;
 public:
     explicit TLSHandler(const StreamSocket & socket) : TCPConnectionAccessor(socket) {}
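[Editor's note — PATCH 003] The variadic constructor lets a whole stack be declared in one expression, as the new Server.cpp call shows. The mechanism is plain parameter-pack expansion into the braced initializer of the list member; a standalone illustration:

    #include <list>

    struct Chain
    {
        std::list<int> items;
        template <typename... T>
        explicit Chain(T... part) : items({part...}) {}  // Chain(1, 2, 3) -> items == {1, 2, 3}
    };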
From 8a7fe2888a9c6526c24437e2de628e40864cf7ae Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Thu, 8 Sep 2022 06:12:33 +0000
Subject: [PATCH 004/252] protocols configuration processing

---
 programs/server/Server.cpp | 109 +++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 4b74d724b85..df6b40cd347 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -1870,6 +1871,114 @@ void Server::createServers(
     http_params->setTimeout(settings.http_receive_timeout);
     http_params->setKeepAliveTimeout(keep_alive_timeout);
 
+
+
+    Poco::Util::AbstractConfiguration::Keys protocols;
+    config.keys("protocols", protocols);
+
+    auto createFactory = [&](const std::string & type) -> Poco::SharedPtr<TCPServerConnectionFactory> //TCPServerConnectionFactory::Ptr
+    {
+        if (type == "tcp")
+            return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false));
+        if (type == "tls")
+            return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this));
+        if (type == "mysql")
+            return TCPServerConnectionFactory::Ptr(new MySQLHandlerFactory(*this));
+        if (type == "postgres")
+            return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory(*this));
+        if (type == "http")
+            return TCPServerConnectionFactory::Ptr(
+                new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, async_metrics, "HTTPHandler-factory"))
+            );
+        if (type == "prometheus")
+            return TCPServerConnectionFactory::Ptr(
+                new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, async_metrics, "PrometheusHandler-factory"))
+            );
+        if (type == "interserver")
+            return TCPServerConnectionFactory::Ptr(
+                new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"))
+            );
+
+
+        throw Exception("LOGICAL ERROR: Unknown protocol name.", ErrorCodes::LOGICAL_ERROR);
+    };
+
+    for (const auto & protocol : protocols)
+    {
+        std::string prefix = protocol + ".";
+
+        if (config.has(prefix + "host") && config.has(prefix + "port"))
+        {
+
+            std::string port_name = prefix + "port";
+            std::string listen_host = prefix + "host";
+            bool is_secure = false;
+            auto stack = std::make_unique<TCPProtocolStackFactory>(*this);
+            while (true)
+            {
+                if (!config.has(prefix + "type"))
+                {
+                    // misconfigured - lack of "type"
+                    stack.reset();
+                    break;
+                }
+
+                std::string type = config.getString(prefix + "type");
+                if (type == "tls")
+                {
+                    if (is_secure)
+                    {
+                        // misconfigured - only one tls layer is allowed
+                        stack.reset();
+                        break;
+                    }
+                    is_secure = true;
+                }
+
+                TCPServerConnectionFactory::Ptr factory = createFactory(type);
+                if (!factory)
+                {
+                    // misconfigured - protocol "type" doesn't exist
+                    stack.reset();
+                    break;
+                }
+
+                stack->append(factory);
+
+                if (!config.has(prefix + "impl"))
+                {
+                    stack->append(createFactory("tcp"));
+                    break;
+                }
+                prefix = "protocols." + config.getString(prefix + "impl");
+            }
+
+            if (!stack)
+                continue;
+
+            createServer(config, listen_host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
+            {
+                Poco::Net::ServerSocket socket;
+                auto address = socketBindListen(config, socket, listen_host, port, is_secure);
+                socket.setReceiveTimeout(settings.receive_timeout);
+                socket.setSendTimeout(settings.send_timeout);
+
+                return ProtocolServerAdapter(
+                    listen_host,
+                    port_name.c_str(),
+                    "secure native protocol (tcp_secure): " + address.toString(),
+                    std::make_unique<TCPServer>(
+                        stack.release(),
+                        server_pool,
+                        socket,
+                        new Poco::Net::TCPServerParams));
+            });
+        }
+    }
+
+
+
+
     for (const auto & listen_host : listen_hosts)
     {
         /// HTTP
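[Editor's note — PATCH 004] The new loop walks every subtree of the protocols section in the server config: a section with host and port becomes a listener, type selects the handler factory, and impl chains to another section whose layers are appended underneath. The sketch below shows the config shape this loop consumes, loaded through Poco so the key paths match what the code reads; the tag names mirror the keys type/impl/host/port and are an illustration of the mechanism, not a documented schema:

    #include <sstream>
    #include <Poco/Util/XMLConfiguration.h>

    static const char * sample = R"(
    <clickhouse>
        <protocols>
            <binary>                 <!-- plain native protocol -->
                <type>tcp</type>
                <host>0.0.0.0</host>
                <port>9000</port>
            </binary>
            <binary_secure>          <!-- TLS layered on top of "binary" -->
                <type>tls</type>
                <impl>binary</impl>
                <host>0.0.0.0</host>
                <port>9440</port>
            </binary_secure>
        </protocols>
    </clickhouse>
    )";

    int main()
    {
        std::istringstream in(sample);
        Poco::Util::XMLConfiguration config(in);
        auto type = config.getString("protocols.binary_secure.type");  // "tls"
        auto impl = config.getString("protocols.binary_secure.impl");  // "binary"
    }

One quirk worth flagging: at this stage listen_host is assigned the key string prefix + "host" rather than the configured value; this is corrected in PATCH 008.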
From 772bf050da081aeef04814ea5420bf055299fe1a Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Sat, 10 Sep 2022 20:21:37 +0000
Subject: [PATCH 005/252] add PROXYv1 handler, add stack exchange data block,
 tune up protocols config

---
 programs/server/Server.cpp              |  56 ++++---
 src/Server/TCPHandler.cpp               |  10 ++
 src/Server/TCPHandler.h                 |   2 +
 src/Server/TCPHandlerFactory.h          |  16 ++
 src/Server/TCPProtocolStack.h           | 202 +++++++++++++++++++++++-
 src/Server/TCPProtocolStackData.h       |  15 ++
 src/Server/TCPServerConnectionFactory.h |   5 +
 7 files changed, 274 insertions(+), 32 deletions(-)
 create mode 100644 src/Server/TCPProtocolStackData.h

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index df6b40cd347..c86a33ba60c 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -71,6 +71,7 @@
 #include
 #include
 #include
+#include
 #include
 #include "MetricsTransmitter.h"
 #include
@@ -88,6 +89,7 @@
 #include
 #include
 #include
+#include
 
 #include
 
@@ -1882,6 +1884,8 @@ void Server::createServers(
             return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false));
         if (type == "tls")
             return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this));
+        if (type == "proxy1")
+            return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(*this));
         if (type == "mysql")
             return TCPServerConnectionFactory::Ptr(new MySQLHandlerFactory(*this));
 
@@ -1906,51 +1910,53 @@ void Server::createServers(
     for (const auto & protocol : protocols)
     {
         std::string prefix = protocol + ".";
+        std::unordered_set<std::string> pset {prefix};
 
         if (config.has(prefix + "host") && config.has(prefix + "port"))
         {
-
             std::string port_name = prefix + "port";
             std::string listen_host = prefix + "host";
             bool is_secure = false;
             auto stack = std::make_unique<TCPProtocolStackFactory>(*this);
             while (true)
             {
-                if (!config.has(prefix + "type"))
+                // if there is no "type" - it's a reference to another protocol and this is just another endpoint
+                if (config.has(prefix + "type"))
                 {
-                    // misconfigured - lack of "type"
-                    stack.reset();
-                    break;
-                }
-
-                std::string type = config.getString(prefix + "type");
-                if (type == "tls")
-                {
-                    if (is_secure)
+                    std::string type = config.getString(prefix + "type");
+                    if (type == "tls")
                     {
-                        // misconfigured - only one tls layer is allowed
+                        if (is_secure)
+                        {
+                            // misconfigured - only one tls layer is allowed
+                            stack.reset();
+                            break;
+                        }
+                        is_secure = true;
+                    }
+
+                    TCPServerConnectionFactory::Ptr factory = createFactory(type);
+                    if (!factory)
+                    {
+                        // misconfigured - protocol type doesn't exist
                         stack.reset();
                         break;
                     }
-                    is_secure = true;
+
+                    stack->append(factory);
+
+                    if (!config.has(prefix + "impl"))
+                        break;
                 }
 
-                TCPServerConnectionFactory::Ptr factory = createFactory(type);
-                if (!factory)
+                prefix = "protocols." + config.getString(prefix + "impl") + ".";
+
+                if (!pset.insert(prefix).second)
                 {
-                    // misconfigured - protocol "type" doesn't exist
+                    // misconfigured - loop is detected
                     stack.reset();
                     break;
                 }
-
-                stack->append(factory);
-
-                if (!config.has(prefix + "impl"))
-                {
-                    stack->append(createFactory("tcp"));
-                    break;
-                }
-                prefix = "protocols." + config.getString(prefix + "impl");
             }
 
             if (!stack)
diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp
index 1fc88168b35..44b6cfdd628 100644
--- a/src/Server/TCPHandler.cpp
+++ b/src/Server/TCPHandler.cpp
@@ -109,6 +109,16 @@ TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::N
 {
 }
 
+TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, TCPProtocolStackData & stack_data, std::string server_display_name_)
+: Poco::Net::TCPServerConnection(socket_)
+    , server(server_)
+    , tcp_server(tcp_server_)
+    , log(&Poco::Logger::get("TCPHandler"))
+    , forwarded_for(stack_data.forwarded_for)
+    , server_display_name(std::move(server_display_name_))
+{
+}
+
 TCPHandler::~TCPHandler()
 {
     try
diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h
index ea5fb2f9fe0..13c3c5f70c1 100644
--- a/src/Server/TCPHandler.h
+++ b/src/Server/TCPHandler.h
@@ -22,6 +22,7 @@
 #include
 
 #include "IServer.h"
+#include "Server/TCPProtocolStackData.h"
 #include "base/types.h"
 
 
@@ -137,6 +138,7 @@ public:
      * Proxy-forwarded (original client) IP address is used for quota accounting if quota is keyed by forwarded IP.
      */
     TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_);
+    TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, TCPProtocolStackData & stack_data, std::string server_display_name_);
     ~TCPHandler() override;
 
     void run() override;
diff --git a/src/Server/TCPHandlerFactory.h b/src/Server/TCPHandlerFactory.h
index 354c886f4c0..fde04c6e0ab 100644
--- a/src/Server/TCPHandlerFactory.h
+++ b/src/Server/TCPHandlerFactory.h
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include "Server/TCPProtocolStackData.h"
 #include
 #include
 #include
@@ -53,6 +54,21 @@ public:
             return new DummyTCPHandler(socket);
         }
     }
+
+    Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server, TCPProtocolStackData & stack_data) override
+    {
+        try
+        {
+            LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString());
+
+            return new TCPHandler(server, tcp_server, socket, stack_data, server_display_name);
+        }
+        catch (const Poco::Net::NetException &)
+        {
+            LOG_TRACE(log, "TCP Request. Client is not connected (most likely RST packet was sent).");
+            return new DummyTCPHandler(socket);
+        }
+    }
 };
 
 }
diff --git a/src/Server/TCPProtocolStack.h b/src/Server/TCPProtocolStack.h
index c72dfd98f53..21687898d45 100644
--- a/src/Server/TCPProtocolStack.h
+++ b/src/Server/TCPProtocolStack.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include
 #include
 #include
 
@@ -14,13 +15,24 @@
 #include
 
 #include "Poco/Net/SSLManager.h"
+#include
+
+#include "Interpreters/Context.h"
+#include "Server/TCPProtocolStackData.h"
 #include "base/types.h"
 
 
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int NETWORK_ERROR;
+    extern const int SOCKET_TIMEOUT;
+    extern const int CANNOT_READ_FROM_SOCKET;
+    extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
+}
+
 class TCPConnectionAccessor : public Poco::Net::TCPServerConnection
 {
 public:
@@ -43,12 +55,16 @@ public:
 
     void run() override
     {
+        TCPProtocolStackData stack_data;
+        stack_data.socket = socket();
         for (auto & factory : stack)
         {
-            std::unique_ptr<Poco::Net::TCPServerConnection> connection(factory->createConnection(socket(), tcp_server));
+            std::unique_ptr<Poco::Net::TCPServerConnection> connection(factory->createConnection(socket(), tcp_server, stack_data));
             connection->run();
-            if (auto * accessor = dynamic_cast<TCPConnectionAccessor *>(connection.get()); accessor)
-                socket() = accessor->socket();
+            if (stack_data.socket != socket())
+                socket() = stack_data.socket;
+//            if (auto * accessor = dynamic_cast<TCPConnectionAccessor *>(connection.get()); accessor)
+  //              socket() = accessor->socket();
         }
     }
 };
@@ -99,17 +115,23 @@ public:
 
 
 
-class TLSHandler : public TCPConnectionAccessor
+class TLSHandler : public Poco::Net::TCPServerConnection //TCPConnectionAccessor
 {
     using StreamSocket = Poco::Net::StreamSocket;
     using SecureStreamSocket = Poco::Net::SecureStreamSocket;
 public:
-    explicit TLSHandler(const StreamSocket & socket) : TCPConnectionAccessor(socket) {}
+    explicit TLSHandler(const StreamSocket & socket, TCPProtocolStackData & stack_data_)
+        : Poco::Net::TCPServerConnection(socket) //TCPConnectionAccessor(socket)
+        , stack_data(stack_data_)
+    {}
 
     void run() override
     {
         socket() = SecureStreamSocket::attach(socket(), Poco::Net::SSLManager::instance().defaultServerContext());
+        stack_data.socket = socket();
     }
+private:
+    TCPProtocolStackData & stack_data;
 };
 
 
@@ -134,12 +156,18 @@ public:
         server_display_name = server.config().getString("display_name", getFQDNOrHostName());
     }
 
-    Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/* tcp_server*/) override
+    Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override
+    {
+        TCPProtocolStackData stack_data;
+        return createConnection(socket, tcp_server, stack_data);
+    }
+
+    Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/* tcp_server*/, TCPProtocolStackData & stack_data) override
     {
         try
         {
             LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString());
-            return new TLSHandler(socket);
+            return new TLSHandler(socket, stack_data);
         }
         catch (const Poco::Net::NetException &)
         {
@@ -150,4 +178,164 @@ public:
     }
 };
 
+
+class ProxyV1Handler : public Poco::Net::TCPServerConnection
+{
+    using StreamSocket = Poco::Net::StreamSocket;
+public:
+    explicit ProxyV1Handler(const StreamSocket & socket, IServer & server_, TCPProtocolStackData & stack_data_)
+        : Poco::Net::TCPServerConnection(socket), server(server_), stack_data(stack_data_) {}
+
+    void run() override
+    {
+        const auto & settings = server.context()->getSettingsRef();
+        socket().setReceiveTimeout(settings.receive_timeout);
+
+        std::string word;
+        bool eol;
+
+        // Read PROXYv1 protocol header
+        // http://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
+
+        // read "PROXY"
+        if (!readWord(5, word, eol) || word != "PROXY" || eol)
+            throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
+
+        // read "TCP4" or "TCP6" or "UNKNOWN"
+        if (!readWord(7, word, eol))
+            throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
+
+        if (word != "TCP4" && word != "TCP6" && word != "UNKNOWN")
+            throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
+
+        if (word == "UNKNOWN" && eol)
+            return;
+
+        if (eol)
+            throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
+
+        // read address
+        if (!readWord(39, word, eol) || eol)
+            throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
+
+        stack_data.forwarded_for = std::move(word);
+
+        // read address
+        if (!readWord(39, word, eol) || eol)
+            throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
+
+        // read port
+        if (!readWord(5, word, eol) || eol)
+            throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
+
+        // read port and "\r\n"
+        if (!readWord(5, word, eol) || !eol)
+            throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
+    }
+
+protected:
+    bool readWord(int max_len, std::string & word, bool & eol)
+    {
+        word.clear();
+        eol = false;
+
+        char ch = 0;
+        int n = 0;
+        bool is_cr = false;
+        try
+        {
+            for (++max_len; max_len > 0 || is_cr; --max_len)
+            {
+                n = socket().receiveBytes(&ch, 1);
+                if (n == 0)
+                {
+                    socket().shutdown();
+                    return false;
+                }
+                if (n < 0)
+                    break;
+
+                if (is_cr)
+                    return ch == 0x0A;
+
+                if (ch == 0x0D)
+                {
+                    is_cr = true;
+                    eol = true;
+                    continue;
+                }
+
+                if (ch == ' ')
+                    return true;
+
+                word.push_back(ch);
+            }
+        }
+        catch (const Poco::Net::NetException & e)
+        {
+            throw NetException(e.displayText() + ", while reading from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::NETWORK_ERROR);
+        }
+        catch (const Poco::TimeoutException &)
+        {
+            throw NetException(fmt::format("Timeout exceeded while reading from socket ({}, {} ms)",
+                socket().peerAddress().toString(),
+                socket().getReceiveTimeout().totalMilliseconds()), ErrorCodes::SOCKET_TIMEOUT);
+        }
+        catch (const Poco::IOException & e)
+        {
+            throw NetException(e.displayText() + ", while reading from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::NETWORK_ERROR);
+        }
+
+        if (n < 0)
+            throw NetException("Cannot read from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::CANNOT_READ_FROM_SOCKET);
+
+        return false;
+    }
+
+private:
+    IServer & server;
+    TCPProtocolStackData & stack_data;
+};
+
+class ProxyV1HandlerFactory : public TCPServerConnectionFactory
+{
+private:
+    IServer & server;
+    Poco::Logger * log;
+    std::string server_display_name;
+
+    class DummyTCPHandler : public Poco::Net::TCPServerConnection
+    {
+    public:
+        using Poco::Net::TCPServerConnection::TCPServerConnection;
+        void run() override {}
+    };
+
+public:
+    explicit ProxyV1HandlerFactory(IServer & server_)
+        : server(server_), log(&Poco::Logger::get("ProxyV1HandlerFactory"))
+    {
+        server_display_name = server.config().getString("display_name", getFQDNOrHostName());
+    }
+
+    Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override
+    {
+        TCPProtocolStackData stack_data;
+        return createConnection(socket, tcp_server, stack_data);
+    }
+
+    Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/* tcp_server*/, TCPProtocolStackData & stack_data) override
+    {
+        try
+        {
+            LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString());
+            return new ProxyV1Handler(socket, server, stack_data);
+        }
+        catch (const Poco::Net::NetException &)
+        {
+            LOG_TRACE(log, "TCP Request. Client is not connected (most likely RST packet was sent).");
+            return new DummyTCPHandler(socket);
+        }
+    }
+};
+
 }
diff --git a/src/Server/TCPProtocolStackData.h b/src/Server/TCPProtocolStackData.h
new file mode 100644
index 00000000000..bc90de8c678
--- /dev/null
+++ b/src/Server/TCPProtocolStackData.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+struct TCPProtocolStackData
+{
+    Poco::Net::StreamSocket socket;
+    std::string forwarded_for;
+};
+
+}
diff --git a/src/Server/TCPServerConnectionFactory.h b/src/Server/TCPServerConnectionFactory.h
index 613f98352bd..ab9b0848ed7 100644
--- a/src/Server/TCPServerConnectionFactory.h
+++ b/src/Server/TCPServerConnectionFactory.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include
+#include "Server/TCPProtocolStackData.h"
 
 namespace Poco
 {
@@ -23,5 +24,9 @@ public:
     /// Same as Poco::Net::TCPServerConnectionFactory except we can pass the TCPServer
     virtual Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) = 0;
+    virtual Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server, TCPProtocolStackData &/* stack_data */)
+    {
+        return createConnection(socket, tcp_server);
+    }
 };
 }
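[Editor's note — PATCH 005] Two things happen here: a TCPProtocolStackData block now travels through every layer (carrying the possibly-replaced socket and the PROXY-supplied client address), and ProxyV1Handler consumes the human-readable PROXY protocol v1 preamble before the real protocol starts. For reference, a well-formed v1 header as readWord() tokenizes it (example addresses as in the HAProxy spec; a space ends each word, "\r\n" ends the line):

    // PROXY TCP4 192.168.0.1 192.168.0.11 56324 443\r\n
    // |     |    |           |            |     |
    // sig   fam  source addr dest addr    sport dport
    static const char proxy_v1_example[] =
        "PROXY TCP4 192.168.0.1 192.168.0.11 56324 443\r\n";

Only the source address is kept (into stack_data.forwarded_for); the remaining words are read and validated, then discarded.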
From d001baec873b1bb13d633cee0b5f6204918efb70 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Sun, 11 Sep 2022 00:40:40 +0000
Subject: [PATCH 006/252] pass section config key to a factory

---
 programs/server/Server.cpp    | 19 ++++++++-------
 src/Server/TCPProtocolStack.h | 45 ++++++++++++++++++-----------------
 2 files changed, 33 insertions(+), 31 deletions(-)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index c86a33ba60c..205e30a2c65 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1878,14 +1878,14 @@ void Server::createServers(
     Poco::Util::AbstractConfiguration::Keys protocols;
     config.keys("protocols", protocols);
 
-    auto createFactory = [&](const std::string & type) -> Poco::SharedPtr<TCPServerConnectionFactory> //TCPServerConnectionFactory::Ptr
+    auto createFactory = [&](const std::string & type, const std::string & conf_name) -> Poco::SharedPtr<TCPServerConnectionFactory> //TCPServerConnectionFactory::Ptr
     {
         if (type == "tcp")
             return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false));
         if (type == "tls")
-            return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this));
+            return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this, conf_name));
         if (type == "proxy1")
-            return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(*this));
+            return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(*this, conf_name));
         if (type == "mysql")
             return TCPServerConnectionFactory::Ptr(new MySQLHandlerFactory(*this));
         if (type == "postgres")
@@ -1903,12 +1903,12 @@ void Server::createServers(
             new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"))
         );
 
         throw Exception("LOGICAL ERROR: Unknown protocol name.", ErrorCodes::LOGICAL_ERROR);
     };
 
     for (const auto & protocol : protocols)
     {
+        std::string conf_name = protocol;
         std::string prefix = protocol + ".";
         std::unordered_set<std::string> pset {prefix};
 
@@ -1917,10 +1917,10 @@ void Server::createServers(
             std::string port_name = prefix + "port";
             std::string listen_host = prefix + "host";
             bool is_secure = false;
-            auto stack = std::make_unique<TCPProtocolStackFactory>(*this);
+            auto stack = std::make_unique<TCPProtocolStackFactory>(*this, conf_name);
             while (true)
             {
-                // if there is no "type" - it's a reference to another protocol and this is just another endpoint
+                // if there is no "type" - it's a reference to another protocol and this is just an endpoint
                 if (config.has(prefix + "type"))
                 {
                     std::string type = config.getString(prefix + "type");
@@ -1935,7 +1935,7 @@ void Server::createServers(
                         is_secure = true;
                     }
 
-                    TCPServerConnectionFactory::Ptr factory = createFactory(type);
+                    TCPServerConnectionFactory::Ptr factory = createFactory(type, conf_name);
                     if (!factory)
                     {
                         // misconfigured - protocol type doesn't exist
@@ -1949,7 +1949,8 @@ void Server::createServers(
                         break;
                     }
 
-                prefix = "protocols." + config.getString(prefix + "impl") + ".";
+                conf_name = "protocols." + config.getString(prefix + "impl");
+                prefix = conf_name + ".";
 
                 if (!pset.insert(prefix).second)
                 {
@@ -2095,7 +2096,7 @@ void Server::createServers(
                 port_name,
                 "secure native protocol (tcp_secure): " + address.toString(),
                 std::make_unique<TCPServer>(
-                    new TCPProtocolStackFactory(*this, new TLSHandlerFactory(*this), new TCPHandlerFactory(*this, false, false)),
+                    new TCPProtocolStackFactory(*this, "", new TLSHandlerFactory(*this, ""), new TCPHandlerFactory(*this, false, false)),
                     server_pool,
                     socket,
                     new Poco::Net::TCPServerParams));
diff --git a/src/Server/TCPProtocolStack.h b/src/Server/TCPProtocolStack.h
index 21687898d45..0804d78336b 100644
--- a/src/Server/TCPProtocolStack.h
+++ b/src/Server/TCPProtocolStack.h
@@ -47,10 +47,11 @@ class TCPProtocolStack : public Poco::Net::TCPServerConnection
 private:
     TCPServer & tcp_server;
     std::list<TCPServerConnectionFactory::Ptr> stack;
+    std::string conf_name;
 
 public:
-    TCPProtocolStack(TCPServer & tcp_server_, const StreamSocket & socket, const std::list<TCPServerConnectionFactory::Ptr> & stack_)
-        : TCPServerConnection(socket), tcp_server(tcp_server_), stack(stack_)
+    TCPProtocolStack(TCPServer & tcp_server_, const StreamSocket & socket, const std::list<TCPServerConnectionFactory::Ptr> & stack_, const std::string & conf_name_)
+        : TCPServerConnection(socket), tcp_server(tcp_server_), stack(stack_), conf_name(conf_name_)
     {}
 
     void run() override
@@ -73,9 +74,9 @@ public:
 class TCPProtocolStackFactory : public TCPServerConnectionFactory
 {
 private:
-    IServer & server;
+    IServer & server [[maybe_unused]];
     Poco::Logger * log;
-    std::string server_display_name;
+    std::string conf_name;
     std::list<TCPServerConnectionFactory::Ptr> stack;
 
     class DummyTCPHandler : public Poco::Net::TCPServerConnection
@@ -87,10 +88,9 @@ private:
 
 public:
     template <typename... T>
-    explicit TCPProtocolStackFactory(IServer & server_, T... factory)
-        : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")), stack({factory...})
+    explicit TCPProtocolStackFactory(IServer & server_, const std::string & conf_name_, T... factory)
+        : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")), conf_name(conf_name_), stack({factory...})
     {
-        server_display_name = server.config().getString("display_name", getFQDNOrHostName());
     }
 
     Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override
@@ -98,7 +98,7 @@ public:
         try
         {
             LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString());
-            return new TCPProtocolStack(tcp_server, socket, stack);
+            return new TCPProtocolStack(tcp_server, socket, stack, conf_name);
         }
         catch (const Poco::Net::NetException &)
         {
@@ -120,8 +120,9 @@ class TLSHandler : public Poco::Net::TCPServerConnection //TCPConnectionAccessor
     using StreamSocket = Poco::Net::StreamSocket;
     using SecureStreamSocket = Poco::Net::SecureStreamSocket;
 public:
-    explicit TLSHandler(const StreamSocket & socket, TCPProtocolStackData & stack_data_)
+    explicit TLSHandler(const StreamSocket & socket, const std::string & conf_name_, TCPProtocolStackData & stack_data_)
         : Poco::Net::TCPServerConnection(socket) //TCPConnectionAccessor(socket)
+        , conf_name(conf_name_)
         , stack_data(stack_data_)
     {}
 
@@ -131,6 +132,7 @@ public:
         stack_data.socket = socket();
     }
 private:
+    std::string conf_name;
     TCPProtocolStackData & stack_data;
 };
 
@@ -138,9 +140,9 @@ private:
 class TLSHandlerFactory : public TCPServerConnectionFactory
 {
 private:
-    IServer & server;
+    IServer & server [[maybe_unused]];
     Poco::Logger * log;
-    std::string server_display_name;
+    std::string conf_name;
 
     class DummyTCPHandler : public Poco::Net::TCPServerConnection
     {
@@ -150,10 +152,9 @@ private:
     };
 
 public:
-    explicit TLSHandlerFactory(IServer & server_)
-        : server(server_), log(&Poco::Logger::get("TLSHandlerFactory"))
+    explicit TLSHandlerFactory(IServer & server_, const std::string & conf_name_)
+        : server(server_), log(&Poco::Logger::get("TLSHandlerFactory")), conf_name(conf_name_)
     {
-        server_display_name = server.config().getString("display_name", getFQDNOrHostName());
     }
 
     Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override
@@ -167,7 +168,7 @@ public:
         try
         {
             LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString());
-            return new TLSHandler(socket, stack_data);
+            return new TLSHandler(socket, conf_name, stack_data);
         }
         catch (const Poco::Net::NetException &)
         {
@@ -182,8 +183,8 @@ class ProxyV1Handler : public Poco::Net::TCPServerConnection
 {
     using StreamSocket = Poco::Net::StreamSocket;
 public:
-    explicit ProxyV1Handler(const StreamSocket & socket, IServer & server_, TCPProtocolStackData & stack_data_)
-        : Poco::Net::TCPServerConnection(socket), server(server_), stack_data(stack_data_) {}
+    explicit ProxyV1Handler(const StreamSocket & socket, IServer & server_, const std::string & conf_name_, TCPProtocolStackData & stack_data_)
+        : Poco::Net::TCPServerConnection(socket), server(server_), conf_name(conf_name_), stack_data(stack_data_) {}
 
     void run() override
     {
@@ -293,6 +294,7 @@ protected:
 
 private:
     IServer & server;
+    std::string conf_name;
     TCPProtocolStackData & stack_data;
 };
 
@@ -301,7 +303,7 @@ class ProxyV1HandlerFactory : public TCPServerConnectionFactory
 private:
     IServer & server;
     Poco::Logger * log;
-    std::string server_display_name;
+    std::string conf_name;
 
     class DummyTCPHandler : public Poco::Net::TCPServerConnection
     {
@@ -311,10 +313,9 @@ private:
     };
 
 public:
-    explicit ProxyV1HandlerFactory(IServer & server_)
-        : server(server_), log(&Poco::Logger::get("ProxyV1HandlerFactory"))
+    explicit ProxyV1HandlerFactory(IServer & server_, const std::string & conf_name_)
+        : server(server_), log(&Poco::Logger::get("ProxyV1HandlerFactory")), conf_name(conf_name_)
     {
-        server_display_name = server.config().getString("display_name", getFQDNOrHostName());
     }
 
     Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override
@@ -328,7 +329,7 @@ public:
         try
         {
             LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString());
-            return new ProxyV1Handler(socket, server, stack_data);
+            return new ProxyV1Handler(socket, server, conf_name, stack_data);
         }
         catch (const Poco::Net::NetException &)
         {
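[Editor's note — PATCH 006] Threading conf_name (e.g. "protocols.binary_secure") into every factory and handler gives each layer a handle on its own configuration subtree. Nothing reads it yet in this patch, but it is the hook that makes per-endpoint settings possible later — for example (hypothetical key, shown only to motivate the plumbing):

    // inside a layer that knows its conf_name:
    auto cert = server.config().getString(conf_name + ".certificateFile", "");  // hypothetical per-endpoint TLS cert

It also drops the server_display_name members, which the stack-level factories never used, and marks the retained IServer references [[maybe_unused]].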
From c3ac0c434bb3b967df5fb567ada6486ac9dcbda3 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Sun, 11 Sep 2022 04:40:58 +0000
Subject: [PATCH 007/252] some refactoring

---
 programs/server/Server.cpp    | 31 +++++++++----------------------
 src/Server/TCPProtocolStack.h |  2 ++
 2 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 205e30a2c65..28f0e34eb73 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1903,7 +1903,7 @@ void Server::createServers(
             new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"))
         );
 
-        throw Exception("LOGICAL ERROR: Unknown protocol name.", ErrorCodes::LOGICAL_ERROR);
+        throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol configuration error, unknown protocol name '{}'", type);
     };
 
     for (const auto & protocol : protocols)
@@ -1927,40 +1927,27 @@ void Server::createServers(
                 if (type == "tls")
                 {
                     if (is_secure)
-                    {
-                        // misconfigured - only one tls layer is allowed
-                        stack.reset();
-                        break;
-                    }
+                        throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol);
                     is_secure = true;
                 }
 
-                TCPServerConnectionFactory::Ptr factory = createFactory(type, conf_name);
-                if (!factory)
-                {
-                    // misconfigured - protocol type doesn't exist
-                    stack.reset();
-                    break;
-                }
-
-                stack->append(factory);
-
-                if (!config.has(prefix + "impl"))
-                    break;
+                stack->append(createFactory(type, conf_name));
             }
 
+            if (!config.has(prefix + "impl"))
+                break;
+
             conf_name = "protocols." + config.getString(prefix + "impl");
             prefix = conf_name + ".";
 
-            if (!pset.insert(prefix).second)
-            {
-                // misconfigured - loop is detected
-                stack.reset();
-                break;
-            }
+            if (!pset.insert(conf_name).second)
+                throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name);
         }
 
-        if (!stack)
+        if (!stack || stack->size() == 0)
             continue;
 
         createServer(config, listen_host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
diff --git a/src/Server/TCPProtocolStack.h b/src/Server/TCPProtocolStack.h
index 0804d78336b..85896f7f416 100644
--- a/src/Server/TCPProtocolStack.h
+++ b/src/Server/TCPProtocolStack.h
@@ -111,6 +111,8 @@ public:
     {
         stack.push_back(factory);
     }
+
+    size_t size() { return stack.size(); }
 };
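[Editor's note — PATCH 007] Misconfiguration now fails fast with INVALID_CONFIG_PARAMETER instead of silently dropping the listener. The loop guard is worth spelling out: std::unordered_set::insert returns a pair whose second member is false when the element was already present, so a repeated section name along an impl chain means the chain has circled back:

    #include <string>
    #include <unordered_set>

    std::unordered_set<std::string> visited;
    visited.insert("protocols.a");                       // .second == true, first visit
    bool cycle = !visited.insert("protocols.a").second;  // true -> "impl" chain loops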
From 745e759146a03237412909a824653227e3fe3460 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Sun, 11 Sep 2022 14:32:37 +0000
Subject: [PATCH 008/252] bugs fixed, cleanup, working state

---
 programs/server/Server.cpp | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 28f0e34eb73..ff5750946ca 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1873,12 +1873,10 @@ void Server::createServers(
     http_params->setTimeout(settings.http_receive_timeout);
     http_params->setKeepAliveTimeout(keep_alive_timeout);
 
-
-
     Poco::Util::AbstractConfiguration::Keys protocols;
     config.keys("protocols", protocols);
 
-    auto createFactory = [&](const std::string & type, const std::string & conf_name) -> Poco::SharedPtr<TCPServerConnectionFactory> //TCPServerConnectionFactory::Ptr
+    auto createFactory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr
     {
         if (type == "tcp")
             return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false));
@@ -1906,16 +1904,20 @@ void Server::createServers(
     for (const auto & protocol : protocols)
     {
-        std::string conf_name = protocol;
-        std::string prefix = protocol + ".";
+        std::string conf_name = "protocols." + protocol;
+        std::string prefix = conf_name + ".";
         std::unordered_set<std::string> pset {prefix};
 
         if (config.has(prefix + "host") && config.has(prefix + "port"))
         {
+            std::string description {"<no description> protocol"};
+            if (config.has(prefix + "description"))
+                description = config.getString(prefix + "description");
             std::string port_name = prefix + "port";
-            std::string listen_host = prefix + "host";
+            std::string listen_host = config.getString(prefix + "host");
             bool is_secure = false;
             auto stack = std::make_unique<TCPProtocolStackFactory>(*this, conf_name);
+
             while (true)
             {
                 // if there is no "type" - it's a reference to another protocol and this is just an endpoint
@@ -1941,14 +1943,11 @@ void Server::createServers(
                 prefix = conf_name + ".";
 
                 if (!pset.insert(conf_name).second)
                     throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name);
             }
 
             if (!stack || stack->size() == 0)
-                continue;
+                throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol);
 
             createServer(config, listen_host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter
             {
@@ -1960,7 +1959,7 @@ void Server::createServers(
                 return ProtocolServerAdapter(
                     listen_host,
                     port_name.c_str(),
-                    "secure native protocol (tcp_secure): " + address.toString(),
+                    description + ": " + address.toString(),
                     std::make_unique<TCPServer>(
                         stack.release(),
                         server_pool,
                         socket,
                         new Poco::Net::TCPServerParams));
             });
         }
     }
-
-
-
 
     for (const auto & listen_host : listen_hosts)
     {
         /// HTTP
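[Editor's note — PATCH 008] The fixes that make the feature actually work are easy to miss in the diff. The important one: listen_host previously held the literal key text rather than the configured value. Isolated (before vs. after, illustrative):

    std::string listen_host_bug = prefix + "host";                    // yields "protocols.x.host"
    std::string listen_host_fix = config.getString(prefix + "host");  // yields e.g. "0.0.0.0"

The endpoint also gains a configurable description used in the listener banner, and an empty resolved stack is now an error rather than a silently skipped port.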
socket().setReceiveTimeout(settings.receive_timeout); + + std::string word; + bool eol; + + // Read PROXYv1 protocol header + // http://www.haproxy.org/download/1.8/doc/proxy-protocol.txt + + // read "PROXY" + if (!readWord(5, word, eol) || word != "PROXY" || eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + // read "TCP4" or "TCP6" or "UNKNOWN" + if (!readWord(7, word, eol)) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + if (word != "TCP4" && word != "TCP6" && word != "UNKNOWN") + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + if (word == "UNKNOWN" && eol) + return; + + if (eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + // read address + if (!readWord(39, word, eol) || eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + stack_data.forwarded_for = std::move(word); + + // read address + if (!readWord(39, word, eol) || eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + // read port + if (!readWord(5, word, eol) || eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + + // read port and "\r\n" + if (!readWord(5, word, eol) || !eol) + throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); +} + +bool ProxyV1Handler::readWord(int max_len, std::string & word, bool & eol) +{ + word.clear(); + eol = false; + + char ch = 0; + int n = 0; + bool is_cr = false; + try + { + for (++max_len; max_len > 0 || is_cr; --max_len) + { + n = socket().receiveBytes(&ch, 1); + if (n == 0) + { + socket().shutdown(); + return false; + } + if (n < 0) + break; + + if (is_cr) + return ch == 0x0A; + + if (ch == 0x0D) + { + is_cr = true; + eol = true; + continue; + } + + if (ch == ' ') + return true; + + word.push_back(ch); + } + } + catch (const Poco::Net::NetException & e) + { + throw NetException(e.displayText() + ", while reading from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::NETWORK_ERROR); + } + catch (const Poco::TimeoutException &) + { + throw NetException(fmt::format("Timeout exceeded while reading from socket ({}, {} ms)", + socket().peerAddress().toString(), + socket().getReceiveTimeout().totalMilliseconds()), ErrorCodes::SOCKET_TIMEOUT); + } + catch (const Poco::IOException & e) + { + throw NetException(e.displayText() + ", while reading from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::NETWORK_ERROR); + } + + if (n < 0) + throw NetException("Cannot read from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::CANNOT_READ_FROM_SOCKET); + + return false; +} + + + +} diff --git a/src/Server/ProxyV1Handler.h b/src/Server/ProxyV1Handler.h new file mode 100644 index 00000000000..062cc0e291a --- /dev/null +++ b/src/Server/ProxyV1Handler.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class ProxyV1Handler : public Poco::Net::TCPServerConnection +{ + using StreamSocket = Poco::Net::StreamSocket; +public: + explicit ProxyV1Handler(const StreamSocket & socket, IServer & server_, const std::string & conf_name_, TCPProtocolStackData & stack_data_) + : Poco::Net::TCPServerConnection(socket), server(server_), conf_name(conf_name_), stack_data(stack_data_) {} + + void 
run() override; + +protected: + bool readWord(int max_len, std::string & word, bool & eol); + +private: + IServer & server; + std::string conf_name; + TCPProtocolStackData & stack_data; +}; + +} diff --git a/src/Server/ProxyV1HandlerFactory.h b/src/Server/ProxyV1HandlerFactory.h new file mode 100644 index 00000000000..028596d745d --- /dev/null +++ b/src/Server/ProxyV1HandlerFactory.h @@ -0,0 +1,56 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +class ProxyV1HandlerFactory : public TCPServerConnectionFactory +{ +private: + IServer & server; + Poco::Logger * log; + std::string conf_name; + + class DummyTCPHandler : public Poco::Net::TCPServerConnection + { + public: + using Poco::Net::TCPServerConnection::TCPServerConnection; + void run() override {} + }; + +public: + explicit ProxyV1HandlerFactory(IServer & server_, const std::string & conf_name_) + : server(server_), log(&Poco::Logger::get("ProxyV1HandlerFactory")), conf_name(conf_name_) + { + } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override + { + TCPProtocolStackData stack_data; + return createConnection(socket, tcp_server, stack_data); + } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/* tcp_server*/, TCPProtocolStackData & stack_data) override + { + try + { + LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); + return new ProxyV1Handler(socket, server, conf_name, stack_data); + } + catch (const Poco::Net::NetException &) + { + LOG_TRACE(log, "TCP Request. Client is not connected (most likely RST packet was sent)."); + return new DummyTCPHandler(socket); + } + } +}; + +} diff --git a/src/Server/TCPProtocolStack.h b/src/Server/TCPProtocolStack.h deleted file mode 100644 index 85896f7f416..00000000000 --- a/src/Server/TCPProtocolStack.h +++ /dev/null @@ -1,344 +0,0 @@ -#pragma once - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Poco/Net/SSLManager.h" -#include - -#include "Interpreters/Context.h" -#include "Server/TCPProtocolStackData.h" -#include "base/types.h" - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NETWORK_ERROR; - extern const int SOCKET_TIMEOUT; - extern const int CANNOT_READ_FROM_SOCKET; - extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; -} - -class TCPConnectionAccessor : public Poco::Net::TCPServerConnection -{ -public: - using Poco::Net::TCPServerConnection::socket; - explicit TCPConnectionAccessor(const Poco::Net::StreamSocket & socket) : Poco::Net::TCPServerConnection(socket) {} -}; - -class TCPProtocolStack : public Poco::Net::TCPServerConnection -{ - using StreamSocket = Poco::Net::StreamSocket; - using TCPServerConnection = Poco::Net::TCPServerConnection; -private: - TCPServer & tcp_server; - std::list stack; - std::string conf_name; - -public: - TCPProtocolStack(TCPServer & tcp_server_, const StreamSocket & socket, const std::list & stack_, const std::string & conf_name_) - : TCPServerConnection(socket), tcp_server(tcp_server_), stack(stack_), conf_name(conf_name_) - {} - - void run() override - { - TCPProtocolStackData stack_data; - stack_data.socket = socket(); - for (auto & factory : stack) - { - std::unique_ptr connection(factory->createConnection(socket(), tcp_server, stack_data)); - connection->run(); - if (stack_data.socket != socket()) - socket() = stack_data.socket; -// 
if (auto * accessor = dynamic_cast(connection.get()); accessor) - // socket() = accessor->socket(); - } - } -}; - - -class TCPProtocolStackFactory : public TCPServerConnectionFactory -{ -private: - IServer & server [[maybe_unused]]; - Poco::Logger * log; - std::string conf_name; - std::list stack; - - class DummyTCPHandler : public Poco::Net::TCPServerConnection - { - public: - using Poco::Net::TCPServerConnection::TCPServerConnection; - void run() override {} - }; - -public: - template - explicit TCPProtocolStackFactory(IServer & server_, const std::string & conf_name_, T... factory) - : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")), conf_name(conf_name_), stack({factory...}) - { - } - - Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override - { - try - { - LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); - return new TCPProtocolStack(tcp_server, socket, stack, conf_name); - } - catch (const Poco::Net::NetException &) - { - LOG_TRACE(log, "TCP Request. Client is not connected (most likely RST packet was sent)."); - return new DummyTCPHandler(socket); - } - } - - void append(TCPServerConnectionFactory::Ptr factory) - { - stack.push_back(factory); - } - - size_t size() { return stack.size(); } -}; - - - -class TLSHandler : public Poco::Net::TCPServerConnection //TCPConnectionAccessor -{ - using StreamSocket = Poco::Net::StreamSocket; - using SecureStreamSocket = Poco::Net::SecureStreamSocket; -public: - explicit TLSHandler(const StreamSocket & socket, const std::string & conf_name_, TCPProtocolStackData & stack_data_) - : Poco::Net::TCPServerConnection(socket) //TCPConnectionAccessor(socket) - , conf_name(conf_name_) - , stack_data(stack_data_) - {} - - void run() override - { - socket() = SecureStreamSocket::attach(socket(), Poco::Net::SSLManager::instance().defaultServerContext()); - stack_data.socket = socket(); - } -private: - std::string conf_name; - TCPProtocolStackData & stack_data; -}; - - -class TLSHandlerFactory : public TCPServerConnectionFactory -{ -private: - IServer & server [[maybe_unused]]; - Poco::Logger * log; - std::string conf_name; - - class DummyTCPHandler : public Poco::Net::TCPServerConnection - { - public: - using Poco::Net::TCPServerConnection::TCPServerConnection; - void run() override {} - }; - -public: - explicit TLSHandlerFactory(IServer & server_, const std::string & conf_name_) - : server(server_), log(&Poco::Logger::get("TLSHandlerFactory")), conf_name(conf_name_) - { - } - - Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override - { - TCPProtocolStackData stack_data; - return createConnection(socket, tcp_server, stack_data); - } - - Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/* tcp_server*/, TCPProtocolStackData & stack_data) override - { - try - { - LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); - return new TLSHandler(socket, conf_name, stack_data); - } - catch (const Poco::Net::NetException &) - { - LOG_TRACE(log, "TCP Request. 
Client is not connected (most likely RST packet was sent)."); - return new DummyTCPHandler(socket); - } - } -}; - - -class ProxyV1Handler : public Poco::Net::TCPServerConnection -{ - using StreamSocket = Poco::Net::StreamSocket; -public: - explicit ProxyV1Handler(const StreamSocket & socket, IServer & server_, const std::string & conf_name_, TCPProtocolStackData & stack_data_) - : Poco::Net::TCPServerConnection(socket), server(server_), conf_name(conf_name_), stack_data(stack_data_) {} - - void run() override - { - const auto & settings = server.context()->getSettingsRef(); - socket().setReceiveTimeout(settings.receive_timeout); - - std::string word; - bool eol; - - // Read PROXYv1 protocol header - // http://www.haproxy.org/download/1.8/doc/proxy-protocol.txt - - // read "PROXY" - if (!readWord(5, word, eol) || word != "PROXY" || eol) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - - // read "TCP4" or "TCP6" or "UNKNOWN" - if (!readWord(7, word, eol)) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - - if (word != "TCP4" && word != "TCP6" && word != "UNKNOWN") - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - - if (word == "UNKNOWN" && eol) - return; - - if (eol) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - - // read address - if (!readWord(39, word, eol) || eol) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - - stack_data.forwarded_for = std::move(word); - - // read address - if (!readWord(39, word, eol) || eol) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - - // read port - if (!readWord(5, word, eol) || eol) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - - // read port and "\r\n" - if (!readWord(5, word, eol) || !eol) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); - } - -protected: - bool readWord(int max_len, std::string & word, bool & eol) - { - word.clear(); - eol = false; - - char ch = 0; - int n = 0; - bool is_cr = false; - try - { - for (++max_len; max_len > 0 || is_cr; --max_len) - { - n = socket().receiveBytes(&ch, 1); - if (n == 0) - { - socket().shutdown(); - return false; - } - if (n < 0) - break; - - if (is_cr) - return ch == 0x0A; - - if (ch == 0x0D) - { - is_cr = true; - eol = true; - continue; - } - - if (ch == ' ') - return true; - - word.push_back(ch); - } - } - catch (const Poco::Net::NetException & e) - { - throw NetException(e.displayText() + ", while reading from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::NETWORK_ERROR); - } - catch (const Poco::TimeoutException &) - { - throw NetException(fmt::format("Timeout exceeded while reading from socket ({}, {} ms)", - socket().peerAddress().toString(), - socket().getReceiveTimeout().totalMilliseconds()), ErrorCodes::SOCKET_TIMEOUT); - } - catch (const Poco::IOException & e) - { - throw NetException(e.displayText() + ", while reading from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::NETWORK_ERROR); - } - - if (n < 0) - throw NetException("Cannot read from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::CANNOT_READ_FROM_SOCKET); - - return false; - } - -private: - IServer & server; - std::string conf_name; - 
TCPProtocolStackData & stack_data; -}; - -class ProxyV1HandlerFactory : public TCPServerConnectionFactory -{ -private: - IServer & server; - Poco::Logger * log; - std::string conf_name; - - class DummyTCPHandler : public Poco::Net::TCPServerConnection - { - public: - using Poco::Net::TCPServerConnection::TCPServerConnection; - void run() override {} - }; - -public: - explicit ProxyV1HandlerFactory(IServer & server_, const std::string & conf_name_) - : server(server_), log(&Poco::Logger::get("ProxyV1HandlerFactory")), conf_name(conf_name_) - { - } - - Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override - { - TCPProtocolStackData stack_data; - return createConnection(socket, tcp_server, stack_data); - } - - Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/* tcp_server*/, TCPProtocolStackData & stack_data) override - { - try - { - LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); - return new ProxyV1Handler(socket, server, conf_name, stack_data); - } - catch (const Poco::Net::NetException &) - { - LOG_TRACE(log, "TCP Request. Client is not connected (most likely RST packet was sent)."); - return new DummyTCPHandler(socket); - } - } -}; - -} diff --git a/src/Server/TCPProtocolStackFactory.h b/src/Server/TCPProtocolStackFactory.h new file mode 100644 index 00000000000..87d5dba350f --- /dev/null +++ b/src/Server/TCPProtocolStackFactory.h @@ -0,0 +1,57 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + + +class TCPProtocolStackFactory : public TCPServerConnectionFactory +{ +private: + IServer & server [[maybe_unused]]; + Poco::Logger * log; + std::string conf_name; + std::list stack; + + class DummyTCPHandler : public Poco::Net::TCPServerConnection + { + public: + using Poco::Net::TCPServerConnection::TCPServerConnection; + void run() override {} + }; + +public: + template + explicit TCPProtocolStackFactory(IServer & server_, const std::string & conf_name_, T... factory) + : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")), conf_name(conf_name_), stack({factory...}) + { + } + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override + { + try + { + LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); + return new TCPProtocolStackHandler(tcp_server, socket, stack, conf_name); + } + catch (const Poco::Net::NetException &) + { + LOG_TRACE(log, "TCP Request. 
Client is not connected (most likely RST packet was sent).");
+            return new DummyTCPHandler(socket);
+        }
+    }
+
+    void append(TCPServerConnectionFactory::Ptr factory)
+    {
+        stack.push_back(factory);
+    }
+
+    size_t size() { return stack.size(); }
+};
+
+
+}
diff --git a/src/Server/TCPProtocolStackHandler.h b/src/Server/TCPProtocolStackHandler.h
new file mode 100644
index 00000000000..7b513298022
--- /dev/null
+++ b/src/Server/TCPProtocolStackHandler.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include
+#include
+#include
+#include "Server/TCPProtocolStackData.h"
+
+
+namespace DB
+{
+
+
+class TCPProtocolStackHandler : public Poco::Net::TCPServerConnection
+{
+    using StreamSocket = Poco::Net::StreamSocket;
+    using TCPServerConnection = Poco::Net::TCPServerConnection;
+private:
+    TCPServer & tcp_server;
+    std::list stack;
+    std::string conf_name;
+
+public:
+    TCPProtocolStackHandler(TCPServer & tcp_server_, const StreamSocket & socket, const std::list & stack_, const std::string & conf_name_)
+        : TCPServerConnection(socket), tcp_server(tcp_server_), stack(stack_), conf_name(conf_name_)
+    {}
+
+    void run() override
+    {
+        TCPProtocolStackData stack_data;
+        stack_data.socket = socket();
+        for (auto & factory : stack)
+        {
+            std::unique_ptr connection(factory->createConnection(socket(), tcp_server, stack_data));
+            connection->run();
+            if (stack_data.socket != socket())
+                socket() = stack_data.socket;
+        }
+    }
+};
+
+
+}
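The handler above is the heart of the composition: it runs each layer's connection object in order and, whenever a layer swaps the socket (as the TLS layer below does), carries the upgraded socket forward through TCPProtocolStackData. As a minimal sketch of wiring a stack by hand, assuming only the constructor signatures introduced in this patch series (the "secure_native" section name and the `server` reference are illustrative, not taken from the patches):

    // Sketch only: compose PROXY v1 -> TLS -> native protocol by hand.
    // "secure_native" is a hypothetical config section name; server is an IServer &.
    auto stack_factory = std::make_unique<TCPProtocolStackFactory>(
        server, "secure_native",
        new ProxyV1HandlerFactory(server, "secure_native"),   // consumes the PROXY v1 preamble
        new TLSHandlerFactory(server, "secure_native"),       // re-attaches the socket as a SecureStreamSocket
        new TCPHandlerFactory(server, /* secure */ false, /* proxy protocol */ false));
    // TCPProtocolStackHandler::run() then executes the three layers in order,
    // propagating any socket upgrade through stack_data.socket.

diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h
new file mode 100644
index 00000000000..623a9999475
--- /dev/null
+++ b/src/Server/TLSHandler.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include
+#include
+#include
+#include "Server/TCPProtocolStackData.h"
+
+
+namespace DB
+{
+
+
+class TLSHandler : public Poco::Net::TCPServerConnection
+{
+    using StreamSocket = Poco::Net::StreamSocket;
+    using SecureStreamSocket = Poco::Net::SecureStreamSocket;
+public:
+    explicit TLSHandler(const StreamSocket & socket, const std::string & conf_name_, TCPProtocolStackData & stack_data_)
+        : Poco::Net::TCPServerConnection(socket)
+        , conf_name(conf_name_)
+        , stack_data(stack_data_)
+    {}
+
+    void run() override
+    {
+        socket() = SecureStreamSocket::attach(socket(), Poco::Net::SSLManager::instance().defaultServerContext());
+        stack_data.socket = socket();
+    }
+private:
+    std::string conf_name;
+    TCPProtocolStackData & stack_data;
+};
+
+
+}
diff --git a/src/Server/TLSHandlerFactory.h b/src/Server/TLSHandlerFactory.h
new file mode 100644
index 00000000000..283e96252c3
--- /dev/null
+++ b/src/Server/TLSHandlerFactory.h
@@ -0,0 +1,59 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+
+class TLSHandlerFactory : public TCPServerConnectionFactory
+{
+private:
+    IServer & server [[maybe_unused]];
+    Poco::Logger * log;
+    std::string conf_name;
+
+    class DummyTCPHandler : public Poco::Net::TCPServerConnection
+    {
+    public:
+        using Poco::Net::TCPServerConnection::TCPServerConnection;
+        void run() override {}
+    };
+
+public:
+    explicit TLSHandlerFactory(IServer & server_, const std::string & conf_name_)
+        : server(server_), log(&Poco::Logger::get("TLSHandlerFactory")), conf_name(conf_name_)
+    {
+    }
+
+    Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override
+    {
+        TCPProtocolStackData stack_data;
+        return createConnection(socket, tcp_server, stack_data);
+    }
+
+    Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer &/*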
tcp_server*/, TCPProtocolStackData & stack_data) override + { + try + { + LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); + return new TLSHandler(socket, conf_name, stack_data); + } + catch (const Poco::Net::NetException &) + { + LOG_TRACE(log, "TCP Request. Client is not connected (most likely RST packet was sent)."); + return new DummyTCPHandler(socket); + } + } +}; + + +} From 7985f4ba16cdc5e396ab5336ce5702c264158dff Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 11 Sep 2022 21:35:38 +0000 Subject: [PATCH 010/252] cleanup, respect USE_SSL --- programs/server/Server.cpp | 38 ++++++++------------------------------ src/Server/TLSHandler.h | 18 ++++++++++++++---- 2 files changed, 22 insertions(+), 34 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index e5f62f2f885..9e434c19fc6 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1882,8 +1882,15 @@ void Server::createServers( { if (type == "tcp") return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false)); + if (type == "tls") +#if USE_SSL return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this, conf_name)); +#else + throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + if (type == "proxy1") return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(*this, conf_name)); if (type == "mysql") @@ -2055,39 +2062,11 @@ void Server::createServers( createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter { #if USE_SSL - //Poco::Net::SecureServerSocket socket; - Poco::Net::ServerSocket socket; + Poco::Net::SecureServerSocket socket; auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(settings.receive_timeout); socket.setSendTimeout(settings.send_timeout); -/* - TCPProtocolStackFactory *stack = new TCPProtocolStackFactory(*this); - stack->append(new TLSHandlerFactory(*this)); - stack->append(new TCPHandlerFactory(*this, false, false)); - - return ProtocolServerAdapter( - listen_host, - port_name, - "secure native protocol (tcp_secure): " + address.toString(), - std::make_unique( - stack, - server_pool, - socket, - new Poco::Net::TCPServerParams)); -*/ - return ProtocolServerAdapter( - listen_host, - port_name, - "secure native protocol (tcp_secure): " + address.toString(), - std::make_unique( - new TCPProtocolStackFactory(*this, "", new TLSHandlerFactory(*this, ""), new TCPHandlerFactory(*this, false, false)), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - - -/* return ProtocolServerAdapter( listen_host, port_name, @@ -2097,7 +2076,6 @@ void Server::createServers( server_pool, socket, new Poco::Net::TCPServerParams)); -*/ #else UNUSED(port); throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h index 623a9999475..4fea43523cd 100644 --- a/src/Server/TLSHandler.h +++ b/src/Server/TLSHandler.h @@ -1,19 +1,24 @@ #pragma once #include -#include -#include #include "Server/TCPProtocolStackData.h" +#if USE_SSL +# include +# include +#endif namespace DB { +namespace ErrorCodes +{ + extern const int SUPPORT_IS_DISABLED; +} class TLSHandler : public Poco::Net::TCPServerConnection { using StreamSocket = Poco::Net::StreamSocket; - using SecureStreamSocket = 
Poco::Net::SecureStreamSocket; public: explicit TLSHandler(const StreamSocket & socket, const std::string & conf_name_, TCPProtocolStackData & stack_data_) : Poco::Net::TCPServerConnection(socket) @@ -23,8 +28,13 @@ public: void run() override { - socket() = SecureStreamSocket::attach(socket(), Poco::Net::SSLManager::instance().defaultServerContext()); +#if USE_SSL + socket() = Poco::Net::SecureStreamSocket::attach(socket(), Poco::Net::SSLManager::instance().defaultServerContext()); stack_data.socket = socket(); +#else + throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif } private: std::string conf_name; From ffa7d3b121fa1910afdf64c278c70f8cd69caca0 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 11 Sep 2022 21:51:25 +0000 Subject: [PATCH 011/252] cleanup --- programs/server/Server.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 9e434c19fc6..b122fcbfed3 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -71,7 +70,6 @@ #include #include #include -#include #include #include "MetricsTransmitter.h" #include @@ -82,10 +80,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -93,8 +93,6 @@ #include #include -#include - #include "config_core.h" #include "Common/config_version.h" @@ -2066,13 +2064,12 @@ void Server::createServers( auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(settings.receive_timeout); socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( listen_host, port_name, "secure native protocol (tcp_secure): " + address.toString(), std::make_unique( - new TCPHandlerFactory(*this, true, false), + new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false), server_pool, socket, new Poco::Net::TCPServerParams)); From 7c855c9da246f64828dd3c658c85443911f279c3 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 11 Sep 2022 22:44:27 +0000 Subject: [PATCH 012/252] style fix --- programs/server/Server.cpp | 4 ++-- src/Server/ProxyV1Handler.cpp | 7 ++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index b122fcbfed3..368971b3a34 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1882,7 +1882,7 @@ void Server::createServers( return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false)); if (type == "tls") -#if USE_SSL +#if USE_SSL return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this, conf_name)); #else throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", @@ -1975,7 +1975,7 @@ void Server::createServers( }); } } - + for (const auto & listen_host : listen_hosts) { /// HTTP diff --git a/src/Server/ProxyV1Handler.cpp b/src/Server/ProxyV1Handler.cpp index b3ed8b7bd60..838a1de1c04 100644 --- a/src/Server/ProxyV1Handler.cpp +++ b/src/Server/ProxyV1Handler.cpp @@ -15,12 +15,11 @@ namespace ErrorCodes extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; } - void ProxyV1Handler::run() { const auto & settings = server.context()->getSettingsRef(); socket().setReceiveTimeout(settings.receive_timeout); - + std::string word; bool 
eol;
@@ -57,7 +56,7 @@ void ProxyV1Handler::run()
     // read port
     if (!readWord(5, word, eol) || eol)
         throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
-
+
     // read port and "\r\n"
     if (!readWord(5, word, eol) || !eol)
         throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);
@@ -121,6 +120,4 @@ bool ProxyV1Handler::readWord(int max_len, std::string & word, bool & eol)
 
     return false;
 }
-
-
 }
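For reference, the preamble this parser consumes is the human-readable v1 form of the PROXY protocol from the haproxy document cited in the code; the readWord() limits used above (5, 7, 39, 39, 5, 5) are the per-token maxima for the six space-separated words. A representative header, with illustrative addresses:

    #include <string_view>

    // Illustrative PROXY v1 preamble sent by a proxy before the wrapped protocol
    // begins: the fixed token "PROXY", the family (TCP4/TCP6/UNKNOWN), source and
    // destination addresses, then source and destination ports, terminated by CRLF.
    // The source address is what ProxyV1Handler stores in stack_data.forwarded_for.
    static constexpr std::string_view example_proxy_v1_header =
        "PROXY TCP4 192.0.2.10 198.51.100.7 56324 9000\r\n";

From 0d89bdbbb922fc21060e2c639a91ff1455197b54 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com>
Date: Sun, 11 Sep 2022 19:25:33 -0400
Subject: [PATCH 013/252] maybe_unused

---
 src/Server/TLSHandler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h
index 4fea43523cd..f8cb94eb004 100644
--- a/src/Server/TLSHandler.h
+++ b/src/Server/TLSHandler.h
@@ -38,7 +38,7 @@ public:
     }
 private:
     std::string conf_name;
-    TCPProtocolStackData & stack_data;
+    TCPProtocolStackData & stack_data [[maybe_unused]];
 };
 
 
From d550604e281206a14cd39eec9c975d653e37f651 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Tue, 13 Sep 2022 23:12:53 +0000
Subject: [PATCH 014/252] respect listen_host config param, fix updateServers and getListenTry

---
 programs/server/Server.cpp | 131 ++++++++++++++++++++++---------------
 1 file changed, 78 insertions(+), 53 deletions(-)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 0c2af7a0158..6336c1f795f 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -390,7 +390,16 @@ bool getListenTry(const Poco::Util::AbstractConfiguration & config)
 {
     bool listen_try = config.getBool("listen_try", false);
     if (!listen_try)
-        listen_try = DB::getMultipleValuesFromConfig(config, "", "listen_host").empty();
+    {
+        Poco::Util::AbstractConfiguration::Keys protocols;
+        config.keys("protocols", protocols);
+        listen_try =
+            DB::getMultipleValuesFromConfig(config, "", "listen_host").empty() &&
+            std::none_of(protocols.begin(), protocols.end(), [&](const auto & protocol)
+            {
+                return config.has("protocols." + protocol + ".host") && config.has("protocols." + protocol + ".port");
+            });
+    }
     return listen_try;
 }
 
@@ -1878,7 +1887,7 @@ void Server::createServers(
     Poco::Util::AbstractConfiguration::Keys protocols;
     config.keys("protocols", protocols);
 
-    auto createFactory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr
+    auto create_factory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr
     {
         if (type == "tcp")
             return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false));
@@ -1915,66 +1924,74 @@ void Server::createServers(
 
     for (const auto & protocol : protocols)
     {
-        std::string conf_name = "protocols." + protocol;
-        std::string prefix = conf_name + ".";
-        std::unordered_set pset {prefix};
-
-        if (config.has(prefix + "host") && config.has(prefix + "port"))
+        std::vector hosts;
+        if (config.has("protocols." + protocol + ".host"))
+            hosts.push_back(config.getString("protocols."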
+ protocol + ".host")); + else + hosts = listen_hosts; + + for (const auto & host : hosts) { - std::string description {" protocol"}; - if (config.has(prefix + "description")) - description = config.getString(prefix + "description"); - std::string port_name = prefix + "port"; - std::string listen_host = config.getString(prefix + "host"); - bool is_secure = false; - auto stack = std::make_unique(*this, conf_name); + std::string conf_name = "protocols." + protocol; + std::string prefix = conf_name + "."; + std::unordered_set pset {prefix}; - while (true) + if (config.has(prefix + "port")) { - // if there is no "type" - it's a reference to another protocol and this is just an endpoint - if (config.has(prefix + "type")) + std::string description {" protocol"}; + if (config.has(prefix + "description")) + description = config.getString(prefix + "description"); + std::string port_name = prefix + "port"; + bool is_secure = false; + auto stack = std::make_unique(*this, conf_name); + + while (true) { - std::string type = config.getString(prefix + "type"); - if (type == "tls") + // if there is no "type" - it's a reference to another protocol and this is just an endpoint + if (config.has(prefix + "type")) { - if (is_secure) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); - is_secure = true; + std::string type = config.getString(prefix + "type"); + if (type == "tls") + { + if (is_secure) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); + is_secure = true; + } + + stack->append(create_factory(type, conf_name)); } - stack->append(createFactory(type, conf_name)); + if (!config.has(prefix + "impl")) + break; + + conf_name = "protocols." + config.getString(prefix + "impl"); + prefix = conf_name + "."; + + if (!pset.insert(conf_name).second) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); } - if (!config.has(prefix + "impl")) - break; + if (!stack || stack->size() == 0) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); - conf_name = "protocols." 
+ config.getString(prefix + "impl"); - prefix = conf_name + "."; + createServer(config, host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, host, port, is_secure); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); - if (!pset.insert(conf_name).second) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); + return ProtocolServerAdapter( + host, + port_name.c_str(), + description + ": " + address.toString(), + std::make_unique( + stack.release(), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); } - - if (!stack || stack->size() == 0) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); - - createServer(config, listen_host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, is_secure); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - - return ProtocolServerAdapter( - listen_host, - port_name.c_str(), - description + ": " + address.toString(), - std::make_unique( - stack.release(), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); } } @@ -2223,9 +2240,17 @@ void Server::updateServers( { if (!server.isStopping()) { - bool has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end(); - bool has_port = !config.getString(server.getPortName(), "").empty(); - if (!has_host || !has_port || config.getInt(server.getPortName()) != server.portNumber()) + std::string port_name = server.getPortName(); + bool has_host = false; + if (port_name.starts_with("protocols.")) + { + std::string protocol = port_name.substr(0, port_name.find_last_of('.')); + has_host = config.has(protocol + ".host"); + } + if (!has_host) + has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end(); + bool has_port = !config.getString(port_name, "").empty(); + if (!has_host || !has_port || config.getInt(port_name) != server.portNumber()) { server.stop(); LOG_INFO(log, "Stopped listening for {}", server.getDescription()); From 8f079922926df010b837c57504ece066b2b161ad Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 14 Sep 2022 06:24:44 +0000 Subject: [PATCH 015/252] allow to specify key and certificate for TLS layer --- src/Server/TCPProtocolStackFactory.h | 3 +++ src/Server/TLSHandler.h | 23 ++++++++++++++++++----- src/Server/TLSHandlerFactory.h | 7 ++++++- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/Server/TCPProtocolStackFactory.h b/src/Server/TCPProtocolStackFactory.h index 87d5dba350f..4acbd3e5059 100644 --- a/src/Server/TCPProtocolStackFactory.h +++ b/src/Server/TCPProtocolStackFactory.h @@ -3,6 +3,9 @@ #include #include #include +#include +#include +#include namespace DB diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h index f8cb94eb004..e753910e1c0 100644 --- a/src/Server/TLSHandler.h +++ b/src/Server/TLSHandler.h @@ -1,7 +1,10 @@ #pragma once +#include #include -#include "Server/TCPProtocolStackData.h" +#include +#include +#include #if USE_SSL # include @@ -18,18 +21,27 @@ namespace ErrorCodes class TLSHandler : public 
Poco::Net::TCPServerConnection { +#if USE_SSL + using SecureStreamSocket = Poco::Net::SecureStreamSocket; + using SSLManager = Poco::Net::SSLManager; + using Context = Poco::Net::Context; +#endif using StreamSocket = Poco::Net::StreamSocket; public: - explicit TLSHandler(const StreamSocket & socket, const std::string & conf_name_, TCPProtocolStackData & stack_data_) + explicit TLSHandler(const StreamSocket & socket, const std::string & key_, const std::string & certificate_, TCPProtocolStackData & stack_data_) : Poco::Net::TCPServerConnection(socket) - , conf_name(conf_name_) + , key(key_) + , certificate(certificate_) , stack_data(stack_data_) {} void run() override { #if USE_SSL - socket() = Poco::Net::SecureStreamSocket::attach(socket(), Poco::Net::SSLManager::instance().defaultServerContext()); + auto ctx = SSLManager::instance().defaultServerContext(); + if (!key.empty() && !certificate.empty()) + ctx = new Context(Context::Usage::SERVER_USE, key, certificate, ctx->getCAPaths().caLocation); + socket() = SecureStreamSocket::attach(socket(), ctx); stack_data.socket = socket(); #else throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", @@ -37,7 +49,8 @@ public: #endif } private: - std::string conf_name; + std::string key [[maybe_unused]]; + std::string certificate [[maybe_unused]]; TCPProtocolStackData & stack_data [[maybe_unused]]; }; diff --git a/src/Server/TLSHandlerFactory.h b/src/Server/TLSHandlerFactory.h index 283e96252c3..8063ffa783d 100644 --- a/src/Server/TLSHandlerFactory.h +++ b/src/Server/TLSHandlerFactory.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -45,7 +46,11 @@ public: try { LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); - return new TLSHandler(socket, conf_name, stack_data); + return new TLSHandler( + socket, + server.config().getString(conf_name + ".privateKeyFile", ""), + server.config().getString(conf_name + ".certificateFile", ""), + stack_data); } catch (const Poco::Net::NetException &) { From b939379da676611262b400ebba778b75c84fef2b Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 14 Sep 2022 16:29:26 +0000 Subject: [PATCH 016/252] bug fix, merge fix, style fix --- programs/server/Server.cpp | 10 +++++----- src/Server/TLSHandler.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index fda0ccf491a..624c312468e 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1913,15 +1913,15 @@ void Server::createServers( return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory(*this)); if (type == "http") return TCPServerConnectionFactory::Ptr( - new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, async_metrics, "HTTPHandler-factory")) + new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory")) ); if (type == "prometheus") return TCPServerConnectionFactory::Ptr( - new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, async_metrics, "PrometheusHandler-factory")) + new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory")) ); if (type == "interserver") return TCPServerConnectionFactory::Ptr( - new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, async_metrics, 
"InterserverIOHTTPHandler-factory")) + new HTTPServerConnectionFactory(context(), http_params, createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory")) ); throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol configuration error, unknown protocol name '{}'", type); @@ -1934,12 +1934,12 @@ void Server::createServers( hosts.push_back(config.getString("protocols." + protocol + ".host")); else hosts = listen_hosts; - + for (const auto & host : hosts) { std::string conf_name = "protocols." + protocol; std::string prefix = conf_name + "."; - std::unordered_set pset {prefix}; + std::unordered_set pset {conf_name}; if (config.has(prefix + "port")) { diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h index e753910e1c0..fa2772cfd41 100644 --- a/src/Server/TLSHandler.h +++ b/src/Server/TLSHandler.h @@ -21,7 +21,7 @@ namespace ErrorCodes class TLSHandler : public Poco::Net::TCPServerConnection { -#if USE_SSL +#if USE_SSL using SecureStreamSocket = Poco::Net::SecureStreamSocket; using SSLManager = Poco::Net::SSLManager; using Context = Poco::Net::Context; From 910d49302cc8669899534bbf5d77f84a5f8a42ba Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 14 Sep 2022 19:05:37 +0000 Subject: [PATCH 017/252] USE_SSL fix --- src/Server/TLSHandler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h index fa2772cfd41..32f0ca59776 100644 --- a/src/Server/TLSHandler.h +++ b/src/Server/TLSHandler.h @@ -1,12 +1,12 @@ #pragma once -#include #include #include #include #include #if USE_SSL +# include # include # include #endif From 3a9996f04c84c2c535b5c235b3f8cde45751f2fe Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 14 Sep 2022 09:09:47 +0000 Subject: [PATCH 018/252] Add bg thread for snapshot upload to S3 --- src/Coordination/KeeperDispatcher.cpp | 42 +++++++++++++++++++++++- src/Coordination/KeeperDispatcher.h | 7 ++++ src/Coordination/KeeperSnapshotManager.h | 2 +- src/Coordination/KeeperStateMachine.cpp | 2 ++ 4 files changed, 51 insertions(+), 2 deletions(-) diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 261e43d80e4..2956d45e86c 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -30,6 +30,7 @@ namespace ErrorCodes KeeperDispatcher::KeeperDispatcher() : responses_queue(std::numeric_limits::max()) + , snapshots_backup_queue(std::numeric_limits::max()) , configuration_and_settings(std::make_shared()) , log(&Poco::Logger::get("KeeperDispatcher")) { @@ -191,7 +192,16 @@ void KeeperDispatcher::snapshotThread() try { - task.create_snapshot(std::move(task.snapshot)); + auto snapshot_path = task.create_snapshot(std::move(task.snapshot)); + + if (snapshot_path.empty()) + continue; + + if (isLeader()) + { + if (!snapshots_backup_queue.push(snapshot_path)) + LOG_WARNING(log, "Failed to add snapshot {} to backup queue", snapshot_path); + } } catch (...) { @@ -200,6 +210,31 @@ void KeeperDispatcher::snapshotThread() } } +void KeeperDispatcher::snapshotBackupThread() +{ + setThreadName("KeeperBSnpT"); + while (!shutdown_called) + { + std::string snapshot_path; + if (!snapshots_backup_queue.pop(snapshot_path)) + break; + + if (shutdown_called) + break; + + try + { + LOG_INFO(log, "Will try to backup snapshot on {}", snapshot_path); + ReadBufferFromFile snapshot_file(snapshot_path); + } + catch (...) 
+ { + LOG_INFO(log, "Failed to backup {}", snapshot_path); + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } +} + void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response) { std::lock_guard lock(session_to_response_callback_mutex); @@ -284,6 +319,7 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf request_thread = ThreadFromGlobalPool([this] { requestThread(); }); responses_thread = ThreadFromGlobalPool([this] { responseThread(); }); snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); }); + snapshot_backup_thread = ThreadFromGlobalPool([this] { snapshotBackupThread(); }); server = std::make_unique(configuration_and_settings, config, responses_queue, snapshots_queue); @@ -349,6 +385,10 @@ void KeeperDispatcher::shutdown() if (snapshot_thread.joinable()) snapshot_thread.join(); + snapshots_backup_queue.finish(); + if (snapshot_backup_thread.joinable()) + snapshot_backup_thread.join(); + update_configuration_queue.finish(); if (update_configuration_thread.joinable()) update_configuration_thread.join(); diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 5e2701299f4..6023d52ec04 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -36,6 +36,9 @@ private: ResponsesQueue responses_queue; SnapshotsQueue snapshots_queue{1}; + using SnapshotBackupQueue = ConcurrentBoundedQueue; + SnapshotBackupQueue snapshots_backup_queue; + /// More than 1k updates is definitely misconfiguration. UpdateConfigurationQueue update_configuration_queue{1000}; @@ -62,6 +65,8 @@ private: ThreadFromGlobalPool session_cleaner_thread; /// Dumping new snapshots to disk ThreadFromGlobalPool snapshot_thread; + /// Backup new snapshots to S3 + ThreadFromGlobalPool snapshot_backup_thread; /// Apply or wait for configuration changes ThreadFromGlobalPool update_configuration_thread; @@ -85,6 +90,8 @@ private: void sessionCleanerTask(); /// Thread create snapshots in the background void snapshotThread(); + /// Thread backup snapshots to S3 in the background + void snapshotBackupThread(); /// Thread apply or wait configuration changes from leader void updateConfigurationThread(); diff --git a/src/Coordination/KeeperSnapshotManager.h b/src/Coordination/KeeperSnapshotManager.h index c00ce9421e7..52647712083 100644 --- a/src/Coordination/KeeperSnapshotManager.h +++ b/src/Coordination/KeeperSnapshotManager.h @@ -87,7 +87,7 @@ public: }; using KeeperStorageSnapshotPtr = std::shared_ptr; -using CreateSnapshotCallback = std::function; +using CreateSnapshotCallback = std::function; using SnapshotMetaAndStorage = std::pair; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index c5a66ce29ca..a05e1ee91fa 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -400,6 +400,8 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res } when_done(ret, exception); + + return ret ? 
latest_snapshot_path : ""; }; From 9fdd2f2e61d6f8787ba774b7818abdfa04e39459 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 15 Sep 2022 07:45:28 +0000 Subject: [PATCH 019/252] Basic implementation for S3 snapshot upload --- src/Coordination/KeeperDispatcher.cpp | 135 +++++++++++++++++- src/Coordination/KeeperDispatcher.h | 8 ++ src/IO/S3/PocoHTTPClient.h | 12 +- src/IO/S3Common.cpp | 55 +++++-- src/IO/S3Common.h | 17 +++ .../ExternalDataSourceConfiguration.h | 2 +- src/Storages/StorageS3.h | 2 +- src/Storages/StorageS3Settings.cpp | 37 +---- src/Storages/StorageS3Settings.h | 36 +---- 9 files changed, 223 insertions(+), 81 deletions(-) diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 2956d45e86c..915acc8a33f 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -1,13 +1,21 @@ #include +#include #include #include #include #include #include +#include #include #include #include #include +#include "Core/UUID.h" +#include "IO/S3/PocoHTTPClient.h" +#include +#include +#include +#include namespace CurrentMetrics { @@ -197,7 +205,7 @@ void KeeperDispatcher::snapshotThread() if (snapshot_path.empty()) continue; - if (isLeader()) + if (isLeader() && getBackupS3Client() != nullptr) { if (!snapshots_backup_queue.push(snapshot_path)) LOG_WARNING(log, "Failed to add snapshot {} to backup queue", snapshot_path); @@ -210,9 +218,98 @@ void KeeperDispatcher::snapshotThread() } } +struct KeeperDispatcher::S3Configuration +{ + S3Configuration(S3::URI uri_, S3::AuthSettings auth_settings_, std::shared_ptr client_) + : uri(std::move(uri_)) + , auth_settings(std::move(auth_settings_)) + , client(std::move(client_)) + {} + + S3::URI uri; + S3::AuthSettings auth_settings; + std::shared_ptr client; +}; + +void KeeperDispatcher::updateS3Configuration(const Poco::Util::AbstractConfiguration & config) +{ + try + { + const std::string config_prefix = "keeper_server.s3_backup"; + + if (!config.has(config_prefix)) + { + std::lock_guard client_lock{backup_s3_client_mutex}; + if (backup_s3_client) + LOG_INFO(log, "S3 backup configuration was removed"); + backup_s3_client = nullptr; + return; + } + + auto auth_settings = S3::AuthSettings::loadFromConfig(config_prefix, config); + + auto endpoint = config.getString(config_prefix + ".endpoint"); + + std::unique_lock client_lock{backup_s3_client_mutex}; + + if (backup_s3_client && backup_s3_client->client && auth_settings == backup_s3_client->auth_settings && backup_s3_client->uri.endpoint == endpoint) + return; + + LOG_INFO(log, "S3 backup configuration was updated"); + + client_lock.unlock(); + + auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key); + HeaderCollection headers = auth_settings.headers; + + static constexpr size_t s3_max_redirects = 10; + static constexpr bool enable_s3_requests_logging = false; + + auto new_uri = S3::URI{Poco::URI(endpoint)}; + + if (!new_uri.key.empty()) + { + LOG_ERROR(log, "Invalid endpoint defined for S3 backup, it shouldn't contain key, endpoint: {}", endpoint); + return; + } + + S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( + auth_settings.region, + RemoteHostFilter(), s3_max_redirects, + enable_s3_requests_logging, + /* for_disk_s3 = */ false); + + client_configuration.endpointOverride = new_uri.endpoint; + + auto client = S3::ClientFactory::instance().create( + client_configuration, + new_uri.is_virtual_hosted_style, + 
credentials.GetAWSAccessKeyId(), + credentials.GetAWSSecretKey(), + auth_settings.server_side_encryption_customer_key_base64, + std::move(headers), + auth_settings.use_environment_credentials.value_or(false), + auth_settings.use_insecure_imds_request.value_or(false)); + + auto new_client = std::make_shared(std::move(new_uri), std::move(auth_settings), std::move(client)); + + client_lock.lock(); + backup_s3_client = std::move(new_client); + client_lock.unlock(); + LOG_INFO(log, "S3 backup client was updated"); + } + catch (...) + { + LOG_ERROR(log, "Failed to create an S3 client for backup"); + tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + void KeeperDispatcher::snapshotBackupThread() { setThreadName("KeeperBSnpT"); + + UUID uuid = UUIDHelpers::generateV4(); while (!shutdown_called) { std::string snapshot_path; @@ -224,8 +321,36 @@ void KeeperDispatcher::snapshotBackupThread() try { + auto backup_client = getBackupS3Client(); + if (backup_client == nullptr) + continue; + LOG_INFO(log, "Will try to backup snapshot on {}", snapshot_path); ReadBufferFromFile snapshot_file(snapshot_path); + + S3Settings::ReadWriteSettings read_write_settings; + read_write_settings.upload_part_size_multiply_parts_count_threshold = 10000; + + auto snapshot_name = fs::path(snapshot_path).filename(); + LOG_DEBUG(log, "Trying to create a lock file"); + WriteBufferFromS3 lock_writer + { + backup_client->client, + backup_client->uri.bucket, + fmt::format(".{}_LOCK", snapshot_path), + read_write_settings + }; + + WriteBufferFromS3 s3_writer + { + backup_client->client, + backup_client->uri.bucket, + fs::path(snapshot_path).filename(), + read_write_settings + }; + copyData(snapshot_file, s3_writer); + + s3_writer.finalize(); } catch (...) { @@ -579,6 +704,12 @@ void KeeperDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, Keep requests_for_sessions.clear(); } +std::shared_ptr KeeperDispatcher::getBackupS3Client() const +{ + std::lock_guard lock{backup_s3_client_mutex}; + return backup_s3_client; +} + int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms) { /// New session id allocation is a special request, because we cannot process it in normal @@ -718,6 +849,8 @@ void KeeperDispatcher::updateConfiguration(const Poco::Util::AbstractConfigurati if (!push_result) throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push configuration update to queue"); } + + updateS3Configuration(config); } void KeeperDispatcher::updateKeeperStatLatency(uint64_t process_time_ms) diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 6023d52ec04..89027640f7c 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -1,6 +1,7 @@ #pragma once #include +#include "base/defines.h" #include "config_core.h" #if USE_NURAFT @@ -82,6 +83,10 @@ private: /// Counter for new session_id requests. std::atomic internal_session_id_counter{0}; + struct S3Configuration; + mutable std::mutex backup_s3_client_mutex; + std::shared_ptr backup_s3_client; + /// Thread put requests to raft void requestThread(); /// Thread put responses for subscribed sessions @@ -105,6 +110,8 @@ private: /// Clears both arguments void forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions); + std::shared_ptr getBackupS3Client() const; + public: /// Just allocate some objects, real initialization is done by `intialize method` KeeperDispatcher(); @@ -130,6 +137,7 @@ public: /// Registered in ConfigReloader callback. 
Add new configuration changes to /// update_configuration_queue. Keeper Dispatcher apply them asynchronously. void updateConfiguration(const Poco::Util::AbstractConfiguration & config); + void updateS3Configuration(const Poco::Util::AbstractConfiguration & config); /// Shutdown internal keeper parts (server, state machine, log storage, etc) void shutdown(); diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 9005f132974..7e2cdb80579 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -23,6 +22,17 @@ class StandardHttpResponse; namespace DB { + +struct HttpHeader +{ + String name; + String value; + + inline bool operator==(const HttpHeader & other) const { return name == other.name && value == other.value; } +}; + +using HeaderCollection = std::vector; + class Context; } diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index e97fa707c13..0248c49d635 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -9,6 +9,8 @@ # include # include +# include + # include # include # include @@ -646,6 +648,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int S3_ERROR; + extern const int INVALID_CONFIG_PARAMETER; } namespace S3 @@ -772,25 +775,16 @@ namespace S3 boost::to_upper(name); if (name != S3 && name != COS && name != OBS && name != OSS) - { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", quoteString(name)); - } + if (name == S3) - { storage_name = name; - } else if (name == OBS) - { storage_name = OBS; - } else if (name == OSS) - { storage_name = OSS; - } else - { storage_name = COSN; - } } else if (re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key)) { @@ -839,6 +833,47 @@ namespace S3 { return getObjectInfo(client_ptr, bucket, key, version_id, throw_on_error).size; } + + AuthSettings AuthSettings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config) + { + auto access_key_id = config.getString(config_elem + ".access_key_id", ""); + auto secret_access_key = config.getString(config_elem + ".secret_access_key", ""); + auto region = config.getString(config_elem + ".region", ""); + auto server_side_encryption_customer_key_base64 = config.getString(config_elem + ".server_side_encryption_customer_key_base64", ""); + + std::optional use_environment_credentials; + if (config.has(config_elem + ".use_environment_credentials")) + use_environment_credentials = config.getBool(config_elem + ".use_environment_credentials"); + + std::optional use_insecure_imds_request; + if (config.has(config_elem + ".use_insecure_imds_request")) + use_insecure_imds_request = config.getBool(config_elem + ".use_insecure_imds_request"); + + HeaderCollection headers; + Poco::Util::AbstractConfiguration::Keys subconfig_keys; + config.keys(config_elem, subconfig_keys); + for (const String & subkey : subconfig_keys) + { + if (subkey.starts_with("header")) + { + auto header_str = config.getString(config_elem + "." 
+ subkey); + auto delimiter = header_str.find(':'); + if (delimiter == String::npos) + throw Exception("Malformed s3 header value", ErrorCodes::INVALID_CONFIG_PARAMETER); + headers.emplace_back(HttpHeader{header_str.substr(0, delimiter), header_str.substr(delimiter + 1, String::npos)}); + } + } + + return AuthSettings + { + std::move(access_key_id), std::move(secret_access_key), + std::move(region), + std::move(server_side_encryption_customer_key_base64), + std::move(headers), + use_environment_credentials, + use_insecure_imds_request + }; + } } } diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index da7ecf95b78..b6fc9573c18 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -128,6 +128,23 @@ S3::ObjectInfo getObjectInfo(std::shared_ptr client_ptr size_t getObjectSize(std::shared_ptr client_ptr, const String & bucket, const String & key, const String & version_id = {}, bool throw_on_error = true); +struct AuthSettings +{ + static AuthSettings loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config); + + String access_key_id; + String secret_access_key; + String region; + String server_side_encryption_customer_key_base64; + + HeaderCollection headers; + + std::optional use_environment_credentials; + std::optional use_insecure_imds_request; + + bool operator==(const AuthSettings & other) const = default; +}; + } #endif diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h index 719fceb7df1..0b40cee16ae 100644 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ b/src/Storages/ExternalDataSourceConfiguration.h @@ -116,7 +116,7 @@ struct URLBasedDataSourceConfiguration struct StorageS3Configuration : URLBasedDataSourceConfiguration { - S3Settings::AuthSettings auth_settings; + S3::AuthSettings auth_settings; S3Settings::ReadWriteSettings rw_settings; }; diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 3a02237570d..c9d8282d049 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -207,7 +207,7 @@ public: const String access_key_id; const String secret_access_key; std::shared_ptr client; - S3Settings::AuthSettings auth_settings; + S3::AuthSettings auth_settings; S3Settings::ReadWriteSettings rw_settings; }; diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 353e324c853..f68a2a1ff42 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include #include @@ -46,41 +48,8 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U if (config.has(config_elem + "." + key + ".endpoint")) { auto endpoint = get_string_for_key(key, "endpoint", false); - auto access_key_id = get_string_for_key(key, "access_key_id"); - auto secret_access_key = get_string_for_key(key, "secret_access_key"); - auto region = get_string_for_key(key, "region"); - auto server_side_encryption_customer_key_base64 = get_string_for_key(key, "server_side_encryption_customer_key_base64"); - std::optional use_environment_credentials; - if (config.has(config_elem + "." + key + ".use_environment_credentials")) - use_environment_credentials = config.getBool(config_elem + "." + key + ".use_environment_credentials"); - - std::optional use_insecure_imds_request; - if (config.has(config_elem + "." + key + ".use_insecure_imds_request")) - use_insecure_imds_request = config.getBool(config_elem + "." 
+ key + ".use_insecure_imds_request");
-
-            HeaderCollection headers;
-            Poco::Util::AbstractConfiguration::Keys subconfig_keys;
-            config.keys(config_elem + "." + key, subconfig_keys);
-            for (const String & subkey : subconfig_keys)
-            {
-                if (subkey.starts_with("header"))
-                {
-                    auto header_str = config.getString(config_elem + "." + key + "." + subkey);
-                    auto delimiter = header_str.find(':');
-                    if (delimiter == String::npos)
-                        throw Exception("Malformed s3 header value", ErrorCodes::INVALID_CONFIG_PARAMETER);
-                    headers.emplace_back(HttpHeader{header_str.substr(0, delimiter), header_str.substr(delimiter + 1, String::npos)});
-                }
-            }
-
-            S3Settings::AuthSettings auth_settings{
-                std::move(access_key_id), std::move(secret_access_key),
-                std::move(region),
-                std::move(server_side_encryption_customer_key_base64),
-                std::move(headers),
-                use_environment_credentials,
-                use_insecure_imds_request};
+            auto auth_settings = S3::AuthSettings::loadFromConfig(config_elem + "." + key, config);
 
             S3Settings::ReadWriteSettings rw_settings;
             rw_settings.max_single_read_retries = get_uint_for_key(key, "max_single_read_retries", true, settings.s3_max_single_read_retries);
diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h
index 9ef51c77191..9642e43bb2e 100644
--- a/src/Storages/StorageS3Settings.h
+++ b/src/Storages/StorageS3Settings.h
@@ -8,6 +8,8 @@
 #include
 #include
 
+#include
+
 namespace Poco::Util
 {
 class AbstractConfiguration;
@@ -15,43 +17,11 @@ class AbstractConfiguration;
 namespace DB
 {
 
-struct HttpHeader
-{
-    String name;
-    String value;
-
-    inline bool operator==(const HttpHeader & other) const { return name == other.name && value == other.value; }
-};
-
-using HeaderCollection = std::vector;
 
 struct Settings;
 
 struct S3Settings
 {
-    struct AuthSettings
-    {
-        String access_key_id;
-        String secret_access_key;
-        String region;
-        String server_side_encryption_customer_key_base64;
-
-        HeaderCollection headers;
-
-        std::optional use_environment_credentials;
-        std::optional use_insecure_imds_request;
-
-        inline bool operator==(const AuthSettings & other) const
-        {
-            return access_key_id == other.access_key_id && secret_access_key == other.secret_access_key
-                && region == other.region
-                && server_side_encryption_customer_key_base64 == other.server_side_encryption_customer_key_base64
-                && headers == other.headers
-                && use_environment_credentials == other.use_environment_credentials
-                && use_insecure_imds_request == other.use_insecure_imds_request;
-        }
-    };
-
     struct ReadWriteSettings
     {
         size_t max_single_read_retries = 0;
@@ -79,7 +49,7 @@ struct S3Settings
         void updateFromSettingsIfEmpty(const Settings & settings);
     };
 
-    AuthSettings auth_settings;
+    S3::AuthSettings auth_settings;
     ReadWriteSettings rw_settings;
 
     inline bool operator==(const S3Settings & other) const
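The next patch serializes uploads across Keeper nodes with a best-effort lock object in the bucket. A condensed sketch of the sequence it implements, with hypothetical helpers standing in for the lambdas and buffers the patch defines (the real code streams through WriteBufferFromS3/ReadBufferFromS3 and checks existence with HeadObject):

    #include <fmt/format.h>
    #include <string>

    // Hypothetical stand-ins for the helpers defined in the patch below.
    bool file_exists(const std::string & key);                              // HeadObject
    void write_lock_file(const std::string & key, const std::string & id); // WriteBufferFromS3 + finalize
    std::string read_lock_file(const std::string & key);                   // ReadBufferFromS3
    void upload_snapshot(const std::string & key);                         // copyData + finalize
    void delete_object(const std::string & key);                           // DeleteObjectRequest

    // Condensed version of the sequence in KeeperDispatcher::snapshotS3Thread():
    void try_upload(const std::string & snapshot_name, const std::string & node_uuid)
    {
        const auto lock_file = fmt::format(".{}_LOCK", snapshot_name);
        if (file_exists(snapshot_name) || file_exists(lock_file))
            return;                                // uploaded already, or another node is uploading
        write_lock_file(lock_file, node_uuid);     // claim the upload by writing our UUID
        if (read_lock_file(lock_file) != node_uuid)
            return;                                // read-back mismatch: another node overwrote the claim
        upload_snapshot(snapshot_name);
        delete_object(lock_file);                  // release the claim
    }

From d4e4ac3801f669dffd331a3ee1660828246873c1 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Thu, 15 Sep 2022 09:01:43 +0000
Subject: [PATCH 020/252] Lock file during S3 snapshot upload

---
 src/Coordination/KeeperDispatcher.cpp | 193 ++++++++++++++++++--------
 src/Coordination/KeeperDispatcher.h   |  14 +-
 2 files changed, 145 insertions(+), 62 deletions(-)

diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp
index 915acc8a33f..cf4101056b1 100644
--- a/src/Coordination/KeeperDispatcher.cpp
+++ b/src/Coordination/KeeperDispatcher.cpp
@@ -1,21 +1,32 @@
 #include
-#include
-#include
-#include
-#include
-#include
 #include
 #include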
-#include "Core/UUID.h" -#include "IO/S3/PocoHTTPClient.h" -#include + +#include + +#include +#include +#include +#include #include +#include #include + +#include #include +#include +#include +#include + +#include +#include +#include namespace CurrentMetrics { @@ -38,7 +49,7 @@ namespace ErrorCodes KeeperDispatcher::KeeperDispatcher() : responses_queue(std::numeric_limits::max()) - , snapshots_backup_queue(std::numeric_limits::max()) + , snapshots_s3_queue(std::numeric_limits::max()) , configuration_and_settings(std::make_shared()) , log(&Poco::Logger::get("KeeperDispatcher")) { @@ -205,10 +216,10 @@ void KeeperDispatcher::snapshotThread() if (snapshot_path.empty()) continue; - if (isLeader() && getBackupS3Client() != nullptr) + if (isLeader() && getSnapshotS3Client() != nullptr) { - if (!snapshots_backup_queue.push(snapshot_path)) - LOG_WARNING(log, "Failed to add snapshot {} to backup queue", snapshot_path); + if (!snapshots_s3_queue.push(snapshot_path)) + LOG_WARNING(log, "Failed to add snapshot {} to S3 queue", snapshot_path); } } catch (...) @@ -235,14 +246,14 @@ void KeeperDispatcher::updateS3Configuration(const Poco::Util::AbstractConfigura { try { - const std::string config_prefix = "keeper_server.s3_backup"; + const std::string config_prefix = "keeper_server.s3_snapshot"; if (!config.has(config_prefix)) { - std::lock_guard client_lock{backup_s3_client_mutex}; - if (backup_s3_client) - LOG_INFO(log, "S3 backup configuration was removed"); - backup_s3_client = nullptr; + std::lock_guard client_lock{snapshot_s3_client_mutex}; + if (snapshot_s3_client) + LOG_INFO(log, "S3 configuration was removed"); + snapshot_s3_client = nullptr; return; } @@ -250,12 +261,12 @@ void KeeperDispatcher::updateS3Configuration(const Poco::Util::AbstractConfigura auto endpoint = config.getString(config_prefix + ".endpoint"); - std::unique_lock client_lock{backup_s3_client_mutex}; + std::unique_lock client_lock{snapshot_s3_client_mutex}; - if (backup_s3_client && backup_s3_client->client && auth_settings == backup_s3_client->auth_settings && backup_s3_client->uri.endpoint == endpoint) + if (snapshot_s3_client && snapshot_s3_client->client && auth_settings == snapshot_s3_client->auth_settings && snapshot_s3_client->uri.endpoint == endpoint) return; - LOG_INFO(log, "S3 backup configuration was updated"); + LOG_INFO(log, "S3 configuration was updated"); client_lock.unlock(); @@ -269,7 +280,7 @@ void KeeperDispatcher::updateS3Configuration(const Poco::Util::AbstractConfigura if (!new_uri.key.empty()) { - LOG_ERROR(log, "Invalid endpoint defined for S3 backup, it shouldn't contain key, endpoint: {}", endpoint); + LOG_ERROR(log, "Invalid endpoint defined for S3, it shouldn't contain key, endpoint: {}", endpoint); return; } @@ -294,26 +305,26 @@ void KeeperDispatcher::updateS3Configuration(const Poco::Util::AbstractConfigura auto new_client = std::make_shared(std::move(new_uri), std::move(auth_settings), std::move(client)); client_lock.lock(); - backup_s3_client = std::move(new_client); + snapshot_s3_client = std::move(new_client); client_lock.unlock(); - LOG_INFO(log, "S3 backup client was updated"); + LOG_INFO(log, "S3 client was updated"); } catch (...) 
{
-        LOG_ERROR(log, "Failed to create an S3 client for backup");
+        LOG_ERROR(log, "Failed to create an S3 client for snapshots");
         tryLogCurrentException(__PRETTY_FUNCTION__);
     }
 }
 
-void KeeperDispatcher::snapshotBackupThread()
+void KeeperDispatcher::snapshotS3Thread()
 {
-    setThreadName("KeeperBSnpT");
+    setThreadName("KeeperS3SnpT");
 
-    UUID uuid = UUIDHelpers::generateV4();
+    auto uuid = UUIDHelpers::generateV4();
     while (!shutdown_called)
     {
         std::string snapshot_path;
-        if (!snapshots_backup_queue.pop(snapshot_path))
+        if (!snapshots_s3_queue.pop(snapshot_path))
             break;
 
         if (shutdown_called)
             break;
 
         try
         {
-            auto backup_client = getBackupS3Client();
-            if (backup_client == nullptr)
+            auto s3_client = getSnapshotS3Client();
+            if (s3_client == nullptr)
                 continue;
 
-            LOG_INFO(log, "Will try to backup snapshot on {}", snapshot_path);
+            LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_path);
             ReadBufferFromFile snapshot_file(snapshot_path);
 
             S3Settings::ReadWriteSettings read_write_settings;
             read_write_settings.upload_part_size_multiply_parts_count_threshold = 10000;
 
+            const auto create_writer = [&](const auto & key)
+            {
+                return WriteBufferFromS3
+                {
+                    s3_client->client,
+                    s3_client->uri.bucket,
+                    key,
+                    read_write_settings
+                };
+            };
+
-            auto snapshot_name = fs::path(snapshot_path).filename();
+            auto snapshot_name = fs::path(snapshot_path).filename().string();
+            auto lock_file = fmt::format(".{}_LOCK", snapshot_name);
+
+            const auto file_exists = [&](const auto & key)
+            {
+                Aws::S3::Model::HeadObjectRequest request;
+                request.SetBucket(s3_client->uri.bucket);
+                request.SetKey(key);
+                auto outcome = s3_client->client->HeadObject(request);
+
+                if (outcome.IsSuccess())
+                    return true;
+
+                const auto & error = outcome.GetError();
+                if (error.GetErrorType() != Aws::S3::S3Errors::NO_SUCH_KEY && error.GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND)
+                    throw S3Exception(error.GetErrorType(), "Failed to verify existence of lock file: {}", error.GetMessage());
+
+                return false;
+            };
+
+            if (file_exists(snapshot_name))
+            {
+                LOG_ERROR(log, "Snapshot {} already exists", snapshot_name);
+                continue;
+            }
+
+            // First we need to verify that there isn't already a lock file for the snapshot we want to upload
+            if (file_exists(lock_file))
+            {
+                LOG_ERROR(log, "Lock file for {} already exists.
Probably a different node is already uploading the snapshot", snapshot_name); + continue; + } + + // We write our UUID to lock file LOG_DEBUG(log, "Trying to create a lock file"); - WriteBufferFromS3 lock_writer + WriteBufferFromS3 lock_writer = create_writer(lock_file); + writeUUIDText(uuid, lock_writer); + lock_writer.finalize(); + + const auto read_lock_file = [&]() -> std::string { - backup_client->client, - backup_client->uri.bucket, - fmt::format(".{}_LOCK", snapshot_path), - read_write_settings + ReadBufferFromS3 lock_reader + { + s3_client->client, + s3_client->uri.bucket, + lock_file, + "", + 1, + {} + }; + + std::string read_uuid; + readStringUntilEOF(read_uuid, lock_reader); + + return read_uuid; }; - WriteBufferFromS3 s3_writer - { - backup_client->client, - backup_client->uri.bucket, - fs::path(snapshot_path).filename(), - read_write_settings - }; - copyData(snapshot_file, s3_writer); + // We read back the written UUID, if it's the same we can upload the file + auto read_uuid = read_lock_file(); + + if (read_uuid != toString(uuid)) + { + LOG_ERROR(log, "Failed to create a lock file"); + continue; + } + + WriteBufferFromS3 snapshot_writer = create_writer(snapshot_name); + copyData(snapshot_file, snapshot_writer); + snapshot_writer.finalize(); + + LOG_INFO(log, "Successfully uploaded {} to S3", snapshot_path); + + LOG_INFO(log, "Removing lock file"); + Aws::S3::Model::DeleteObjectRequest delete_request; + delete_request.SetBucket(s3_client->uri.bucket); + delete_request.SetKey(lock_file); + auto delete_outcome = s3_client->client->DeleteObject(delete_request); + + if (!delete_outcome.IsSuccess()) + throw S3Exception(delete_outcome.GetError().GetMessage(), delete_outcome.GetError().GetErrorType()); - s3_writer.finalize(); } catch (...) { - LOG_INFO(log, "Failed to backup {}", snapshot_path); + LOG_INFO(log, "Failure during upload of {} to S3", snapshot_path); tryLogCurrentException(__PRETTY_FUNCTION__); } } @@ -444,7 +527,7 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf request_thread = ThreadFromGlobalPool([this] { requestThread(); }); responses_thread = ThreadFromGlobalPool([this] { responseThread(); }); snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); }); - snapshot_backup_thread = ThreadFromGlobalPool([this] { snapshotBackupThread(); }); + snapshot_s3_thread = ThreadFromGlobalPool([this] { snapshotS3Thread(); }); server = std::make_unique(configuration_and_settings, config, responses_queue, snapshots_queue); @@ -510,9 +593,9 @@ void KeeperDispatcher::shutdown() if (snapshot_thread.joinable()) snapshot_thread.join(); - snapshots_backup_queue.finish(); - if (snapshot_backup_thread.joinable()) - snapshot_backup_thread.join(); + snapshots_s3_queue.finish(); + if (snapshot_s3_thread.joinable()) + snapshot_s3_thread.join(); update_configuration_queue.finish(); if (update_configuration_thread.joinable()) @@ -704,10 +787,10 @@ void KeeperDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, Keep requests_for_sessions.clear(); } -std::shared_ptr KeeperDispatcher::getBackupS3Client() const +std::shared_ptr KeeperDispatcher::getSnapshotS3Client() const { - std::lock_guard lock{backup_s3_client_mutex}; - return backup_s3_client; + std::lock_guard lock{snapshot_s3_client_mutex}; + return snapshot_s3_client; } int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms) diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 89027640f7c..ba0c57a7bea 100644 --- 
a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -38,7 +38,7 @@ private: SnapshotsQueue snapshots_queue{1}; using SnapshotBackupQueue = ConcurrentBoundedQueue; - SnapshotBackupQueue snapshots_backup_queue; + SnapshotBackupQueue snapshots_s3_queue; /// More than 1k updates is definitely misconfiguration. UpdateConfigurationQueue update_configuration_queue{1000}; @@ -66,8 +66,8 @@ private: ThreadFromGlobalPool session_cleaner_thread; /// Dumping new snapshots to disk ThreadFromGlobalPool snapshot_thread; - /// Backup new snapshots to S3 - ThreadFromGlobalPool snapshot_backup_thread; + /// Upload new snapshots to S3 + ThreadFromGlobalPool snapshot_s3_thread; /// Apply or wait for configuration changes ThreadFromGlobalPool update_configuration_thread; @@ -84,8 +84,8 @@ private: std::atomic internal_session_id_counter{0}; struct S3Configuration; - mutable std::mutex backup_s3_client_mutex; - std::shared_ptr backup_s3_client; + mutable std::mutex snapshot_s3_client_mutex; + std::shared_ptr snapshot_s3_client; /// Thread put requests to raft void requestThread(); @@ -96,7 +96,7 @@ private: /// Thread create snapshots in the background void snapshotThread(); /// Thread backup snapshots to S3 in the background - void snapshotBackupThread(); + void snapshotS3Thread(); /// Thread apply or wait configuration changes from leader void updateConfigurationThread(); @@ -110,7 +110,7 @@ private: /// Clears both arguments void forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions); - std::shared_ptr getBackupS3Client() const; + std::shared_ptr getSnapshotS3Client() const; public: /// Just allocate some objects, real initialization is done by `intialize method` From 32a297339b1ecff6a581d1c9e76c9a9fc6675c86 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 15 Sep 2022 10:11:09 +0000 Subject: [PATCH 021/252] Fix build --- src/IO/S3/PocoHTTPClient.h | 17 +++-- src/IO/S3Common.cpp | 99 +++++++++++++++++------------- src/IO/S3Common.h | 17 +++-- src/Storages/StorageS3Settings.cpp | 4 -- 4 files changed, 81 insertions(+), 56 deletions(-) diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 7e2cdb80579..0bdfc0a2e02 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -14,11 +14,7 @@ #include #include - -namespace Aws::Http::Standard -{ -class StandardHttpResponse; -} +#endif namespace DB { @@ -32,6 +28,17 @@ struct HttpHeader }; using HeaderCollection = std::vector; +} + +#if USE_AWS_S3 + +namespace Aws::Http::Standard +{ +class StandardHttpResponse; +} + +namespace DB +{ class Context; } diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 0248c49d635..2f3f0e087de 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -1,5 +1,7 @@ #include +#include + #if USE_AWS_S3 # include @@ -9,7 +11,6 @@ # include # include -# include # include # include @@ -648,7 +649,6 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int S3_ERROR; - extern const int INVALID_CONFIG_PARAMETER; } namespace S3 @@ -834,48 +834,63 @@ namespace S3 return getObjectInfo(client_ptr, bucket, key, version_id, throw_on_error).size; } - AuthSettings AuthSettings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config) - { - auto access_key_id = config.getString(config_elem + ".access_key_id", ""); - auto secret_access_key = config.getString(config_elem + ".secret_access_key", ""); - auto region = config.getString(config_elem + ".region", 
""); - auto server_side_encryption_customer_key_base64 = config.getString(config_elem + ".server_side_encryption_customer_key_base64", ""); - - std::optional use_environment_credentials; - if (config.has(config_elem + ".use_environment_credentials")) - use_environment_credentials = config.getBool(config_elem + ".use_environment_credentials"); - - std::optional use_insecure_imds_request; - if (config.has(config_elem + ".use_insecure_imds_request")) - use_insecure_imds_request = config.getBool(config_elem + ".use_insecure_imds_request"); - - HeaderCollection headers; - Poco::Util::AbstractConfiguration::Keys subconfig_keys; - config.keys(config_elem, subconfig_keys); - for (const String & subkey : subconfig_keys) - { - if (subkey.starts_with("header")) - { - auto header_str = config.getString(config_elem + "." + subkey); - auto delimiter = header_str.find(':'); - if (delimiter == String::npos) - throw Exception("Malformed s3 header value", ErrorCodes::INVALID_CONFIG_PARAMETER); - headers.emplace_back(HttpHeader{header_str.substr(0, delimiter), header_str.substr(delimiter + 1, String::npos)}); - } - } - - return AuthSettings - { - std::move(access_key_id), std::move(secret_access_key), - std::move(region), - std::move(server_side_encryption_customer_key_base64), - std::move(headers), - use_environment_credentials, - use_insecure_imds_request - }; - } } } #endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INVALID_CONFIG_PARAMETER; +} + +namespace S3 +{ + +AuthSettings AuthSettings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config) +{ + auto access_key_id = config.getString(config_elem + ".access_key_id", ""); + auto secret_access_key = config.getString(config_elem + ".secret_access_key", ""); + auto region = config.getString(config_elem + ".region", ""); + auto server_side_encryption_customer_key_base64 = config.getString(config_elem + ".server_side_encryption_customer_key_base64", ""); + + std::optional use_environment_credentials; + if (config.has(config_elem + ".use_environment_credentials")) + use_environment_credentials = config.getBool(config_elem + ".use_environment_credentials"); + + std::optional use_insecure_imds_request; + if (config.has(config_elem + ".use_insecure_imds_request")) + use_insecure_imds_request = config.getBool(config_elem + ".use_insecure_imds_request"); + + HeaderCollection headers; + Poco::Util::AbstractConfiguration::Keys subconfig_keys; + config.keys(config_elem, subconfig_keys); + for (const String & subkey : subconfig_keys) + { + if (subkey.starts_with("header")) + { + auto header_str = config.getString(config_elem + "." 
+ subkey); + auto delimiter = header_str.find(':'); + if (delimiter == String::npos) + throw Exception("Malformed s3 header value", ErrorCodes::INVALID_CONFIG_PARAMETER); + headers.emplace_back(HttpHeader{header_str.substr(0, delimiter), header_str.substr(delimiter + 1, String::npos)}); + } + } + + return AuthSettings + { + std::move(access_key_id), std::move(secret_access_key), + std::move(region), + std::move(server_side_encryption_customer_key_base64), + std::move(headers), + use_environment_credentials, + use_insecure_imds_request + }; +} + +} +} diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index b6fc9573c18..f686b53d450 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -1,6 +1,7 @@ #pragma once #include +#include #if USE_AWS_S3 @@ -8,7 +9,6 @@ #include #include #include -#include #include #include @@ -27,8 +27,6 @@ namespace ErrorCodes } class RemoteHostFilter; -struct HttpHeader; -using HeaderCollection = std::vector; class S3Exception : public Exception { @@ -128,6 +126,17 @@ S3::ObjectInfo getObjectInfo(std::shared_ptr client_ptr size_t getObjectSize(std::shared_ptr client_ptr, const String & bucket, const String & key, const String & version_id = {}, bool throw_on_error = true); +} +#endif + +namespace Poco::Util +{ +class AbstractConfiguration; +}; + +namespace DB::S3 +{ + struct AuthSettings { static AuthSettings loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config); @@ -146,5 +155,3 @@ struct AuthSettings }; } - -#endif diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index f68a2a1ff42..6993fb5b609 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -11,10 +11,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int INVALID_CONFIG_PARAMETER; -} void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings) { From c7775b7f16ae48fb071afc121e05c22c68dd0053 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 15 Sep 2022 13:37:17 +0000 Subject: [PATCH 022/252] Add tests for S3 snapshot upload --- .../test_keeper_s3_snapshot/__init__.py | 1 + .../configs/keeper_config1.xml | 42 +++++++ .../configs/keeper_config2.xml | 42 +++++++ .../configs/keeper_config3.xml | 42 +++++++ .../test_keeper_s3_snapshot/test.py | 116 ++++++++++++++++++ 5 files changed, 243 insertions(+) create mode 100644 tests/integration/test_keeper_s3_snapshot/__init__.py create mode 100644 tests/integration/test_keeper_s3_snapshot/configs/keeper_config1.xml create mode 100644 tests/integration/test_keeper_s3_snapshot/configs/keeper_config2.xml create mode 100644 tests/integration/test_keeper_s3_snapshot/configs/keeper_config3.xml create mode 100644 tests/integration/test_keeper_s3_snapshot/test.py diff --git a/tests/integration/test_keeper_s3_snapshot/__init__.py b/tests/integration/test_keeper_s3_snapshot/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_keeper_s3_snapshot/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_keeper_s3_snapshot/configs/keeper_config1.xml b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config1.xml new file mode 100644 index 00000000000..8459ea3e068 --- /dev/null +++ b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config1.xml @@ -0,0 +1,42 @@ + + + + http://minio1:9001/snapshots/ + minio + minio123 + + 9181 + 1 + /var/lib/clickhouse/coordination/log + 
/var/lib/clickhouse/coordination/snapshots + * + + + 5000 + 10000 + 5000 + 50 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_s3_snapshot/configs/keeper_config2.xml b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config2.xml new file mode 100644 index 00000000000..dfe73628f66 --- /dev/null +++ b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config2.xml @@ -0,0 +1,42 @@ + + + + http://minio1:9001/snapshots/ + minio + minio123 + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + * + + + 5000 + 10000 + 5000 + 75 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_s3_snapshot/configs/keeper_config3.xml b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config3.xml new file mode 100644 index 00000000000..948d9527718 --- /dev/null +++ b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config3.xml @@ -0,0 +1,42 @@ + + + + http://minio1:9001/snapshots/ + minio + minio123 + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + * + + + 5000 + 10000 + 5000 + 75 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + diff --git a/tests/integration/test_keeper_s3_snapshot/test.py b/tests/integration/test_keeper_s3_snapshot/test.py new file mode 100644 index 00000000000..9f335998507 --- /dev/null +++ b/tests/integration/test_keeper_s3_snapshot/test.py @@ -0,0 +1,116 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from time import sleep + +from kazoo.client import KazooClient + +# from kazoo.protocol.serialization import Connect, read_buffer, write_buffer + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance( + "node1", + main_configs=["configs/keeper_config1.xml"], + stay_alive=True, + with_minio=True, +) +node2 = cluster.add_instance( + "node2", + main_configs=["configs/keeper_config2.xml"], + stay_alive=True, + with_minio=True, +) +node3 = cluster.add_instance( + "node3", + main_configs=["configs/keeper_config3.xml"], + stay_alive=True, + with_minio=True, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + cluster.minio_client.make_bucket("snapshots") + + yield cluster + + finally: + cluster.shutdown() + + +def get_fake_zk(nodename, timeout=30.0): + _fake_zk_instance = KazooClient( + hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout + ) + _fake_zk_instance.start() + return _fake_zk_instance + + +def destroy_zk_client(zk): + try: + if zk: + zk.stop() + zk.close() + except: + pass + + +def wait_node(node): + for _ in range(100): + zk = None + try: + zk = get_fake_zk(node.name, timeout=30.0) + zk.sync("/") + print("node", node.name, "ready") + break + except Exception as ex: + sleep(0.2) + print("Waiting until", node.name, "will be ready, exception", ex) + finally: + destroy_zk_client(zk) + else: + raise Exception("Can't wait node", node.name, "to become ready") + + +def test_s3_upload(started_cluster): + node1_zk = get_fake_zk(node1.name) + + for _ in range(210): + node1_zk.create("/test", sequence=True) + + def get_saved_snapshots(): + return [ + obj.object_name + for obj in list(cluster.minio_client.list_objects("snapshots")) + ] + + saved_snapshots = get_saved_snapshots() + assert set(saved_snapshots) == set( + [ + "snapshot_50.bin.zstd", + 
"snapshot_100.bin.zstd", + "snapshot_150.bin.zstd", + "snapshot_200.bin.zstd", + ] + ) + + destroy_zk_client(node1_zk) + node1.stop_clickhouse(kill=True) + + wait_node(node2) + node2_zk = get_fake_zk(node2.name) + for _ in range(200): + node2_zk.create("/test", sequence=True) + + saved_snapshots = get_saved_snapshots() + + assert len(saved_snapshots) > 4 + + success_upload_message = "Successfully uploaded" + assert node2.contains_in_log(success_upload_message) or node3.contains_in_log( + success_upload_message + ) + + destroy_zk_client(node2_zk) From d3d1676352269b23864cf851f46320a34bcdc9ea Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 15 Sep 2022 13:48:39 +0000 Subject: [PATCH 023/252] Fix config update detection --- src/Coordination/KeeperDispatcher.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index cf4101056b1..c74cf0c672b 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -260,10 +260,12 @@ void KeeperDispatcher::updateS3Configuration(const Poco::Util::AbstractConfigura auto auth_settings = S3::AuthSettings::loadFromConfig(config_prefix, config); auto endpoint = config.getString(config_prefix + ".endpoint"); + auto new_uri = S3::URI{Poco::URI(endpoint)}; std::unique_lock client_lock{snapshot_s3_client_mutex}; - if (snapshot_s3_client && snapshot_s3_client->client && auth_settings == snapshot_s3_client->auth_settings && snapshot_s3_client->uri.endpoint == endpoint) + if (snapshot_s3_client && snapshot_s3_client->client && auth_settings == snapshot_s3_client->auth_settings + && snapshot_s3_client->uri.uri == new_uri.uri) return; LOG_INFO(log, "S3 configuration was updated"); @@ -276,8 +278,6 @@ void KeeperDispatcher::updateS3Configuration(const Poco::Util::AbstractConfigura static constexpr size_t s3_max_redirects = 10; static constexpr bool enable_s3_requests_logging = false; - auto new_uri = S3::URI{Poco::URI(endpoint)}; - if (!new_uri.key.empty()) { LOG_ERROR(log, "Invalid endpoint defined for S3, it shouldn't contain key, endpoint: {}", endpoint); From ad7864cb5f0083394b68afe4af206156a16ebcfe Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 15 Sep 2022 14:06:22 +0000 Subject: [PATCH 024/252] enable S3 if client is built --- src/Coordination/KeeperDispatcher.cpp | 25 +++++++++++++++++++++---- src/Coordination/KeeperDispatcher.h | 15 +++++++++++++-- src/IO/S3/PocoHTTPClient.h | 7 +++++-- src/IO/S3Common.cpp | 13 ++++++------- src/IO/S3Common.h | 13 ++++++++----- 5 files changed, 53 insertions(+), 20 deletions(-) diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index c74cf0c672b..53cea2665c1 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -1,4 +1,5 @@ #include + #include #include @@ -8,14 +9,15 @@ #include #include +#if USE_AWS_S3 #include -#include -#include -#include #include #include #include +#include +#include +#include #include #include @@ -23,6 +25,7 @@ #include #include #include +#endif #include #include @@ -49,7 +52,9 @@ namespace ErrorCodes KeeperDispatcher::KeeperDispatcher() : responses_queue(std::numeric_limits::max()) +#if USE_AWS_S3 , snapshots_s3_queue(std::numeric_limits::max()) +#endif , configuration_and_settings(std::make_shared()) , log(&Poco::Logger::get("KeeperDispatcher")) { @@ -211,8 +216,9 @@ void KeeperDispatcher::snapshotThread() try { - auto snapshot_path = 
task.create_snapshot(std::move(task.snapshot)); + [[maybe_unused]] auto snapshot_path = task.create_snapshot(std::move(task.snapshot)); +#if USE_AWS_S3 if (snapshot_path.empty()) continue; @@ -221,6 +227,7 @@ void KeeperDispatcher::snapshotThread() if (!snapshots_s3_queue.push(snapshot_path)) LOG_WARNING(log, "Failed to add snapshot {} to S3 queue", snapshot_path); } +#endif } catch (...) { @@ -229,6 +236,7 @@ void KeeperDispatcher::snapshotThread() } } +#if USE_AWS_S3 struct KeeperDispatcher::S3Configuration { S3Configuration(S3::URI uri_, S3::AuthSettings auth_settings_, std::shared_ptr client_) @@ -442,6 +450,7 @@ void KeeperDispatcher::snapshotS3Thread() } } } +#endif void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response) { @@ -527,7 +536,9 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf request_thread = ThreadFromGlobalPool([this] { requestThread(); }); responses_thread = ThreadFromGlobalPool([this] { responseThread(); }); snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); }); +#if USE_AWS_S3 snapshot_s3_thread = ThreadFromGlobalPool([this] { snapshotS3Thread(); }); +#endif server = std::make_unique(configuration_and_settings, config, responses_queue, snapshots_queue); @@ -593,9 +604,11 @@ void KeeperDispatcher::shutdown() if (snapshot_thread.joinable()) snapshot_thread.join(); +#if USE_AWS_S3 snapshots_s3_queue.finish(); if (snapshot_s3_thread.joinable()) snapshot_s3_thread.join(); +#endif update_configuration_queue.finish(); if (update_configuration_thread.joinable()) @@ -787,11 +800,13 @@ void KeeperDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, Keep requests_for_sessions.clear(); } +#if USE_AWS_S3 std::shared_ptr KeeperDispatcher::getSnapshotS3Client() const { std::lock_guard lock{snapshot_s3_client_mutex}; return snapshot_s3_client; } +#endif int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms) { @@ -933,7 +948,9 @@ void KeeperDispatcher::updateConfiguration(const Poco::Util::AbstractConfigurati throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push configuration update to queue"); } +#if USE_AWS_S3 updateS3Configuration(config); +#endif } void KeeperDispatcher::updateKeeperStatLatency(uint64_t process_time_ms) diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index ba0c57a7bea..04593813dce 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -1,7 +1,6 @@ #pragma once #include -#include "base/defines.h" #include "config_core.h" #if USE_NURAFT @@ -37,8 +36,10 @@ private: ResponsesQueue responses_queue; SnapshotsQueue snapshots_queue{1}; +#if USE_AWS_S3 using SnapshotBackupQueue = ConcurrentBoundedQueue; SnapshotBackupQueue snapshots_s3_queue; +#endif /// More than 1k updates is definitely misconfiguration. UpdateConfigurationQueue update_configuration_queue{1000}; @@ -66,8 +67,10 @@ private: ThreadFromGlobalPool session_cleaner_thread; /// Dumping new snapshots to disk ThreadFromGlobalPool snapshot_thread; +#if USE_AWS_S3 /// Upload new snapshots to S3 ThreadFromGlobalPool snapshot_s3_thread; +#endif /// Apply or wait for configuration changes ThreadFromGlobalPool update_configuration_thread; @@ -83,9 +86,11 @@ private: /// Counter for new session_id requests. 
std::atomic internal_session_id_counter{0}; +#if USE_AWS_S3 struct S3Configuration; mutable std::mutex snapshot_s3_client_mutex; std::shared_ptr snapshot_s3_client; +#endif /// Thread put requests to raft void requestThread(); @@ -95,8 +100,10 @@ private: void sessionCleanerTask(); /// Thread create snapshots in the background void snapshotThread(); - /// Thread backup snapshots to S3 in the background +#if USE_AWS_S3 + /// Thread upload snapshots to S3 in the background void snapshotS3Thread(); +#endif /// Thread apply or wait configuration changes from leader void updateConfigurationThread(); @@ -110,7 +117,9 @@ private: /// Clears both arguments void forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions); +#if USE_AWS_S3 std::shared_ptr getSnapshotS3Client() const; +#endif public: /// Just allocate some objects, real initialization is done by `intialize method` @@ -137,7 +146,9 @@ public: /// Registered in ConfigReloader callback. Add new configuration changes to /// update_configuration_queue. Keeper Dispatcher apply them asynchronously. void updateConfiguration(const Poco::Util::AbstractConfiguration & config); +#if USE_AWS_S3 void updateS3Configuration(const Poco::Util::AbstractConfiguration & config); +#endif /// Shutdown internal keeper parts (server, state machine, log storage, etc) void shutdown(); diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 0bdfc0a2e02..5c1d9cc0cc3 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -2,6 +2,9 @@ #include +#include +#include + #if USE_AWS_S3 #include @@ -21,8 +24,8 @@ namespace DB struct HttpHeader { - String name; - String value; + std::string name; + std::string value; inline bool operator==(const HttpHeader & other) const { return name == other.name && value == other.value; } }; diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 2f3f0e087de..ce6c0952100 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -1,17 +1,16 @@ -#include +#include +#include +#include #include #if USE_AWS_S3 -# include - # include # include # include - # include # include # include @@ -851,7 +850,7 @@ namespace ErrorCodes namespace S3 { -AuthSettings AuthSettings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config) +AuthSettings AuthSettings::loadFromConfig(const std::string & config_elem, const Poco::Util::AbstractConfiguration & config) { auto access_key_id = config.getString(config_elem + ".access_key_id", ""); auto secret_access_key = config.getString(config_elem + ".secret_access_key", ""); @@ -869,13 +868,13 @@ AuthSettings AuthSettings::loadFromConfig(const String & config_elem, const Poco HeaderCollection headers; Poco::Util::AbstractConfiguration::Keys subconfig_keys; config.keys(config_elem, subconfig_keys); - for (const String & subkey : subconfig_keys) + for (const std::string & subkey : subconfig_keys) { if (subkey.starts_with("header")) { auto header_str = config.getString(config_elem + "." 
+ subkey); auto delimiter = header_str.find(':'); - if (delimiter == String::npos) + if (delimiter == std::string::npos) throw Exception("Malformed s3 header value", ErrorCodes::INVALID_CONFIG_PARAMETER); headers.emplace_back(HttpHeader{header_str.substr(0, delimiter), header_str.substr(delimiter + 1, String::npos)}); } diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index f686b53d450..27e197f66b3 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -3,6 +3,9 @@ #include #include +#include +#include + #if USE_AWS_S3 #include @@ -139,12 +142,12 @@ namespace DB::S3 struct AuthSettings { - static AuthSettings loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config); + static AuthSettings loadFromConfig(const std::string & config_elem, const Poco::Util::AbstractConfiguration & config); - String access_key_id; - String secret_access_key; - String region; - String server_side_encryption_customer_key_base64; + std::string access_key_id; + std::string secret_access_key; + std::string region; + std::string server_side_encryption_customer_key_base64; HeaderCollection headers; From 0101cc2e562b8bb45b891b9ce0c885e96b1b41f8 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 16 Sep 2022 19:07:36 +0000 Subject: [PATCH 025/252] Support complex combinators in window transform, arrayReduce*, initializeAggregation and Aggregate functons versionning --- .../AggregateFunctionArray.h | 5 ++++ .../AggregateFunctionDistinct.h | 18 ++++++++++-- .../AggregateFunctionForEach.h | 18 ++++++++++-- src/AggregateFunctions/AggregateFunctionIf.h | 5 ++++ src/AggregateFunctions/AggregateFunctionMap.h | 18 ++++++++++-- .../AggregateFunctionNull.h | 23 +++++++++++++-- .../AggregateFunctionOrFill.h | 28 ++++++++++++++---- .../AggregateFunctionResample.h | 20 +++++++++++-- .../AggregateFunctionState.h | 5 ++++ src/AggregateFunctions/IAggregateFunction.h | 12 ++++++++ src/DataTypes/transformTypesRecursively.cpp | 6 ++++ src/DataTypes/transformTypesRecursively.h | 2 ++ src/Formats/NativeReader.cpp | 16 ++++++---- src/Formats/NativeWriter.cpp | 29 +++++++++++-------- src/Functions/array/arrayReduce.cpp | 14 ++------- src/Functions/array/arrayReduceInRanges.cpp | 15 ++-------- src/Functions/initializeAggregation.cpp | 14 ++------- src/Interpreters/InterpreterCreateQuery.cpp | 11 +++++-- src/Processors/Transforms/WindowTransform.cpp | 20 ++----------- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 13 ++++++--- 20 files changed, 201 insertions(+), 91 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionArray.h b/src/AggregateFunctions/AggregateFunctionArray.h index abefe8e0de1..c6e29e77318 100644 --- a/src/AggregateFunctions/AggregateFunctionArray.h +++ b/src/AggregateFunctions/AggregateFunctionArray.h @@ -156,6 +156,11 @@ public: nested_func->insertResultInto(place, to, arena); } + void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + { + nested_func->insertMergeResultInto(place, to, arena); + } + bool allocatesMemoryInArena() const override { return nested_func->allocatesMemoryInArena(); diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index 482d21363fe..46ded55f3e4 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -196,7 +196,8 @@ public: this->data(place).deserialize(buf, arena); } - void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override 
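+ /// Shared implementation of insertResultInto and insertMergeResultInto: the `merge` template flag + /// selects whether the final result is written through the nested function's merge-aware insertion.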
+ template + void insertResultIntoImpl(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const { auto arguments = this->data(place).getArguments(this->argument_types); ColumnRawPtrs arguments_raw(arguments.size()); @@ -205,7 +206,20 @@ public: assert(!arguments.empty()); nested_func->addBatchSinglePlace(0, arguments[0]->size(), getNestedPlace(place), arguments_raw.data(), arena); - nested_func->insertResultInto(getNestedPlace(place), to, arena); + if constexpr (merge) + nested_func->insertMergeResultInto(getNestedPlace(place), to, arena); + else + nested_func->insertResultInto(getNestedPlace(place), to, arena); + } + + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + { + insertResultIntoImpl(place, to, arena); + } + + void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + { + insertResultIntoImpl(place, to, arena); } size_t sizeOfData() const override diff --git a/src/AggregateFunctions/AggregateFunctionForEach.h b/src/AggregateFunctions/AggregateFunctionForEach.h index 07713dcb304..62794ac0f53 100644 --- a/src/AggregateFunctions/AggregateFunctionForEach.h +++ b/src/AggregateFunctions/AggregateFunctionForEach.h @@ -257,7 +257,8 @@ public: } } - void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + template + void insertResultIntoImpl(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const { AggregateFunctionForEachData & state = data(place); @@ -268,13 +269,26 @@ public: char * nested_state = state.array_of_aggregate_datas; for (size_t i = 0; i < state.dynamic_array_size; ++i) { - nested_func->insertResultInto(nested_state, elems_to, arena); + if constexpr (merge) + nested_func->insertMergeResultInto(nested_state, elems_to, arena); + else + nested_func->insertResultInto(nested_state, elems_to, arena); nested_state += nested_size_of_data; } offsets_to.push_back(offsets_to.back() + state.dynamic_array_size); } + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + { + insertResultIntoImpl(place, to, arena); + } + + void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + { + insertResultIntoImpl(place, to, arena); + } + bool allocatesMemoryInArena() const override { return true; diff --git a/src/AggregateFunctions/AggregateFunctionIf.h b/src/AggregateFunctions/AggregateFunctionIf.h index 6b0905d6d5e..e1b19a2ca5b 100644 --- a/src/AggregateFunctions/AggregateFunctionIf.h +++ b/src/AggregateFunctions/AggregateFunctionIf.h @@ -183,6 +183,11 @@ public: nested_func->insertResultInto(place, to, arena); } + void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + { + nested_func->insertMergeResultInto(place, to, arena); + } + bool allocatesMemoryInArena() const override { return nested_func->allocatesMemoryInArena(); diff --git a/src/AggregateFunctions/AggregateFunctionMap.h b/src/AggregateFunctions/AggregateFunctionMap.h index 4cb26fcc8d1..d349fc05944 100644 --- a/src/AggregateFunctions/AggregateFunctionMap.h +++ b/src/AggregateFunctions/AggregateFunctionMap.h @@ -264,7 +264,8 @@ public: } } - void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + template + void insertResultIntoImpl(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const { auto & map_column = assert_cast(to); auto & nested_column = 
map_column.getNestedColumn(); @@ -288,13 +289,26 @@ public: for (auto & key : keys) { key_column.insert(key); - nested_func->insertResultInto(merged_maps[key], val_column, arena); + if constexpr (merge) + nested_func->insertMergeResultInto(merged_maps[key], val_column, arena); + else + nested_func->insertResultInto(merged_maps[key], val_column, arena); } IColumn::Offsets & res_offsets = nested_column.getOffsets(); res_offsets.push_back(val_column.size()); } + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + { + insertResultIntoImpl(place, to, arena); + } + + void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + { + insertResultIntoImpl(place, to, arena); + } + bool allocatesMemoryInArena() const override { return true; } AggregateFunctionPtr getNestedFunction() const override { return nested_func; } diff --git a/src/AggregateFunctions/AggregateFunctionNull.h b/src/AggregateFunctions/AggregateFunctionNull.h index 3bb6f6fb6d2..b469637ed9f 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.h +++ b/src/AggregateFunctions/AggregateFunctionNull.h @@ -163,14 +163,18 @@ public: } } - void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + template + void insertResultIntoImpl(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const { if constexpr (result_is_nullable) { ColumnNullable & to_concrete = assert_cast(to); if (getFlag(place)) { - nested_function->insertResultInto(nestedPlace(place), to_concrete.getNestedColumn(), arena); + if constexpr (merge) + nested_function->insertMergeResultInto(nestedPlace(place), to_concrete.getNestedColumn(), arena); + else + nested_function->insertResultInto(nestedPlace(place), to_concrete.getNestedColumn(), arena); to_concrete.getNullMapData().push_back(0); } else @@ -180,10 +184,23 @@ public: } else { - nested_function->insertResultInto(nestedPlace(place), to, arena); + if constexpr (merge) + nested_function->insertMergeResultInto(nestedPlace(place), to, arena); + else + nested_function->insertResultInto(nestedPlace(place), to, arena); } } + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + { + insertResultIntoImpl(place, to, arena); + } + + void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + { + insertResultIntoImpl(place, to, arena); + } + bool allocatesMemoryInArena() const override { return nested_function->allocatesMemoryInArena(); diff --git a/src/AggregateFunctions/AggregateFunctionOrFill.h b/src/AggregateFunctions/AggregateFunctionOrFill.h index c5a0d60224a..eff4fb2bdc0 100644 --- a/src/AggregateFunctions/AggregateFunctionOrFill.h +++ b/src/AggregateFunctions/AggregateFunctionOrFill.h @@ -265,10 +265,11 @@ public: } } - void insertResultInto( + template + void insertResultIntoImpl( AggregateDataPtr __restrict place, IColumn & to, - Arena * arena) const override + Arena * arena) const { if (place[size_of_data]) { @@ -277,7 +278,12 @@ public: // -OrNull if (inner_nullable) - nested_function->insertResultInto(place, to, arena); + { + if constexpr (merge) + nested_function->insertMergeResultInto(place, to, arena); + else + nested_function->insertResultInto(place, to, arena); + } else { ColumnNullable & col = typeid_cast(to); @@ -289,14 +295,26 @@ public: else { // -OrDefault - - nested_function->insertResultInto(place, to, arena); + if constexpr (merge) + 
nested_function->insertMergeResultInto(place, to, arena); + else + nested_function->insertResultInto(place, to, arena); } } else to.insertDefault(); } + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + { + insertResultIntoImpl(place, to, arena); + } + + void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + { + insertResultIntoImpl(place, to, arena); + } + AggregateFunctionPtr getNestedFunction() const override { return nested_function; } }; diff --git a/src/AggregateFunctions/AggregateFunctionResample.h b/src/AggregateFunctions/AggregateFunctionResample.h index 471a6820939..fe04ada1a77 100644 --- a/src/AggregateFunctions/AggregateFunctionResample.h +++ b/src/AggregateFunctions/AggregateFunctionResample.h @@ -195,17 +195,33 @@ public: return std::make_shared(nested_function->getReturnType()); } - void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + template + void insertResultIntoImpl(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const { auto & col = assert_cast(to); auto & col_offsets = assert_cast(col.getOffsetsColumn()); for (size_t i = 0; i < total; ++i) - nested_function->insertResultInto(place + i * size_of_data, col.getData(), arena); + { + if constexpr (merge) + nested_function->insertMergeResultInto(place + i * size_of_data, col.getData(), arena); + else + nested_function->insertResultInto(place + i * size_of_data, col.getData(), arena); + } col_offsets.getData().push_back(col.getData().size()); } + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + { + insertResultIntoImpl(place, to, arena); + } + + void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + { + insertResultIntoImpl(place, to, arena); + } + AggregateFunctionPtr getNestedFunction() const override { return nested_function; } }; diff --git a/src/AggregateFunctions/AggregateFunctionState.h b/src/AggregateFunctions/AggregateFunctionState.h index 6ab3dbab625..20ccb2e543c 100644 --- a/src/AggregateFunctions/AggregateFunctionState.h +++ b/src/AggregateFunctions/AggregateFunctionState.h @@ -111,6 +111,11 @@ public: assert_cast(to).getData().push_back(place); } + void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override + { + assert_cast(to).insertFrom(place); + } + /// Aggregate function or aggregate function state. bool isState() const override { return true; } diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index 7a4feebbe0f..7c4fea493bf 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -164,6 +164,18 @@ public: /// window function. virtual void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const = 0; + /// Special method for aggregate functions with the -State combinator: it behaves the same way as insertResultInto, + /// but when the result has to be inserted into a ColumnAggregateFunction, it inserts a default state and + /// then merges the provided AggregateData into it instead of just copying a pointer to this AggregateData. + /// Used in WindowTransform.
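+ /// The default implementation below falls back to insertResultInto; each combinator overrides it to + /// forward the call to its nested function, and -State overrides it to merge into a ColumnAggregateFunction.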
+ virtual void insertMergeResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const + { + if (isState()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} is marked as State but method insertMergeResultInto is not implemented", getName()); + + insertResultInto(place, to, arena); + } + /// Used for machine learning methods. Predict result from trained model. /// Will insert result into `to` column for rows in range [offset, offset + limit). virtual void predictValues( diff --git a/src/DataTypes/transformTypesRecursively.cpp b/src/DataTypes/transformTypesRecursively.cpp index 3544c7e477d..57128966565 100644 --- a/src/DataTypes/transformTypesRecursively.cpp +++ b/src/DataTypes/transformTypesRecursively.cpp @@ -175,4 +175,10 @@ void transformTypesRecursively(DataTypes & types, std::function callback) +{ + DataTypes types = {type}; + transformTypesRecursively(types, [callback](auto & data_types){ callback(data_types[0]); }, {}); +} + } diff --git a/src/DataTypes/transformTypesRecursively.h b/src/DataTypes/transformTypesRecursively.h index 5cb8f095494..54e6f2102ad 100644 --- a/src/DataTypes/transformTypesRecursively.h +++ b/src/DataTypes/transformTypesRecursively.h @@ -14,4 +14,6 @@ namespace DB /// Function transform_complex_types will be applied to complex types (Array/Map/Tuple) after recursive call to their nested types. void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types); +void callOnNestedSimpleTypes(DataTypePtr & type, std::function callback); + } diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index 3ad0ce5cfc4..1d593038458 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB @@ -145,12 +146,17 @@ Block NativeReader::read() readBinary(type_name, istr); column.type = data_type_factory.get(type_name); - const auto * aggregate_function_data_type = typeid_cast(column.type.get()); - if (aggregate_function_data_type && aggregate_function_data_type->isVersioned()) + auto callback = [&](DataTypePtr & type) { - auto version = aggregate_function_data_type->getVersionFromRevision(server_revision); - aggregate_function_data_type->setVersion(version, /*if_empty=*/ true); - } + const auto * aggregate_function_data_type = typeid_cast(type.get()); + if (aggregate_function_data_type && aggregate_function_data_type->isVersioned()) + { + auto version = aggregate_function_data_type->getVersionFromRevision(server_revision); + aggregate_function_data_type->setVersion(version, /*if_empty=*/ true); + } + }; + + callOnNestedSimpleTypes(column.type, callback); SerializationPtr serialization; if (server_revision >= DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION) diff --git a/src/Formats/NativeWriter.cpp b/src/Formats/NativeWriter.cpp index 9d4cfb68d56..a33f0581c3c 100644 --- a/src/Formats/NativeWriter.cpp +++ b/src/Formats/NativeWriter.cpp @@ -11,10 +11,10 @@ #include #include -#include -#include #include +#include #include +#include namespace DB { @@ -116,19 +116,24 @@ void NativeWriter::write(const Block & block) writeStringBinary(column.name, ostr); bool include_version = client_revision >= DBMS_MIN_REVISION_WITH_AGGREGATE_FUNCTIONS_VERSIONING; - const auto * aggregate_function_data_type = typeid_cast(column.type.get()); - if (aggregate_function_data_type && aggregate_function_data_type->isVersioned()) + auto callback = [&](DataTypePtr & type) { - if (include_version) + const auto * 
aggregate_function_data_type = typeid_cast(type.get()); + if (aggregate_function_data_type && aggregate_function_data_type->isVersioned()) { - auto version = aggregate_function_data_type->getVersionFromRevision(client_revision); - aggregate_function_data_type->setVersion(version, /* if_empty */true); + if (include_version) + { + auto version = aggregate_function_data_type->getVersionFromRevision(client_revision); + aggregate_function_data_type->setVersion(version, /* if_empty */true); + } + else + { + aggregate_function_data_type->setVersion(0, /* if_empty */false); + } } - else - { - aggregate_function_data_type->setVersion(0, /* if_empty */false); - } - } + }; + + callOnNestedSimpleTypes(column.type, callback); /// Type String type_name = column.type->getName(); diff --git a/src/Functions/array/arrayReduce.cpp b/src/Functions/array/arrayReduce.cpp index fd16f1fc986..c93e67d4b1c 100644 --- a/src/Functions/array/arrayReduce.cpp +++ b/src/Functions/array/arrayReduce.cpp @@ -152,13 +152,6 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume MutableColumnPtr result_holder = result_type->createColumn(); IColumn & res_col = *result_holder; - /// AggregateFunction's states should be inserted into column using specific way - auto * res_col_aggregate_function = typeid_cast(&res_col); - - if (!res_col_aggregate_function && agg_func.isState()) - throw Exception("State function " + agg_func.getName() + " inserts results into non-state column " - + result_type->getName(), ErrorCodes::ILLEGAL_COLUMN); - PODArray places(input_rows_count); for (size_t i = 0; i < input_rows_count; ++i) { @@ -190,10 +183,9 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume } for (size_t i = 0; i < input_rows_count; ++i) - if (!res_col_aggregate_function) - agg_func.insertResultInto(places[i], res_col, arena.get()); - else - res_col_aggregate_function->insertFrom(places[i]); + /// We should use insertMergeResultInto to insert result into ColumnAggregateFunction + /// correctly if result contains AggregateFunction's states + agg_func.insertMergeResultInto(places[i], res_col, arena.get()); return result_holder; } diff --git a/src/Functions/array/arrayReduceInRanges.cpp b/src/Functions/array/arrayReduceInRanges.cpp index d2a382e86ba..11d5e03eb3d 100644 --- a/src/Functions/array/arrayReduceInRanges.cpp +++ b/src/Functions/array/arrayReduceInRanges.cpp @@ -202,13 +202,6 @@ ColumnPtr FunctionArrayReduceInRanges::executeImpl( result_arr->getOffsets().insert(ranges_offsets->begin(), ranges_offsets->end()); - /// AggregateFunction's states should be inserted into column using specific way - auto * res_col_aggregate_function = typeid_cast(&result_data); - - if (!res_col_aggregate_function && agg_func.isState()) - throw Exception("State function " + agg_func.getName() + " inserts results into non-state column " - + result_type->getName(), ErrorCodes::ILLEGAL_COLUMN); - /// Perform the aggregation size_t begin = 0; @@ -379,11 +372,9 @@ ColumnPtr FunctionArrayReduceInRanges::executeImpl( for (size_t k = local_begin; k < local_end; ++k) true_func->add(place, aggregate_arguments, begin + k, arena.get()); } - - if (!res_col_aggregate_function) - agg_func.insertResultInto(place, result_data, arena.get()); - else - res_col_aggregate_function->insertFrom(place); + /// We should use insertMergeResultInto to insert result into ColumnAggregateFunction + /// correctly if result contains AggregateFunction's states + agg_func.insertMergeResultInto(place, result_data, arena.get()); } } 
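The call sites above and below all reduce to the same pattern. A minimal sketch of the caller-side change, assuming the locals already used in these functions (`agg_func`, `places`, `res_col`, `arena`, `input_rows_count`); the names stand for those locals, not a new API:

    // Old pattern: every caller branched on the destination column type
    auto * res_col_aggregate_function = typeid_cast<ColumnAggregateFunction *>(&res_col);
    for (size_t i = 0; i < input_rows_count; ++i)
    {
        if (!res_col_aggregate_function)
            agg_func.insertResultInto(places[i], res_col, arena.get());
        else
            res_col_aggregate_function->insertFrom(places[i]);
    }

    // New pattern: one call handles both materialized results and -State results,
    // and combinators forward it recursively to their nested functions
    for (size_t i = 0; i < input_rows_count; ++i)
        agg_func.insertMergeResultInto(places[i], res_col, arena.get());
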
diff --git a/src/Functions/initializeAggregation.cpp b/src/Functions/initializeAggregation.cpp index b7dcce9c188..385a9571e82 100644 --- a/src/Functions/initializeAggregation.cpp +++ b/src/Functions/initializeAggregation.cpp @@ -114,13 +114,6 @@ ColumnPtr FunctionInitializeAggregation::executeImpl(const ColumnsWithTypeAndNam MutableColumnPtr result_holder = result_type->createColumn(); IColumn & res_col = *result_holder; - /// AggregateFunction's states should be inserted into column using specific way - auto * res_col_aggregate_function = typeid_cast(&res_col); - - if (!res_col_aggregate_function && agg_func.isState()) - throw Exception("State function " + agg_func.getName() + " inserts results into non-state column " - + result_type->getName(), ErrorCodes::ILLEGAL_COLUMN); - PODArray places(input_rows_count); for (size_t i = 0; i < input_rows_count; ++i) { @@ -151,10 +144,9 @@ ColumnPtr FunctionInitializeAggregation::executeImpl(const ColumnsWithTypeAndNam } for (size_t i = 0; i < input_rows_count; ++i) - if (!res_col_aggregate_function) - agg_func.insertResultInto(places[i], res_col, arena.get()); - else - res_col_aggregate_function->insertFrom(places[i]); + /// We should use insertMergeResultInto to insert result into ColumnAggregateFunction + /// correctly if result contains AggregateFunction's states + agg_func.insertMergeResultInto(places[i], res_col, arena.get()); return result_holder; } diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index e66fe543ab0..2428f8f29ad 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -481,9 +482,13 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( { column_type = DataTypeFactory::instance().get(col_decl.type); - const auto * aggregate_function_type = typeid_cast(column_type.get()); - if (attach && aggregate_function_type && aggregate_function_type->isVersioned()) - aggregate_function_type->setVersion(0, /* if_empty */true); + auto callback = [&](DataTypePtr & type) + { + const auto * aggregate_function_type = typeid_cast(type.get()); + if (attach && aggregate_function_type && aggregate_function_type->isVersioned()) + aggregate_function_type->setVersion(0, /* if_empty */true); + }; + callOnNestedSimpleTypes(column_type, callback); if (col_decl.null_modifier) { diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 4155ab7c7f1..b8feaf13b8f 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -28,7 +28,6 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int NOT_IMPLEMENTED; - extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } @@ -992,22 +991,9 @@ void WindowTransform::writeOutCurrentRow() // FIXME does it also allocate the result on the arena? // We'll have to pass it out with blocks then... 
- if (a->isState()) - { - /// AggregateFunction's states should be inserted into column using specific way - auto * res_col_aggregate_function = typeid_cast(result_column); - if (!res_col_aggregate_function) - { - throw Exception("State function " + a->getName() + " inserts results into non-state column ", - ErrorCodes::ILLEGAL_COLUMN); - } - res_col_aggregate_function->insertFrom(buf); - } - else - { - a->insertResultInto(buf, *result_column, arena.get()); - } - + /// We should use insertMergeResultInto to insert result into ColumnAggregateFunction + /// correctly if result contains AggregateFunction's states + a->insertMergeResultInto(buf, *result_column, arena.get()); } } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index a52de88321c..a570019f65d 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -1208,11 +1209,15 @@ void IMergeTreeDataPart::loadColumns(bool require) auto in = metadata_manager->read("columns.txt"); loaded_columns.readText(*in); - for (const auto & column : loaded_columns) + for (auto & column : loaded_columns) { - const auto * aggregate_function_data_type = typeid_cast(column.type.get()); - if (aggregate_function_data_type && aggregate_function_data_type->isVersioned()) - aggregate_function_data_type->setVersion(0, /* if_empty */true); + auto callback = [](DataTypePtr & type) + { + const auto * aggregate_function_data_type = typeid_cast(type.get()); + if (aggregate_function_data_type && aggregate_function_data_type->isVersioned()) + aggregate_function_data_type->setVersion(0, /* if_empty */true); + }; + callOnNestedSimpleTypes(column.type, callback); } } From c66f41230090835c7bbdbcff536265c9c92148b2 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 18 Sep 2022 07:11:52 +0000 Subject: [PATCH 026/252] pass session certificate for showCertificate() --- src/Functions/FunctionShowCertificate.h | 20 +++++++++++++++++--- src/Interpreters/ClientInfo.h | 1 + src/Interpreters/Session.cpp | 3 ++- src/Interpreters/Session.h | 2 +- src/Server/TCPHandler.cpp | 3 ++- src/Server/TCPHandler.h | 1 + src/Server/TCPProtocolStackData.h | 1 + src/Server/TLSHandler.h | 1 + 8 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/Functions/FunctionShowCertificate.h b/src/Functions/FunctionShowCertificate.h index 0724158f66b..832f80f8b1b 100644 --- a/src/Functions/FunctionShowCertificate.h +++ b/src/Functions/FunctionShowCertificate.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include @@ -33,14 +35,18 @@ class FunctionShowCertificate : public IFunction public: static constexpr auto name = "showCertificate"; - static FunctionPtr create(ContextPtr) + static FunctionPtr create(ContextPtr ctx) { #if !defined(USE_SSL) || USE_SSL == 0 throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support is disabled"); #endif - return std::make_shared(); + return std::make_shared(ctx->getQueryContext()->getClientInfo().certificate); } + std::string certificate; + + explicit FunctionShowCertificate(const std::string & certificate_ = "") : certificate(certificate_) {} + String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } @@ -61,7 +67,15 @@ public: if (input_rows_count) { #if USE_SSL - if (const X509 * cert = SSL_CTX_get0_certificate(Poco::Net::SSLManager::instance().defaultServerContext()->sslContext())) + std::unique_ptr 
x509_cert; + if (!certificate.empty()) + x509_cert = std::make_unique(certificate); + + const X509 * cert = x509_cert ? + x509_cert->certificate() : + SSL_CTX_get0_certificate(Poco::Net::SSLManager::instance().defaultServerContext()->sslContext()); + + if (cert) { BIO * b = BIO_new(BIO_s_mem()); SCOPE_EXIT( diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index a1096b99325..f7a172b226d 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -69,6 +69,7 @@ public: Interface interface = Interface::TCP; bool is_secure = false; + String certificate; /// For tcp String os_user; diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 52588a5f4cc..7639dec813d 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -244,7 +244,7 @@ void Session::shutdownNamedSessions() NamedSessionsStorage::instance().shutdown(); } -Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure) +Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure, const std::string & certificate) : auth_id(UUIDHelpers::generateV4()), global_context(global_context_), log(&Poco::Logger::get(String{magic_enum::enum_name(interface_)} + "-Session")) @@ -252,6 +252,7 @@ Session::Session(const ContextPtr & global_context_, ClientInfo::Interface inter prepared_client_info.emplace(); prepared_client_info->interface = interface_; prepared_client_info->is_secure = is_secure; + prepared_client_info->certificate = certificate; } Session::~Session() diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index ed4f7809dee..0f17c378915 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -32,7 +32,7 @@ public: /// Stops using named sessions. The method must be called at the server shutdown. static void shutdownNamedSessions(); - Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure = false); + Session(const ContextPtr & global_context_, ClientInfo::Interface interface_, bool is_secure = false, const std::string & certificate = ""); ~Session(); Session(const Session &&) = delete; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index eaa4e083a1f..fe30655c19a 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -115,6 +115,7 @@ TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::N , tcp_server(tcp_server_) , log(&Poco::Logger::get("TCPHandler")) , forwarded_for(stack_data.forwarded_for) + , certificate(stack_data.certificate) , server_display_name(std::move(server_display_name_)) { } @@ -1065,7 +1066,7 @@ std::unique_ptr TCPHandler::makeSession() { auto interface = is_interserver_mode ? 
ClientInfo::Interface::TCP_INTERSERVER : ClientInfo::Interface::TCP; - auto res = std::make_unique(server.context(), interface, socket().secure()); + auto res = std::make_unique(server.context(), interface, socket().secure(), certificate); auto & client_info = res->getClientInfo(); client_info.forwarded_for = forwarded_for; diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 13c3c5f70c1..c36ce1e9378 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -153,6 +153,7 @@ private: Poco::Logger * log; String forwarded_for; + String certificate; String client_name; UInt64 client_version_major = 0; diff --git a/src/Server/TCPProtocolStackData.h b/src/Server/TCPProtocolStackData.h index bc90de8c678..ea5641ec775 100644 --- a/src/Server/TCPProtocolStackData.h +++ b/src/Server/TCPProtocolStackData.h @@ -10,6 +10,7 @@ struct TCPProtocolStackData { Poco::Net::StreamSocket socket; std::string forwarded_for; + std::string certificate; }; } diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h index 32f0ca59776..5b7377515c1 100644 --- a/src/Server/TLSHandler.h +++ b/src/Server/TLSHandler.h @@ -43,6 +43,7 @@ public: ctx = new Context(Context::Usage::SERVER_USE, key, certificate, ctx->getCAPaths().caLocation); socket() = SecureStreamSocket::attach(socket(), ctx); stack_data.socket = socket(); + stack_data.certificate = certificate; #else throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", ErrorCodes::SUPPORT_IS_DISABLED}; From fc78af3f6974f3f3db843de31bf1303c75c0fbe5 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 19 Sep 2022 02:01:09 +0000 Subject: [PATCH 027/252] add 'default_database' endpoint config parameter --- src/Server/TCPHandler.cpp | 6 +++++- src/Server/TCPProtocolStackData.h | 1 + src/Server/TCPProtocolStackFactory.h | 2 +- src/Server/TCPProtocolStackHandler.h | 11 ++++++++--- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index fe30655c19a..9ff572c6bb5 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -116,6 +116,7 @@ TCPHandler::TCPHandler(IServer & server_, TCPServer & tcp_server_, const Poco::N , log(&Poco::Logger::get("TCPHandler")) , forwarded_for(stack_data.forwarded_for) , certificate(stack_data.certificate) + , default_database(stack_data.default_database) , server_display_name(std::move(server_display_name_)) { } @@ -1093,6 +1094,7 @@ void TCPHandler::receiveHello() UInt64 packet_type = 0; String user; String password; + String default_db; readVarUInt(packet_type, *in); if (packet_type != Protocol::Client::Hello) @@ -1114,7 +1116,9 @@ void TCPHandler::receiveHello() readVarUInt(client_version_minor, *in); // NOTE For backward compatibility of the protocol, client cannot send its version_patch. 
readVarUInt(client_tcp_protocol_version, *in); - readStringBinary(default_database, *in); + readStringBinary(default_db, *in); + if (!default_db.empty()) + default_database = default_db; readStringBinary(user, *in); readStringBinary(password, *in); diff --git a/src/Server/TCPProtocolStackData.h b/src/Server/TCPProtocolStackData.h index ea5641ec775..f2d00d8a845 100644 --- a/src/Server/TCPProtocolStackData.h +++ b/src/Server/TCPProtocolStackData.h @@ -11,6 +11,7 @@ struct TCPProtocolStackData Poco::Net::StreamSocket socket; std::string forwarded_for; std::string certificate; + std::string default_database; }; } diff --git a/src/Server/TCPProtocolStackFactory.h b/src/Server/TCPProtocolStackFactory.h index 4acbd3e5059..50c6555fe9f 100644 --- a/src/Server/TCPProtocolStackFactory.h +++ b/src/Server/TCPProtocolStackFactory.h @@ -39,7 +39,7 @@ public: try { LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); - return new TCPProtocolStackHandler(tcp_server, socket, stack, conf_name); + return new TCPProtocolStackHandler(server, tcp_server, socket, stack, conf_name); } catch (const Poco::Net::NetException &) { diff --git a/src/Server/TCPProtocolStackHandler.h b/src/Server/TCPProtocolStackHandler.h index 7b513298022..9ca388da17b 100644 --- a/src/Server/TCPProtocolStackHandler.h +++ b/src/Server/TCPProtocolStackHandler.h @@ -3,7 +3,9 @@ #include #include #include -#include "Server/TCPProtocolStackData.h" +#include +#include +#include namespace DB @@ -15,19 +17,22 @@ class TCPProtocolStackHandler : public Poco::Net::TCPServerConnection using StreamSocket = Poco::Net::StreamSocket; using TCPServerConnection = Poco::Net::TCPServerConnection; private: + IServer & server; TCPServer & tcp_server; std::list stack; std::string conf_name; public: - TCPProtocolStackHandler(TCPServer & tcp_server_, const StreamSocket & socket, const std::list & stack_, const std::string & conf_name_) - : TCPServerConnection(socket), tcp_server(tcp_server_), stack(stack_), conf_name(conf_name_) + TCPProtocolStackHandler(IServer & server_, TCPServer & tcp_server_, const StreamSocket & socket, const std::list & stack_, const std::string & conf_name_) + : TCPServerConnection(socket), server(server_), tcp_server(tcp_server_), stack(stack_), conf_name(conf_name_) {} void run() override { + const auto & conf = server.config(); TCPProtocolStackData stack_data; stack_data.socket = socket(); + stack_data.default_database = conf.getString(conf_name + ".default_database", ""); for (auto & factory : stack) { std::unique_ptr connection(factory->createConnection(socket(), tcp_server, stack_data)); From 4ccfbedea45a1b2b47310e8c83c250643fc34e15 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 20 Sep 2022 17:10:18 +0000 Subject: [PATCH 028/252] add allowed networks for endpoint --- src/Server/TCPProtocolStackFactory.h | 31 ++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/Server/TCPProtocolStackFactory.h b/src/Server/TCPProtocolStackFactory.h index 50c6555fe9f..c0ec29411d4 100644 --- a/src/Server/TCPProtocolStackFactory.h +++ b/src/Server/TCPProtocolStackFactory.h @@ -6,11 +6,18 @@ #include #include #include +#include namespace DB { +namespace ErrorCodes +{ + extern const int UNKNOWN_ADDRESS_PATTERN_TYPE; + extern const int IP_ADDRESS_NOT_ALLOWED; +} + class TCPProtocolStackFactory : public TCPServerConnectionFactory { @@ -19,6 +26,7 @@ private: Poco::Logger * log; std::string conf_name; std::list stack; + AllowedClientHosts allowed_client_hosts; class DummyTCPHandler : public 
Poco::Net::TCPServerConnection { @@ -32,10 +40,33 @@ public: explicit TCPProtocolStackFactory(IServer & server_, const std::string & conf_name_, T... factory) : server(server_), log(&Poco::Logger::get("TCPProtocolStackFactory")), conf_name(conf_name_), stack({factory...}) { + const auto & config = server.config(); + /// Fill list of allowed hosts. + const auto networks_config = conf_name + ".networks"; + if (config.has(networks_config)) + { + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(networks_config, keys); + for (const String & key : keys) + { + String value = config.getString(networks_config + "." + key); + if (key.starts_with("ip")) + allowed_client_hosts.addSubnet(value); + else if (key.starts_with("host_regexp")) + allowed_client_hosts.addNameRegexp(value); + else if (key.starts_with("host")) + allowed_client_hosts.addName(value); + else + throw Exception("Unknown address pattern type: " + key, ErrorCodes::UNKNOWN_ADDRESS_PATTERN_TYPE); + } + } } Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override { + if (!allowed_client_hosts.empty() && !allowed_client_hosts.contains(socket.peerAddress().host())) + throw Exception("Connections from " + socket.peerAddress().toString() + " are not allowed", ErrorCodes::IP_ADDRESS_NOT_ALLOWED); + try { LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString()); From eefbbf53e854183e86a3941f50f6c69e1578c7f6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 21 Sep 2022 11:53:54 +0000 Subject: [PATCH 029/252] Extract S3 logic --- programs/keeper/CMakeLists.txt | 1 + src/Coordination/KeeperDispatcher.cpp | 271 +---------------- src/Coordination/KeeperDispatcher.h | 27 +- src/Coordination/KeeperSnapshotManagerS3.cpp | 293 +++++++++++++++++++ src/Coordination/KeeperSnapshotManagerS3.h | 63 ++++ 5 files changed, 365 insertions(+), 290 deletions(-) create mode 100644 src/Coordination/KeeperSnapshotManagerS3.cpp create mode 100644 src/Coordination/KeeperSnapshotManagerS3.h diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index a5ad506abe6..5436bcfed1e 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -45,6 +45,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperLogStore.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperServer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManager.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManagerS3.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateMachine.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateManager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStorage.cpp diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 060b7061616..177996d8ee8 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -9,23 +9,6 @@ #include #include -#if USE_AWS_S3 -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#endif #include #include @@ -54,14 +37,9 @@ namespace ErrorCodes KeeperDispatcher::KeeperDispatcher() : responses_queue(std::numeric_limits::max()) -#if USE_AWS_S3 - , snapshots_s3_queue(std::numeric_limits::max()) -#endif , configuration_and_settings(std::make_shared()) , log(&Poco::Logger::get("KeeperDispatcher")) -{ -} - +{} void 
KeeperDispatcher::requestThread() { @@ -220,16 +198,11 @@ void KeeperDispatcher::snapshotThread() { [[maybe_unused]] auto snapshot_path = task.create_snapshot(std::move(task.snapshot)); -#if USE_AWS_S3 if (snapshot_path.empty()) continue; - if (isLeader() && getSnapshotS3Client() != nullptr) - { - if (!snapshots_s3_queue.push(snapshot_path)) - LOG_WARNING(log, "Failed to add snapshot {} to S3 queue", snapshot_path); - } -#endif + if (isLeader()) + snapshot_s3.uploadSnapshot(snapshot_path); } catch (...) { @@ -238,222 +211,6 @@ void KeeperDispatcher::snapshotThread() } } -#if USE_AWS_S3 -struct KeeperDispatcher::S3Configuration -{ - S3Configuration(S3::URI uri_, S3::AuthSettings auth_settings_, std::shared_ptr client_) - : uri(std::move(uri_)) - , auth_settings(std::move(auth_settings_)) - , client(std::move(client_)) - {} - - S3::URI uri; - S3::AuthSettings auth_settings; - std::shared_ptr client; -}; - -void KeeperDispatcher::updateS3Configuration(const Poco::Util::AbstractConfiguration & config) -{ - try - { - const std::string config_prefix = "keeper_server.s3_snapshot"; - - if (!config.has(config_prefix)) - { - std::lock_guard client_lock{snapshot_s3_client_mutex}; - if (snapshot_s3_client) - LOG_INFO(log, "S3 configuration was removed"); - snapshot_s3_client = nullptr; - return; - } - - auto auth_settings = S3::AuthSettings::loadFromConfig(config_prefix, config); - - auto endpoint = config.getString(config_prefix + ".endpoint"); - auto new_uri = S3::URI{Poco::URI(endpoint)}; - - std::unique_lock client_lock{snapshot_s3_client_mutex}; - - if (snapshot_s3_client && snapshot_s3_client->client && auth_settings == snapshot_s3_client->auth_settings - && snapshot_s3_client->uri.uri == new_uri.uri) - return; - - LOG_INFO(log, "S3 configuration was updated"); - - client_lock.unlock(); - - auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key); - HeaderCollection headers = auth_settings.headers; - - static constexpr size_t s3_max_redirects = 10; - static constexpr bool enable_s3_requests_logging = false; - - if (!new_uri.key.empty()) - { - LOG_ERROR(log, "Invalid endpoint defined for S3, it shouldn't contain key, endpoint: {}", endpoint); - return; - } - - S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( - auth_settings.region, - RemoteHostFilter(), s3_max_redirects, - enable_s3_requests_logging, - /* for_disk_s3 = */ false); - - client_configuration.endpointOverride = new_uri.endpoint; - - auto client = S3::ClientFactory::instance().create( - client_configuration, - new_uri.is_virtual_hosted_style, - credentials.GetAWSAccessKeyId(), - credentials.GetAWSSecretKey(), - auth_settings.server_side_encryption_customer_key_base64, - std::move(headers), - auth_settings.use_environment_credentials.value_or(false), - auth_settings.use_insecure_imds_request.value_or(false)); - - auto new_client = std::make_shared(std::move(new_uri), std::move(auth_settings), std::move(client)); - - client_lock.lock(); - snapshot_s3_client = std::move(new_client); - client_lock.unlock(); - LOG_INFO(log, "S3 client was updated"); - } - catch (...) 
- { - LOG_ERROR(log, "Failed to create an S3 client for snapshots"); - tryLogCurrentException(__PRETTY_FUNCTION__); - } -} - -void KeeperDispatcher::snapshotS3Thread() -{ - setThreadName("KeeperS3SnpT"); - - auto uuid = UUIDHelpers::generateV4(); - while (!shutdown_called) - { - std::string snapshot_path; - if (!snapshots_s3_queue.pop(snapshot_path)) - break; - - if (shutdown_called) - break; - - try - { - auto s3_client = getSnapshotS3Client(); - if (s3_client == nullptr) - continue; - - LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_path); - ReadBufferFromFile snapshot_file(snapshot_path); - - S3Settings::ReadWriteSettings read_write_settings; - read_write_settings.upload_part_size_multiply_parts_count_threshold = 10000; - - const auto create_writer = [&](const auto & key) - { - return WriteBufferFromS3 - { - s3_client->client, - s3_client->uri.bucket, - key, - read_write_settings - }; - }; - - auto snapshot_name = fs::path(snapshot_path).filename().string(); - auto lock_file = fmt::format(".{}_LOCK", snapshot_name); - - const auto file_exists = [&](const auto & key) - { - Aws::S3::Model::HeadObjectRequest request; - request.SetBucket(s3_client->uri.bucket); - request.SetKey(key); - auto outcome = s3_client->client->HeadObject(request); - - if (outcome.IsSuccess()) - return true; - - const auto & error = outcome.GetError(); - if (error.GetErrorType() != Aws::S3::S3Errors::NO_SUCH_KEY && error.GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND) - throw S3Exception(error.GetErrorType(), "Failed to verify existence of lock file: {}", error.GetMessage()); - - return false; - }; - - if (file_exists(snapshot_name)) - { - LOG_ERROR(log, "Snapshot {} already exists", snapshot_name); - continue; - } - - // First we need to verify that there isn't already a lock file for the snapshot we want to upload - if (file_exists(lock_file)) - { - LOG_ERROR(log, "Lock file for {} already, exists. Probably a different node is already uploading the snapshot", snapshot_name); - continue; - } - - // We write our UUID to lock file - LOG_DEBUG(log, "Trying to create a lock file"); - WriteBufferFromS3 lock_writer = create_writer(lock_file); - writeUUIDText(uuid, lock_writer); - lock_writer.finalize(); - - const auto read_lock_file = [&]() -> std::string - { - ReadBufferFromS3 lock_reader - { - s3_client->client, - s3_client->uri.bucket, - lock_file, - "", - 1, - {} - }; - - std::string read_uuid; - readStringUntilEOF(read_uuid, lock_reader); - - return read_uuid; - }; - - // We read back the written UUID, if it's the same we can upload the file - auto read_uuid = read_lock_file(); - - if (read_uuid != toString(uuid)) - { - LOG_ERROR(log, "Failed to create a lock file"); - continue; - } - - WriteBufferFromS3 snapshot_writer = create_writer(snapshot_name); - copyData(snapshot_file, snapshot_writer); - snapshot_writer.finalize(); - - LOG_INFO(log, "Successfully uploaded {} to S3", snapshot_path); - - LOG_INFO(log, "Removing lock file"); - Aws::S3::Model::DeleteObjectRequest delete_request; - delete_request.SetBucket(s3_client->uri.bucket); - delete_request.SetKey(lock_file); - auto delete_outcome = s3_client->client->DeleteObject(delete_request); - - if (!delete_outcome.IsSuccess()) - throw S3Exception(delete_outcome.GetError().GetMessage(), delete_outcome.GetError().GetErrorType()); - - } - catch (...) 
- { - LOG_INFO(log, "Failure during upload of {} to S3", snapshot_path); - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } -} -#endif - void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response) { std::lock_guard lock(session_to_response_callback_mutex); @@ -539,9 +296,7 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf responses_thread = ThreadFromGlobalPool([this] { responseThread(); }); snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); }); -#if USE_AWS_S3 - snapshot_s3_thread = ThreadFromGlobalPool([this] { snapshotS3Thread(); }); -#endif + snapshot_s3.startup(); server = std::make_unique(configuration_and_settings, config, responses_queue, snapshots_queue); @@ -607,11 +362,7 @@ void KeeperDispatcher::shutdown() if (snapshot_thread.joinable()) snapshot_thread.join(); -#if USE_AWS_S3 - snapshots_s3_queue.finish(); - if (snapshot_s3_thread.joinable()) - snapshot_s3_thread.join(); -#endif + snapshot_s3.shutdown(); update_configuration_queue.finish(); if (update_configuration_thread.joinable()) @@ -803,14 +554,6 @@ void KeeperDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, Keep requests_for_sessions.clear(); } -#if USE_AWS_S3 -std::shared_ptr KeeperDispatcher::getSnapshotS3Client() const -{ - std::lock_guard lock{snapshot_s3_client_mutex}; - return snapshot_s3_client; -} -#endif - int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms) { /// New session id allocation is a special request, because we cannot process it in normal @@ -951,9 +694,7 @@ void KeeperDispatcher::updateConfiguration(const Poco::Util::AbstractConfigurati throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push configuration update to queue"); } -#if USE_AWS_S3 - updateS3Configuration(config); -#endif + snapshot_s3.updateS3Configuration(config); } void KeeperDispatcher::updateKeeperStatLatency(uint64_t process_time_ms) diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 04593813dce..b4644a34422 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -15,6 +15,7 @@ #include #include #include +#include namespace DB { @@ -36,11 +37,6 @@ private: ResponsesQueue responses_queue; SnapshotsQueue snapshots_queue{1}; -#if USE_AWS_S3 - using SnapshotBackupQueue = ConcurrentBoundedQueue; - SnapshotBackupQueue snapshots_s3_queue; -#endif - /// More than 1k updates is definitely misconfiguration. UpdateConfigurationQueue update_configuration_queue{1000}; @@ -67,10 +63,6 @@ private: ThreadFromGlobalPool session_cleaner_thread; /// Dumping new snapshots to disk ThreadFromGlobalPool snapshot_thread; -#if USE_AWS_S3 - /// Upload new snapshots to S3 - ThreadFromGlobalPool snapshot_s3_thread; -#endif /// Apply or wait for configuration changes ThreadFromGlobalPool update_configuration_thread; @@ -86,11 +78,7 @@ private: /// Counter for new session_id requests. 
std::atomic internal_session_id_counter{0}; -#if USE_AWS_S3 - struct S3Configuration; - mutable std::mutex snapshot_s3_client_mutex; - std::shared_ptr snapshot_s3_client; -#endif + KeeperSnapshotManagerS3 snapshot_s3; /// Thread put requests to raft void requestThread(); @@ -100,10 +88,6 @@ private: void sessionCleanerTask(); /// Thread create snapshots in the background void snapshotThread(); -#if USE_AWS_S3 - /// Thread upload snapshots to S3 in the background - void snapshotS3Thread(); -#endif /// Thread apply or wait configuration changes from leader void updateConfigurationThread(); @@ -117,10 +101,6 @@ private: /// Clears both arguments void forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions); -#if USE_AWS_S3 - std::shared_ptr getSnapshotS3Client() const; -#endif - public: /// Just allocate some objects, real initialization is done by `intialize method` KeeperDispatcher(); @@ -146,9 +126,6 @@ public: /// Registered in ConfigReloader callback. Add new configuration changes to /// update_configuration_queue. Keeper Dispatcher apply them asynchronously. void updateConfiguration(const Poco::Util::AbstractConfiguration & config); -#if USE_AWS_S3 - void updateS3Configuration(const Poco::Util::AbstractConfiguration & config); -#endif /// Shutdown internal keeper parts (server, state machine, log storage, etc) void shutdown(); diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp new file mode 100644 index 00000000000..770d8dc94a6 --- /dev/null +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -0,0 +1,293 @@ +#include + +#if USE_AWS_S3 +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +namespace fs = std::filesystem; + +namespace DB +{ + +struct KeeperSnapshotManagerS3::S3Configuration +{ + S3Configuration(S3::URI uri_, S3::AuthSettings auth_settings_, std::shared_ptr client_) + : uri(std::move(uri_)) + , auth_settings(std::move(auth_settings_)) + , client(std::move(client_)) + {} + + S3::URI uri; + S3::AuthSettings auth_settings; + std::shared_ptr client; +}; + +KeeperSnapshotManagerS3::KeeperSnapshotManagerS3() + : snapshots_s3_queue(std::numeric_limits::max()) + , log(&Poco::Logger::get("KeeperSnapshotManagerS3")) +{} + +void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractConfiguration & config) +{ + try + { + const std::string config_prefix = "keeper_server.s3_snapshot"; + + if (!config.has(config_prefix)) + { + std::lock_guard client_lock{snapshot_s3_client_mutex}; + if (snapshot_s3_client) + LOG_INFO(log, "S3 configuration was removed"); + snapshot_s3_client = nullptr; + return; + } + + auto auth_settings = S3::AuthSettings::loadFromConfig(config_prefix, config); + + auto endpoint = config.getString(config_prefix + ".endpoint"); + auto new_uri = S3::URI{Poco::URI(endpoint)}; + + std::unique_lock client_lock{snapshot_s3_client_mutex}; + + if (snapshot_s3_client && snapshot_s3_client->client && auth_settings == snapshot_s3_client->auth_settings + && snapshot_s3_client->uri.uri == new_uri.uri) + return; + + LOG_INFO(log, "S3 configuration was updated"); + + client_lock.unlock(); + + auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key); + HeaderCollection headers = auth_settings.headers; + + static constexpr size_t s3_max_redirects = 10; + static constexpr bool 
enable_s3_requests_logging = false;
+
+        if (!new_uri.key.empty())
+        {
+            LOG_ERROR(log, "Invalid endpoint defined for S3, it shouldn't contain key, endpoint: {}", endpoint);
+            return;
+        }
+
+        S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
+            auth_settings.region,
+            RemoteHostFilter(), s3_max_redirects,
+            enable_s3_requests_logging,
+            /* for_disk_s3 = */ false);
+
+        client_configuration.endpointOverride = new_uri.endpoint;
+
+        auto client = S3::ClientFactory::instance().create(
+            client_configuration,
+            new_uri.is_virtual_hosted_style,
+            credentials.GetAWSAccessKeyId(),
+            credentials.GetAWSSecretKey(),
+            auth_settings.server_side_encryption_customer_key_base64,
+            std::move(headers),
+            auth_settings.use_environment_credentials.value_or(false),
+            auth_settings.use_insecure_imds_request.value_or(false));
+
+        auto new_client = std::make_shared(std::move(new_uri), std::move(auth_settings), std::move(client));
+
+        client_lock.lock();
+        snapshot_s3_client = std::move(new_client);
+        client_lock.unlock();
+        LOG_INFO(log, "S3 client was updated");
+    }
+    catch (...)
+    {
+        LOG_ERROR(log, "Failed to create an S3 client for snapshots");
+        tryLogCurrentException(__PRETTY_FUNCTION__);
+    }
+}
+std::shared_ptr KeeperSnapshotManagerS3::getSnapshotS3Client() const
+{
+    std::lock_guard lock{snapshot_s3_client_mutex};
+    return snapshot_s3_client;
+}
+
+void KeeperSnapshotManagerS3::snapshotS3Thread()
+{
+    setThreadName("KeeperS3SnpT");
+
+    auto uuid = UUIDHelpers::generateV4();
+    while (!shutdown_called)
+    {
+        std::string snapshot_path;
+        if (!snapshots_s3_queue.pop(snapshot_path))
+            break;
+
+        if (shutdown_called)
+            break;
+
+        try
+        {
+            auto s3_client = getSnapshotS3Client();
+            if (s3_client == nullptr)
+                continue;
+
+            LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_path);
+            ReadBufferFromFile snapshot_file(snapshot_path);
+
+            S3Settings::ReadWriteSettings read_write_settings;
+            read_write_settings.upload_part_size_multiply_parts_count_threshold = 10000;
+
+            const auto create_writer = [&](const auto & key)
+            {
+                return WriteBufferFromS3
+                {
+                    s3_client->client,
+                    s3_client->uri.bucket,
+                    key,
+                    read_write_settings
+                };
+            };
+
+            auto snapshot_name = fs::path(snapshot_path).filename().string();
+            auto lock_file = fmt::format(".{}_LOCK", snapshot_name);
+
+            const auto file_exists = [&](const auto & key)
+            {
+                Aws::S3::Model::HeadObjectRequest request;
+                request.SetBucket(s3_client->uri.bucket);
+                request.SetKey(key);
+                auto outcome = s3_client->client->HeadObject(request);
+
+                if (outcome.IsSuccess())
+                    return true;
+
+                const auto & error = outcome.GetError();
+                if (error.GetErrorType() != Aws::S3::S3Errors::NO_SUCH_KEY && error.GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND)
+                    throw S3Exception(error.GetErrorType(), "Failed to verify existence of lock file: {}", error.GetMessage());
+
+                return false;
+            };
+
+            if (file_exists(snapshot_name))
+            {
+                LOG_ERROR(log, "Snapshot {} already exists", snapshot_name);
+                continue;
+            }
+
+            // First we need to verify that there isn't already a lock file for the snapshot we want to upload
+            if (file_exists(lock_file))
+            {
+                LOG_ERROR(log, "Lock file for {} already exists.
Probably a different node is already uploading the snapshot", snapshot_name); + continue; + } + + // We write our UUID to lock file + LOG_DEBUG(log, "Trying to create a lock file"); + WriteBufferFromS3 lock_writer = create_writer(lock_file); + writeUUIDText(uuid, lock_writer); + lock_writer.finalize(); + + const auto read_lock_file = [&]() -> std::string + { + ReadBufferFromS3 lock_reader + { + s3_client->client, + s3_client->uri.bucket, + lock_file, + "", + 1, + {} + }; + + std::string read_uuid; + readStringUntilEOF(read_uuid, lock_reader); + + return read_uuid; + }; + + // We read back the written UUID, if it's the same we can upload the file + auto read_uuid = read_lock_file(); + + if (read_uuid != toString(uuid)) + { + LOG_ERROR(log, "Failed to create a lock file"); + continue; + } + + WriteBufferFromS3 snapshot_writer = create_writer(snapshot_name); + copyData(snapshot_file, snapshot_writer); + snapshot_writer.finalize(); + + LOG_INFO(log, "Successfully uploaded {} to S3", snapshot_path); + + LOG_INFO(log, "Removing lock file"); + Aws::S3::Model::DeleteObjectRequest delete_request; + delete_request.SetBucket(s3_client->uri.bucket); + delete_request.SetKey(lock_file); + auto delete_outcome = s3_client->client->DeleteObject(delete_request); + + if (!delete_outcome.IsSuccess()) + throw S3Exception(delete_outcome.GetError().GetMessage(), delete_outcome.GetError().GetErrorType()); + + } + catch (...) + { + LOG_INFO(log, "Failure during upload of {} to S3", snapshot_path); + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } +} + +void KeeperSnapshotManagerS3::uploadSnapshot(const std::string & path) +{ + if (getSnapshotS3Client() == nullptr) + return; + + if (!snapshots_s3_queue.push(path)) + LOG_WARNING(log, "Failed to add snapshot {} to S3 queue", path); +} + +void KeeperSnapshotManagerS3::startup() +{ + snapshot_s3_thread = ThreadFromGlobalPool([this] { snapshotS3Thread(); }); +} + +void KeeperSnapshotManagerS3::shutdown() +{ + if (shutdown_called) + return; + + LOG_DEBUG(log, "Shutting down KeeperSnapshotManagerS3"); + shutdown_called = true; + + try + { + snapshots_s3_queue.finish(); + if (snapshot_s3_thread.joinable()) + snapshot_s3_thread.join(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + LOG_INFO(log, "KeeperSnapshotManagerS3 shut down"); +} + +} + +#endif diff --git a/src/Coordination/KeeperSnapshotManagerS3.h b/src/Coordination/KeeperSnapshotManagerS3.h new file mode 100644 index 00000000000..50464dabc2c --- /dev/null +++ b/src/Coordination/KeeperSnapshotManagerS3.h @@ -0,0 +1,63 @@ +#pragma once + +#include +#include +#include + +#if USE_AWS_S3 +#include +#include +#include + +#include +#endif + +namespace DB +{ + +#if USE_AWS_S3 +class KeeperSnapshotManagerS3 +{ +public: + KeeperSnapshotManagerS3(); + + void updateS3Configuration(const Poco::Util::AbstractConfiguration & config); + void uploadSnapshot(const std::string & path); + + void startup(); + void shutdown(); +private: + using SnapshotS3Queue = ConcurrentBoundedQueue; + SnapshotS3Queue snapshots_s3_queue; + + /// Upload new snapshots to S3 + ThreadFromGlobalPool snapshot_s3_thread; + + struct S3Configuration; + mutable std::mutex snapshot_s3_client_mutex; + std::shared_ptr snapshot_s3_client; + + std::atomic shutdown_called{false}; + + Poco::Logger * log; + + /// Thread upload snapshots to S3 in the background + void snapshotS3Thread(); + std::shared_ptr getSnapshotS3Client() const; +}; +#else +class KeeperSnapshotManagerS3 +{ +public: + KeeperSnapshotManagerS3() = default; + + void updateS3Configuration(const Poco::Util::AbstractConfiguration &) {} + void uploadSnapshot(const std::string &) {} + + void startup() {} + + void shutdown() {} +}; +#endif + +} From 67fe53e274d72dc5760898c5040732754e25f97d Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 21 Sep 2022 13:01:56 +0000 Subject: [PATCH 030/252] Fix style, add tests --- src/Functions/initializeAggregation.cpp | 1 - ..._combinators_with_over_statement.reference | 50 +++++++++++++++++++ .../02428_combinators_with_over_statement.sql | 10 ++++ ...2429_combinators_in_array_reduce.reference | 6 +++ .../02429_combinators_in_array_reduce.sql | 6 +++ ...ize_aggregation_with_combinators.reference | 3 ++ ...nitialize_aggregation_with_combinators.sql | 4 ++ 7 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02428_combinators_with_over_statement.reference create mode 100644 tests/queries/0_stateless/02428_combinators_with_over_statement.sql create mode 100644 tests/queries/0_stateless/02429_combinators_in_array_reduce.reference create mode 100644 tests/queries/0_stateless/02429_combinators_in_array_reduce.sql create mode 100644 tests/queries/0_stateless/02430_initialize_aggregation_with_combinators.reference create mode 100644 tests/queries/0_stateless/02430_initialize_aggregation_with_combinators.sql diff --git a/src/Functions/initializeAggregation.cpp b/src/Functions/initializeAggregation.cpp index 385a9571e82..08352553b9c 100644 --- a/src/Functions/initializeAggregation.cpp +++ b/src/Functions/initializeAggregation.cpp @@ -17,7 +17,6 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int BAD_ARGUMENTS; } diff --git a/tests/queries/0_stateless/02428_combinators_with_over_statement.reference b/tests/queries/0_stateless/02428_combinators_with_over_statement.reference new file mode 100644 index 00000000000..55be3f35cb1 --- /dev/null +++ b/tests/queries/0_stateless/02428_combinators_with_over_statement.reference @@ -0,0 +1,50 @@ +{1:'\0ñwRß'} +{1:'\0Dõ@='} +{1:'\07¬Ž'} +{1:'\0ÁâÞ²'} +{1:'\0"Qœ'} +{1:'\0V\'ˆã'} +{1:'\0Òâ\0¨'} +{1:'\0£_Ç'} 
+{1:'\0qµ4h'} +{1:'\0gÔ7'} +['\0ñwRß'] +['\0Dõ@='] +['\07¬Ž'] +['\0ÁâÞ²'] +['\0"Qœ'] +['\0V\'ˆã'] +['\0Òâ\0¨'] +['\0£_Ç'] +['\0qµ4h'] +['\0gÔ7'] +['\0éZÂ','\0\0'] +['\0â4nÿ','\0\0'] +['\0“Þ“','\0\0'] +['\0¹ð1','\0\0'] +['\0‹_ Date: Thu, 22 Sep 2022 13:03:27 +0000 Subject: [PATCH 031/252] Address PR comments --- src/Coordination/KeeperDispatcher.cpp | 3 +- src/Coordination/KeeperSnapshotManagerS3.cpp | 54 +++++++++++-------- src/Coordination/KeeperSnapshotManagerS3.h | 4 +- .../test_keeper_s3_snapshot/test.py | 4 ++ 4 files changed, 40 insertions(+), 25 deletions(-) diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 177996d8ee8..1b265abbfb6 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -296,7 +296,7 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf responses_thread = ThreadFromGlobalPool([this] { responseThread(); }); snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); }); - snapshot_s3.startup(); + snapshot_s3.startup(config); server = std::make_unique(configuration_and_settings, config, responses_queue, snapshots_queue); @@ -325,7 +325,6 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf /// Start it after keeper server start session_cleaner_thread = ThreadFromGlobalPool([this] { sessionCleanerTask(); }); update_configuration_thread = ThreadFromGlobalPool([this] { updateConfigurationThread(); }); - updateConfiguration(config); LOG_DEBUG(log, "Dispatcher initialized"); } diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 770d8dc94a6..4776b1f84c8 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -66,16 +66,16 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo auto endpoint = config.getString(config_prefix + ".endpoint"); auto new_uri = S3::URI{Poco::URI(endpoint)}; - std::unique_lock client_lock{snapshot_s3_client_mutex}; - - if (snapshot_s3_client && snapshot_s3_client->client && auth_settings == snapshot_s3_client->auth_settings - && snapshot_s3_client->uri.uri == new_uri.uri) - return; + { + std::lock_guard client_lock{snapshot_s3_client_mutex}; + // if client is not changed (same auth settings, same endpoint) we don't need to update + if (snapshot_s3_client && snapshot_s3_client->client && auth_settings == snapshot_s3_client->auth_settings + && snapshot_s3_client->uri.uri == new_uri.uri) + return; + } LOG_INFO(log, "S3 configuration was updated"); - client_lock.unlock(); - auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key); HeaderCollection headers = auth_settings.headers; @@ -108,9 +108,10 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo auto new_client = std::make_shared(std::move(new_uri), std::move(auth_settings), std::move(client)); - client_lock.lock(); - snapshot_s3_client = std::move(new_client); - client_lock.unlock(); + { + std::lock_guard client_lock{snapshot_s3_client_mutex}; + snapshot_s3_client = std::move(new_client); + } LOG_INFO(log, "S3 client was updated"); } catch (...) 
@@ -189,6 +190,7 @@ void KeeperSnapshotManagerS3::snapshotS3Thread()
             }
             // First we need to verify that there isn't already a lock file for the snapshot we want to upload
+            // Only the leader uploads a snapshot, but there can be a rare case where we have 2 leaders in NuRaft
             if (file_exists(lock_file))
             {
                 LOG_ERROR(log, "Lock file for {} already exists. Probably a different node is already uploading the snapshot", snapshot_name);
@@ -228,21 +230,30 @@ void KeeperSnapshotManagerS3::snapshotS3Thread()
                 continue;
             }
+            SCOPE_EXIT(
+            {
+                LOG_INFO(log, "Removing lock file");
+                try
+                {
+                    Aws::S3::Model::DeleteObjectRequest delete_request;
+                    delete_request.SetBucket(s3_client->uri.bucket);
+                    delete_request.SetKey(lock_file);
+                    auto delete_outcome = s3_client->client->DeleteObject(delete_request);
+                    if (!delete_outcome.IsSuccess())
+                        throw S3Exception(delete_outcome.GetError().GetMessage(), delete_outcome.GetError().GetErrorType());
+                }
+                catch (...)
+                {
+                    LOG_INFO(log, "Failed to delete lock file for {} from S3", snapshot_path);
+                    tryLogCurrentException(__PRETTY_FUNCTION__);
+                }
+            });
+
             WriteBufferFromS3 snapshot_writer = create_writer(snapshot_name);
             copyData(snapshot_file, snapshot_writer);
             snapshot_writer.finalize();
             LOG_INFO(log, "Successfully uploaded {} to S3", snapshot_path);
-
-            LOG_INFO(log, "Removing lock file");
-            Aws::S3::Model::DeleteObjectRequest delete_request;
-            delete_request.SetBucket(s3_client->uri.bucket);
-            delete_request.SetKey(lock_file);
-            auto delete_outcome = s3_client->client->DeleteObject(delete_request);
-
-            if (!delete_outcome.IsSuccess())
-                throw S3Exception(delete_outcome.GetError().GetMessage(), delete_outcome.GetError().GetErrorType());
-
         }
         catch (...)
         {
             LOG_INFO(log, "Failure during upload of {} to S3", snapshot_path);
             tryLogCurrentException(__PRETTY_FUNCTION__);
         }
@@ -261,8 +272,9 @@ void KeeperSnapshotManagerS3::uploadSnapshot(const std::string & path)
         LOG_WARNING(log, "Failed to add snapshot {} to S3 queue", path);
 }
-void KeeperSnapshotManagerS3::startup()
+void KeeperSnapshotManagerS3::startup(const Poco::Util::AbstractConfiguration & config)
 {
+    updateS3Configuration(config);
     snapshot_s3_thread = ThreadFromGlobalPool([this] { snapshotS3Thread(); });
 }
diff --git a/src/Coordination/KeeperSnapshotManagerS3.h b/src/Coordination/KeeperSnapshotManagerS3.h
index 50464dabc2c..393f42696f9 100644
--- a/src/Coordination/KeeperSnapshotManagerS3.h
+++ b/src/Coordination/KeeperSnapshotManagerS3.h
@@ -24,7 +24,7 @@ public:
     void updateS3Configuration(const Poco::Util::AbstractConfiguration & config);
     void uploadSnapshot(const std::string & path);
-    void startup();
+    void startup(const Poco::Util::AbstractConfiguration & config);
     void shutdown();
 private:
     using SnapshotS3Queue = ConcurrentBoundedQueue;
     SnapshotS3Queue snapshots_s3_queue;
@@ -54,7 +54,7 @@ public:
     void updateS3Configuration(const Poco::Util::AbstractConfiguration &) {}
     void uploadSnapshot(const std::string &) {}
-    void startup() {}
+    void startup(const Poco::Util::AbstractConfiguration & config) {}
     void shutdown() {}
 };
diff --git a/tests/integration/test_keeper_s3_snapshot/test.py b/tests/integration/test_keeper_s3_snapshot/test.py
index 9f335998507..3e19bc4822c 100644
--- a/tests/integration/test_keeper_s3_snapshot/test.py
+++ b/tests/integration/test_keeper_s3_snapshot/test.py
@@ -77,6 +77,8 @@ def wait_node(node):
 def test_s3_upload(started_cluster):
     node1_zk = get_fake_zk(node1.name)
+    # snapshot_distance is set to 50 in the configs,
+    # so after 50 requests we should generate a snapshot
     for _ in range(210):
         node1_zk.create("/test", sequence=True)
@@ -99,6 +101,8 @@ def test_s3_upload(started_cluster):
     destroy_zk_client(node1_zk)
node1.stop_clickhouse(kill=True) + # wait for new leader to be picked and that it continues + # uploading snapshots wait_node(node2) node2_zk = get_fake_zk(node2.name) for _ in range(200): From 9da433a904b0f4b4c94d45601898188d882f47ca Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 22 Sep 2022 14:33:35 +0000 Subject: [PATCH 032/252] Fix unused --- src/Coordination/KeeperSnapshotManagerS3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Coordination/KeeperSnapshotManagerS3.h b/src/Coordination/KeeperSnapshotManagerS3.h index 393f42696f9..afc854e6ba3 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.h +++ b/src/Coordination/KeeperSnapshotManagerS3.h @@ -54,7 +54,7 @@ public: void updateS3Configuration(const Poco::Util::AbstractConfiguration &) {} void uploadSnapshot(const std::string &) {} - void startup(const Poco::Util::AbstractConfiguration & config) {} + void startup(const Poco::Util::AbstractConfiguration &) {} void shutdown() {} }; From c23b1f4ceaa678e17562d477e734832dc1b503f1 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Fri, 23 Sep 2022 10:54:12 -0400 Subject: [PATCH 033/252] remove maybe_unused --- src/Server/TLSHandlerFactory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/TLSHandlerFactory.h b/src/Server/TLSHandlerFactory.h index 8063ffa783d..9e3002d2971 100644 --- a/src/Server/TLSHandlerFactory.h +++ b/src/Server/TLSHandlerFactory.h @@ -18,7 +18,7 @@ namespace DB class TLSHandlerFactory : public TCPServerConnectionFactory { private: - IServer & server [[maybe_unused]]; + IServer & server; Poco::Logger * log; std::string conf_name; From 1407f6bcda8c237eb1c09bd6a38750563d9e28e6 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 25 Sep 2022 15:46:12 +0000 Subject: [PATCH 034/252] add test --- tests/integration/helpers/client.py | 7 ++- .../test_composable_protocols/__init__.py | 0 .../configs/client.xml | 10 +++ .../configs/config.xml | 57 +++++++++++++++++ .../configs/server.crt | 18 ++++++ .../configs/server.key | 28 +++++++++ .../configs/users.xml | 18 ++++++ .../test_composable_protocols/test.py | 63 +++++++++++++++++++ 8 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_composable_protocols/__init__.py create mode 100644 tests/integration/test_composable_protocols/configs/client.xml create mode 100644 tests/integration/test_composable_protocols/configs/config.xml create mode 100644 tests/integration/test_composable_protocols/configs/server.crt create mode 100644 tests/integration/test_composable_protocols/configs/server.key create mode 100644 tests/integration/test_composable_protocols/configs/users.xml create mode 100644 tests/integration/test_composable_protocols/test.py diff --git a/tests/integration/helpers/client.py b/tests/integration/helpers/client.py index a4407d5b442..fa7d1b379da 100644 --- a/tests/integration/helpers/client.py +++ b/tests/integration/helpers/client.py @@ -8,13 +8,18 @@ DEFAULT_QUERY_TIMEOUT = 600 class Client: - def __init__(self, host, port=9000, command="/usr/bin/clickhouse-client"): + def __init__(self, host, port=9000, command="/usr/bin/clickhouse-client", secure=False, config=None): self.host = host self.port = port self.command = [command] if os.path.basename(command) == "clickhouse": self.command.append("client") + + if secure: + self.command.append("--secure") + if config is not None: + self.command += ["--config-file", config] self.command += ["--host", self.host, 
"--port", str(self.port), "--stacktrace"] diff --git a/tests/integration/test_composable_protocols/__init__.py b/tests/integration/test_composable_protocols/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_composable_protocols/configs/client.xml b/tests/integration/test_composable_protocols/configs/client.xml new file mode 100644 index 00000000000..15d83a7b1ab --- /dev/null +++ b/tests/integration/test_composable_protocols/configs/client.xml @@ -0,0 +1,10 @@ + + + + none + + AcceptCertificateHandler + + + + diff --git a/tests/integration/test_composable_protocols/configs/config.xml b/tests/integration/test_composable_protocols/configs/config.xml new file mode 100644 index 00000000000..553128d4386 --- /dev/null +++ b/tests/integration/test_composable_protocols/configs/config.xml @@ -0,0 +1,57 @@ + + + + + + /etc/clickhouse-server/config.d/server.crt + /etc/clickhouse-server/config.d/server.key + none + true + true + sslv2,sslv3 + true + + + + 0.0.0.0 + + + + tcp + 0.0.0.0 + 9000 + native protocol (tcp) + + + tls + tcp + 9440 + secure native protocol (tcp_secure) + + + tcp + 0.0.0.0 + 9001 + native protocol endpoint (tcp) + + + http + 8123 + http protocol + + + tls + http + 0.0.0.0 + 8443 + https protocol + + + https + 8444 + https protocol endpoint + + + + + diff --git a/tests/integration/test_composable_protocols/configs/server.crt b/tests/integration/test_composable_protocols/configs/server.crt new file mode 100644 index 00000000000..6f4deca038f --- /dev/null +++ b/tests/integration/test_composable_protocols/configs/server.crt @@ -0,0 +1,18 @@ +-----BEGIN CERTIFICATE----- +MIIC+zCCAeOgAwIBAgIJAIhI9ozZJ+TWMA0GCSqGSIb3DQEBCwUAMBQxEjAQBgNV +BAMMCWxvY2FsaG9zdDAeFw0xOTA0MjIwNDMyNTJaFw0yMDA0MjEwNDMyNTJaMBQx +EjAQBgNVBAMMCWxvY2FsaG9zdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC +ggEBAK+wVUEdqF2uXvN0MJBgnAHyXi6JTi4p/F6igsrCjSNjJWzHH0vQmK8ujfcF +CkifW88i+W5eHctuEtQqNHK+t9x9YiZtXrj6m/XkOXs20mYgENSmbbbHbriTPnZB +zZrq6UqMlwIHNNAa+I3NMORQxVRaI0ybXnGVO5elr70xHpk03xL0JWKHpEqYp4db +2aBQgF6y3Ww4khxjIYqpUYXWXGFnVIRU7FKVEAM1xyKqvQzXjQ5sVM/wyHknveEF +3b/X4ggN+KNl5KOc0cWDh1/XaatJAPaUUPqZcq76tynLbP64Xm3dxHcj+gtRkO67 +ef6MSg6l63m3XQP6Qb+MIkd06OsCAwEAAaNQME4wHQYDVR0OBBYEFDmODTO8QLDN +ykR3x0LIOnjNhrKhMB8GA1UdIwQYMBaAFDmODTO8QLDNykR3x0LIOnjNhrKhMAwG +A1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAAwaiJc7uqEpnH3aukbftDwX +m8GfEnj1HVdgg+9GGNq+9rvUYBF6gdPmjRCX9dO0cclLFx8jc2org0rTSq9WoOhX +E6qL4Eqrmc5SE3Y9jZM0h6GRD4oXK014FmtZ3T6ddZU3dQLj3BS2r1XrvmubTvGN +ZuTJNY8nx8Hh6H5XINmsEjUF9E5hog+PwCE03xt2adIdYL+gsbxASeNYyeUFpZv5 +zcXR3VoakBWnAaOVgCHq2qh96QAnL7ZKzFkGf/MdwV10KU3dmb+ICbQUUdf9Gc17 +aaDCIRws312F433FdXBkGs2UkB7ZZme9dfn6O1QbeTNvex2VLMqYx/CTkfFbOQA= +-----END CERTIFICATE----- diff --git a/tests/integration/test_composable_protocols/configs/server.key b/tests/integration/test_composable_protocols/configs/server.key new file mode 100644 index 00000000000..6eddb3295db --- /dev/null +++ b/tests/integration/test_composable_protocols/configs/server.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCvsFVBHahdrl7z +dDCQYJwB8l4uiU4uKfxeooLKwo0jYyVsxx9L0JivLo33BQpIn1vPIvluXh3LbhLU +KjRyvrfcfWImbV64+pv15Dl7NtJmIBDUpm22x264kz52Qc2a6ulKjJcCBzTQGviN +zTDkUMVUWiNMm15xlTuXpa+9MR6ZNN8S9CVih6RKmKeHW9mgUIBest1sOJIcYyGK +qVGF1lxhZ1SEVOxSlRADNcciqr0M140ObFTP8Mh5J73hBd2/1+IIDfijZeSjnNHF +g4df12mrSQD2lFD6mXKu+rcpy2z+uF5t3cR3I/oLUZDuu3n+jEoOpet5t10D+kG/ +jCJHdOjrAgMBAAECggEARF66zrxb6RkSmmt8+rKeA6PuQu3sHsr4C1vyyjUr97l9 
+tvdGlpp20LWtSZQMjHZ3pARYTTsTHTeY3DgQcRcHNicVKx8k3ZepWeeW9vw+pL+V +zSt3RsoVrH6gsCSrfr4sS3aqzX9AbjwQvh48CJ3mLQ1m70kHV+xbZIh1+4pB/hyP +1wKyUE18ZkOptXvO/TtoHzLQCecpkXtWzmry1Eh2isvXA+NMrAtLibGsyM1mtm7i +5ozevzHabvvCDBEe+KgZdONgVhhhvm2eOd+/s4w3rw4ETud4fI/ZAJyWXhiIKFnA +VJbElWruSAoVBW7p2bsF5PbmVzvo8vXL+VylxYD+AQKBgQDhLoRKTVhNkn/QjKxq +sdOh+QZra0LzjVpAmkQzu7wZMSHEz9qePQciDQQrYKrmRF1vNcIRCVUTqWYheJ/1 +lKRrCGa0ab6k96zkWMqLHD5u+UeJV7r1dJIx08ME9kNJ+x/XtB8klRIji16NiQUS +qc6p8z0M2AnbJzsRfWZRH8FeYwKBgQDHu8dzdtVGI7MtxfPOE/bfajiopDg8BdTC +pdug2T8XofRHRq7Q+0vYjTAZFT/slib91Pk6VvvPdo9VBZiL4omv4dAq6mOOdX/c +U14mJe1X5GCrr8ExZ8BfNJ3t/6sV1fcxyJwAw7iBguqxA2JqdM/wFk10K8XqvzVn +CD6O9yGt2QKBgFX1BMi8N538809vs41S7l9hCQNOQZNo/O+2M5yv6ECRkbtoQKKw +1x03bMUGNJaLuELweXE5Z8GGo5bZTe5X3F+DKHlr+DtO1C+ieUaa9HY2MAmMdLCn +2/qrREGLo+oEs4YKmuzC/taUp/ZNPKOAMISNdluFyFVg51pozPrgrVbTAoGBAKkE +LBl3O67o0t0vH8sJdeVFG8EJhlS0koBMnfgVHqC++dm+5HwPyvTrNQJkyv1HaqNt +r6FArkG3ED9gRuBIyT6+lctbIPgSUip9mbQqcBfqOCvQxGksZMur2ODncz09HLtS +CUFUXjOqNzOnq4ZuZu/Bz7U4vXiSaXxQq6+LTUKxAoGAFZU/qrI06XxnrE9A1X0W +l7DSkpZaDcu11NrZ473yONih/xOZNh4SSBpX8a7F6Pmh9BdtGqphML8NFPvQKcfP +b9H2iid2tc292uyrUEb5uTMmv61zoTwtitqLzO0+tS6PT3fXobX+eyeEWKzPBljL +HFtxG5CCXpkdnWRmaJnhTzA= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_composable_protocols/configs/users.xml b/tests/integration/test_composable_protocols/configs/users.xml new file mode 100644 index 00000000000..6f94d1696e3 --- /dev/null +++ b/tests/integration/test_composable_protocols/configs/users.xml @@ -0,0 +1,18 @@ + + + + 10000000000 + 64999 + + + + + + + + ::/0 + + default + + + diff --git a/tests/integration/test_composable_protocols/test.py b/tests/integration/test_composable_protocols/test.py new file mode 100644 index 00000000000..d4607495da7 --- /dev/null +++ b/tests/integration/test_composable_protocols/test.py @@ -0,0 +1,63 @@ +import ssl +import pytest +import os.path as p +import os +from helpers.cluster import ClickHouseCluster +from helpers.client import Client +import urllib.request, urllib.parse + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + +cluster = ClickHouseCluster(__file__) +server = cluster.add_instance("server", base_config_dir="configs", main_configs=["configs/server.crt", "configs/server.key"]) + + +@pytest.fixture(scope="module", autouse=True) +def setup_nodes(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def execute_query_https(host, port, query): + url = ( + f"https://{host}:{port}/?query={urllib.parse.quote(query)}" + ) + + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + + request = urllib.request.Request(url) + response = urllib.request.urlopen(request, context=ctx).read() + return response.decode("utf-8") + + +def execute_query_http(host, port, query): + url = ( + f"http://{host}:{port}/?query={urllib.parse.quote(query)}" + ) + + request = urllib.request.Request(url) + response = urllib.request.urlopen(request).read() + return response.decode("utf-8") + + +def test_connections(): + + client = Client(server.ip_address, 9000, command=cluster.client_bin_path) + assert client.query("SELECT 1") == "1\n" + + client = Client(server.ip_address, 9440, command=cluster.client_bin_path, secure=True, config=f"{SCRIPT_DIR}/configs/client.xml") + assert client.query("SELECT 1") == "1\n" + + client = Client(server.ip_address, 9001, command=cluster.client_bin_path) + assert client.query("SELECT 1") == "1\n" + + assert execute_query_http(server.ip_address, 8123, "SELECT 1") == "1\n" + + 
assert execute_query_https(server.ip_address, 8443, "SELECT 1") == "1\n"
+
+    assert execute_query_https(server.ip_address, 8444, "SELECT 1") == "1\n"

From 7a6386c7329aeb81cf6cd6304fd78022cd31fd6e Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Sun, 25 Sep 2022 16:13:17 +0000
Subject: [PATCH 035/252] Automatic style fix

---
 tests/integration/helpers/client.py | 11 ++++++++--
 .../test_composable_protocols/test.py | 22 ++++++++++++-------
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/tests/integration/helpers/client.py b/tests/integration/helpers/client.py
index fa7d1b379da..ab1cc65e9a9 100644
--- a/tests/integration/helpers/client.py
+++ b/tests/integration/helpers/client.py
@@ -8,14 +8,21 @@ DEFAULT_QUERY_TIMEOUT = 600
 class Client:
-    def __init__(self, host, port=9000, command="/usr/bin/clickhouse-client", secure=False, config=None):
+    def __init__(
+        self,
+        host,
+        port=9000,
+        command="/usr/bin/clickhouse-client",
+        secure=False,
+        config=None,
+    ):
         self.host = host
         self.port = port
         self.command = [command]
         if os.path.basename(command) == "clickhouse":
             self.command.append("client")
-
+
         if secure:
             self.command.append("--secure")
         if config is not None:
             self.command += ["--config-file", config]
         self.command += ["--host", self.host, "--port", str(self.port), "--stacktrace"]

diff --git a/tests/integration/test_composable_protocols/test.py b/tests/integration/test_composable_protocols/test.py
index d4607495da7..c0c0e5e0a83 100644
--- a/tests/integration/test_composable_protocols/test.py
+++ b/tests/integration/test_composable_protocols/test.py
@@ -9,7 +9,11 @@ import urllib.request, urllib.parse
 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
 cluster = ClickHouseCluster(__file__)
-server = cluster.add_instance("server", base_config_dir="configs", main_configs=["configs/server.crt", "configs/server.key"])
+server = cluster.add_instance(
+    "server",
+    base_config_dir="configs",
+    main_configs=["configs/server.crt", "configs/server.key"],
+)
 @pytest.fixture(scope="module", autouse=True)
 def setup_nodes():
@@ -22,9 +26,7 @@ def setup_nodes():
 def execute_query_https(host, port, query):
-    url = (
-        f"https://{host}:{port}/?query={urllib.parse.quote(query)}"
-    )
+    url = f"https://{host}:{port}/?query={urllib.parse.quote(query)}"
     ctx = ssl.create_default_context()
     ctx.check_hostname = False
@@ -36,9 +38,7 @@ def execute_query_https(host, port, query):
 def execute_query_http(host, port, query):
-    url = (
-        f"http://{host}:{port}/?query={urllib.parse.quote(query)}"
-    )
+    url = f"http://{host}:{port}/?query={urllib.parse.quote(query)}"
     request = urllib.request.Request(url)
     response = urllib.request.urlopen(request).read()
@@ -50,7 +50,13 @@ def test_connections():
     client = Client(server.ip_address, 9000, command=cluster.client_bin_path)
     assert client.query("SELECT 1") == "1\n"
-    client = Client(server.ip_address, 9440, command=cluster.client_bin_path, secure=True, config=f"{SCRIPT_DIR}/configs/client.xml")
+    client = Client(
+        server.ip_address,
+        9440,
+        command=cluster.client_bin_path,
+        secure=True,
+        config=f"{SCRIPT_DIR}/configs/client.xml",
+    )
     assert client.query("SELECT 1") == "1\n"
     client = Client(server.ip_address, 9001, command=cluster.client_bin_path)

From 9b20468198992737351650469e620c6dc7edd107 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Tue, 27 Sep 2022 22:46:44 +0200
Subject: [PATCH 036/252] tests: fix debug symbols (and possible crashes) for backward compatibility check

stress runs the previous version of the server w/o correct debug symbols
right now, since nobody restores the clickhouse.debug file, and this can
lead to the following issues, like in [1]:
- incorrect stack traces
- gdb crashes
- clickhouse crashes due to the non-robust internal DWARF parser (probably)

[1]: https://s3.amazonaws.com/clickhouse-test-reports/41730/8cc53a48ae99a765085f44a75fa49314d1f1cc7d/stress_test__ubsan_.html

Right now I decided not to rework the script to make it less error-prone,
but simply fix the problem.

Signed-off-by: Azat Khuzhin
---
 docker/test/stress/run.sh | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh
index bf76fb20928..e982f6988d3 100755
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@@ -386,12 +386,23 @@ else
     clickhouse-client --query="SELECT 'Server version: ', version()"
-    # Install new package before running stress test because we should use new clickhouse-client and new clickhouse-test
-    # But we should leave old binary in /usr/bin/ for gdb (so it will print sane stacktarces)
+    # Install new package before running stress test because we should use new
+    # clickhouse-client and new clickhouse-test.
+    #
+    # But we should leave old binary in /usr/bin/ and debug symbols in
+    # /usr/lib/debug/usr/bin (if any) for gdb and internal DWARF parser, so they
+    # will print sane stacktraces and to avoid possible crashes.
+    #
+    # FIXME: those files can be extracted directly from debian package, but
+    # actually a better solution would be to use a different PATH instead of
+    # playing games with files from packages.
     mv /usr/bin/clickhouse previous_release_package_folder/
+    mv /usr/lib/debug/usr/bin/clickhouse.debug previous_release_package_folder/
     install_packages package_folder
     mv /usr/bin/clickhouse package_folder/
+    mv /usr/lib/debug/usr/bin/clickhouse.debug package_folder/
     mv previous_release_package_folder/clickhouse /usr/bin/
+    mv previous_release_package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug
     mkdir tmp_stress_output
@@ -407,6 +418,7 @@ else
     # Start new server
     mv package_folder/clickhouse /usr/bin/
+    mv package_folder/clickhouse.debug /usr/lib/debug/usr/bin/clickhouse.debug
     configure
     start 500
     clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \

From 2036641ea856859a6ccd07d1f9b5f8f760c92121 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Wed, 28 Sep 2022 13:42:46 +0200
Subject: [PATCH 037/252] Fix 02267_file_globs_schema_inference.sql flakiness

---
 tests/queries/0_stateless/02267_file_globs_schema_inference.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql
index 6862d6f0602..b51c0cf6fa1 100644
--- a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql
+++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql
@@ -5,7 +5,7 @@ insert into function file('02267_data3.jsonl') select * from numbers(0);
 insert into function file('02267_data4.jsonl') select 1 as x;
 select * from file('02267_data*.jsonl') order by x;
-insert into function file('02267_data1.jsonl', 'TSV') select 1 as x;
+insert into function file('02267_data4.jsonl', 'TSV') select 1 as x;
 insert into function file('02267_data1.jsonl', 'TSV') select [1,2,3] as x;
 select * from file('02267_data*.jsonl') settings schema_inference_use_cache_for_file=0; --{serverError INCORRECT_DATA}

From 1bd7e531db91a28d863e47d950ff9e2ba0c1992a Mon Sep 17 00:00:00 2001
From: avogar
Date: Wed, 28 Sep 2022 12:07:25 +0000
Subject: [PATCH 038/252] Better exception message for duplicate column names in schema inference

---
 src/Processors/Formats/ISchemaReader.cpp | 18 ++++++++++++++++++
 ..._column_names_in_schema_inference.reference | 0
 ...licate_column_names_in_schema_inference.sql | 5 +++++
 3 files changed, 23 insertions(+)
 create mode 100644 tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.reference
 create mode 100644 tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.sql

diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp
Subject: [PATCH 038/252] Better exception message for duplicate column names in schema inference --- src/Processors/Formats/ISchemaReader.cpp | 18 ++++++++++++++++++ ..._column_names_in_schema_inference.reference | 0 ...licate_column_names_in_schema_inference.sql | 5 +++++ 3 files changed, 23 insertions(+) create mode 100644 tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.reference create mode 100644 tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.sql diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 0e4d3f091b2..6f3f6f563aa 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -132,6 +132,16 @@ NamesAndTypesList IRowSchemaReader::readSchema() ErrorCodes::INCORRECT_DATA, "The number of column names {} differs with the number of types {}", column_names.size(), data_types.size()); } + else + { + std::unordered_set names_set; + for (const auto & name : column_names) + { + if (names_set.contains(name)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Duplicate column name found while schema inference: \"{}\"", name); + names_set.insert(name); + } + } for (size_t i = 0; i != column_names.size(); ++i) { @@ -224,6 +234,9 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() names_order.reserve(names_and_types.size()); for (const auto & [name, type] : names_and_types) { + if (names_to_types.contains(name)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Duplicate column name found while schema inference: \"{}\"", name); + auto hint_it = hints.find(name); if (hint_it != hints.end()) names_to_types[name] = hint_it->second; @@ -240,8 +253,13 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() /// We reached eof. break; + std::unordered_set names_set; /// We should check for duplicate column names in current row for (auto & [name, new_type] : new_names_and_types) { + if (names_set.contains(name)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Duplicate column name found while schema inference: \"{}\"", name); + names_set.insert(name); + auto it = names_to_types.find(name); /// If we didn't see this column before, just add it. 
if (it == names_to_types.end()) diff --git a/tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.reference b/tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.sql b/tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.sql new file mode 100644 index 00000000000..3eb3549ee48 --- /dev/null +++ b/tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.sql @@ -0,0 +1,5 @@ +desc format(JSONEachRow, '{"x" : 1, "x" : 2}'); -- {serverError INCORRECT_DATA} +desc format(JSONEachRow, '{"x" : 1, "y" : 2}\n{"x" : 2, "x" : 3}'); -- {serverError INCORRECT_DATA} +desc format(CSVWithNames, 'a,b,a\n1,2,3'); -- {serverError INCORRECT_DATA} +desc format(CSV, '1,2,3') settings column_names_for_schema_inference='a, b, a'; -- {serverError INCORRECT_DATA} + From 2583e6d3ce0bd718d101fe641a901eeba91a41a2 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 28 Sep 2022 13:14:54 +0000 Subject: [PATCH 039/252] Use string_view --- src/Processors/Formats/ISchemaReader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 6f3f6f563aa..a26ed6b0b40 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -134,7 +134,7 @@ NamesAndTypesList IRowSchemaReader::readSchema() } else { - std::unordered_set names_set; + std::unordered_set names_set; for (const auto & name : column_names) { if (names_set.contains(name)) @@ -253,7 +253,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() /// We reached eof. 
break; - std::unordered_set names_set; /// We should check for duplicate column names in current row + std::unordered_set names_set; /// We should check for duplicate column names in current row for (auto & [name, new_type] : new_names_and_types) { if (names_set.contains(name)) From 4f427e5b805ce13b5220efb6320a1adeb11c4db5 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 28 Sep 2022 16:34:26 +0200 Subject: [PATCH 040/252] Truncate files on first insert --- .../0_stateless/02267_file_globs_schema_inference.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql index b51c0cf6fa1..b2a2997beab 100644 --- a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql +++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql @@ -1,11 +1,11 @@ -- Tags: no-fasttest, no-parallel -insert into function file('02267_data2.jsonl') select NULL as x; -insert into function file('02267_data3.jsonl') select * from numbers(0); -insert into function file('02267_data4.jsonl') select 1 as x; +insert into function file('02267_data2.jsonl') select NULL as x SETTINGS engine_file_truncate_on_insert = 1; +insert into function file('02267_data3.jsonl') select * from numbers(0) SETTINGS engine_file_truncate_on_insert = 1; +insert into function file('02267_data4.jsonl') select 1 as x SETTINGS engine_file_truncate_on_insert = 1; select * from file('02267_data*.jsonl') order by x; insert into function file('02267_data4.jsonl', 'TSV') select 1 as x; -insert into function file('02267_data1.jsonl', 'TSV') select [1,2,3] as x; +insert into function file('02267_data1.jsonl', 'TSV') select [1,2,3] as x SETTINGS engine_file_truncate_on_insert = 1; select * from file('02267_data*.jsonl') settings schema_inference_use_cache_for_file=0; --{serverError INCORRECT_DATA} From e16cfd361b49c6fd459098c88d8a8d5468d9d64d Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 28 Sep 2022 16:54:59 +0000 Subject: [PATCH 041/252] Improve DateTime type inference for text formats --- src/Formats/EscapingRuleUtils.cpp | 55 ++++++++++++++----- .../02456_datetime_schema_inference.reference | 11 ++++ .../02456_datetime_schema_inference.sql | 15 +++++ 3 files changed, 68 insertions(+), 13 deletions(-) create mode 100644 tests/queries/0_stateless/02456_datetime_schema_inference.reference create mode 100644 tests/queries/0_stateless/02456_datetime_schema_inference.sql diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index ef554ecdcdf..7bfddfc6c77 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -453,23 +454,51 @@ void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & secon second = std::move(types[1]); } -DataTypePtr tryInferDateOrDateTime(const std::string_view & field, const FormatSettings & settings) +bool tryInferDate(const std::string_view & field) { - if (settings.try_infer_dates) + ReadBufferFromString buf(field); + DayNum tmp; + return tryReadDateText(tmp, buf) && buf.eof(); +} + +bool tryInferDateTime(const std::string_view & field, const FormatSettings & settings) +{ + ReadBufferFromString buf(field); + Float64 tmp_float; + /// Check if it's just a number, and if so, don't try to infer DateTime from it, + /// because we can interpret this number as a timestamp and it will lead to 
+ /// inferring DateTime instead of simple Int64/Float64 in some cases. + if (tryReadFloatText(tmp_float, buf) && buf.eof()) + return false; + + buf.seek(0, SEEK_SET); /// Return position to the beginning + DateTime64 tmp; + switch (settings.date_time_input_format) { - ReadBufferFromString buf(field); - DayNum tmp; - if (tryReadDateText(tmp, buf) && buf.eof()) - return makeNullable(std::make_shared()); + case FormatSettings::DateTimeInputFormat::Basic: + if (tryReadDateTime64Text(tmp, 9, buf) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffort: + if (tryParseDateTime64BestEffort(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof()) + return true; + break; + case FormatSettings::DateTimeInputFormat::BestEffortUS: + if (tryParseDateTime64BestEffortUS(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof()) + return true; + break; } - if (settings.try_infer_datetimes) - { - ReadBufferFromString buf(field); - DateTime64 tmp; - if (tryReadDateTime64Text(tmp, 9, buf) && buf.eof()) - return makeNullable(std::make_shared(9)); - } + return false; +} + +DataTypePtr tryInferDateOrDateTime(const std::string_view & field, const FormatSettings & settings) +{ + if (settings.try_infer_dates && tryInferDate(field)) + return makeNullable(std::make_shared()); + + if (settings.try_infer_datetimes && tryInferDateTime(field, settings)) + return makeNullable(std::make_shared(9)); return nullptr; } diff --git a/tests/queries/0_stateless/02456_datetime_schema_inference.reference b/tests/queries/0_stateless/02456_datetime_schema_inference.reference new file mode 100644 index 00000000000..2b88f2783dc --- /dev/null +++ b/tests/queries/0_stateless/02456_datetime_schema_inference.reference @@ -0,0 +1,11 @@ +222222222222222 +22222222222.2222 +2022-04-22 03:45:06.381000000 +2022-04-22T03:45:06.381Z +01/12/1925 +2022-04-22 03:45:06.381000000 +2022-04-22 03:45:06.381000000 +1925-12-01 00:00:00.000000000 +2022-04-22 03:45:06.381000000 +2022-04-22 03:45:06.381000000 +1925-01-12 00:00:00.000000000 diff --git a/tests/queries/0_stateless/02456_datetime_schema_inference.sql b/tests/queries/0_stateless/02456_datetime_schema_inference.sql new file mode 100644 index 00000000000..f00dba1a9ea --- /dev/null +++ b/tests/queries/0_stateless/02456_datetime_schema_inference.sql @@ -0,0 +1,15 @@ +select * from format('TSV', '222222222222222'); +select * from format('TSV', '22222222222.2222'); +set date_time_input_format = 'basic'; +select * from format('TSV', '2022-04-22T03:45:06.381'); +select * from format('TSV', '2022-04-22T03:45:06.381Z'); +select * from format('TSV', '01/12/1925'); +set date_time_input_format = 'best_effort'; +select * from format('TSV', '2022-04-22T03:45:06.381'); +select * from format('TSV', '2022-04-22T03:45:06.381Z'); +select * from format('TSV', '01/12/1925'); +set date_time_input_format = 'best_effort_us'; +select * from format('TSV', '2022-04-22T03:45:06.381'); +select * from format('TSV', '2022-04-22T03:45:06.381Z'); +select * from format('TSV', '01/12/1925'); + From 5e40f2ebcaf222ea4bc82aca4314c65551245c92 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 29 Sep 2022 00:13:40 +0000 Subject: [PATCH 042/252] review suggestions --- programs/server/Server.cpp | 92 ++++++++++++------------- src/Server/TCPProtocolStackData.h | 5 ++ src/Server/TCPProtocolStackFactory.h | 3 +- src/Server/TCPProtocolStackHandler.h | 5 +- src/Server/TCPServerConnectionFactory.h | 2 +- 5 files changed, 56 insertions(+), 51 deletions(-) diff --git 
a/programs/server/Server.cpp b/programs/server/Server.cpp index 04c708e3a0f..3154af81ae8 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1928,62 +1928,62 @@ void Server::createServers( std::string prefix = conf_name + "."; std::unordered_set pset {conf_name}; - if (config.has(prefix + "port")) + if (!config.has(prefix + "port")) + continue; + + std::string description {" protocol"}; + if (config.has(prefix + "description")) + description = config.getString(prefix + "description"); + std::string port_name = prefix + "port"; + bool is_secure = false; + auto stack = std::make_unique(*this, conf_name); + + while (true) { - std::string description {" protocol"}; - if (config.has(prefix + "description")) - description = config.getString(prefix + "description"); - std::string port_name = prefix + "port"; - bool is_secure = false; - auto stack = std::make_unique(*this, conf_name); - - while (true) + // if there is no "type" - it's a reference to another protocol and this is just an endpoint + if (config.has(prefix + "type")) { - // if there is no "type" - it's a reference to another protocol and this is just an endpoint - if (config.has(prefix + "type")) + std::string type = config.getString(prefix + "type"); + if (type == "tls") { - std::string type = config.getString(prefix + "type"); - if (type == "tls") - { - if (is_secure) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); - is_secure = true; - } - - stack->append(create_factory(type, conf_name)); + if (is_secure) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); + is_secure = true; } - if (!config.has(prefix + "impl")) - break; - - conf_name = "protocols." + config.getString(prefix + "impl"); - prefix = conf_name + "."; - - if (!pset.insert(conf_name).second) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); + stack->append(create_factory(type, conf_name)); } - if (!stack || stack->size() == 0) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); + if (!config.has(prefix + "impl")) + break; - createServer(config, host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, host, port, is_secure); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); + conf_name = "protocols." 
+ config.getString(prefix + "impl"); + prefix = conf_name + "."; - return ProtocolServerAdapter( - host, - port_name.c_str(), - description + ": " + address.toString(), - std::make_unique( - stack.release(), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); + if (!pset.insert(conf_name).second) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); } + + if (stack->empty()) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); + + createServer(config, host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, host, port, is_secure); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + + return ProtocolServerAdapter( + host, + port_name.c_str(), + description + ": " + address.toString(), + std::make_unique( + stack.release(), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); } } diff --git a/src/Server/TCPProtocolStackData.h b/src/Server/TCPProtocolStackData.h index f2d00d8a845..4ad401e723f 100644 --- a/src/Server/TCPProtocolStackData.h +++ b/src/Server/TCPProtocolStackData.h @@ -6,11 +6,16 @@ namespace DB { +// Data to communicate between protocol layers struct TCPProtocolStackData { + // socket implementation can be replaced by some layer - TLS as an example Poco::Net::StreamSocket socket; + // host from PROXY layer std::string forwarded_for; + // certificate path from TLS layer to TCP layer std::string certificate; + // default database from endpoint configuration to TCP layer std::string default_database; }; diff --git a/src/Server/TCPProtocolStackFactory.h b/src/Server/TCPProtocolStackFactory.h index c0ec29411d4..448b019b849 100644 --- a/src/Server/TCPProtocolStackFactory.h +++ b/src/Server/TCPProtocolStackFactory.h @@ -25,7 +25,7 @@ private: IServer & server [[maybe_unused]]; Poco::Logger * log; std::string conf_name; - std::list stack; + std::vector stack; AllowedClientHosts allowed_client_hosts; class DummyTCPHandler : public Poco::Net::TCPServerConnection @@ -85,6 +85,7 @@ public: } size_t size() { return stack.size(); } + bool empty() { return stack.empty(); } }; diff --git a/src/Server/TCPProtocolStackHandler.h b/src/Server/TCPProtocolStackHandler.h index 9ca388da17b..e16a6b6b2ca 100644 --- a/src/Server/TCPProtocolStackHandler.h +++ b/src/Server/TCPProtocolStackHandler.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -19,11 +18,11 @@ class TCPProtocolStackHandler : public Poco::Net::TCPServerConnection private: IServer & server; TCPServer & tcp_server; - std::list stack; + std::vector stack; std::string conf_name; public: - TCPProtocolStackHandler(IServer & server_, TCPServer & tcp_server_, const StreamSocket & socket, const std::list & stack_, const std::string & conf_name_) + TCPProtocolStackHandler(IServer & server_, TCPServer & tcp_server_, const StreamSocket & socket, const std::vector & stack_, const std::string & conf_name_) : TCPServerConnection(socket), server(server_), tcp_server(tcp_server_), stack(stack_), conf_name(conf_name_) {} diff --git a/src/Server/TCPServerConnectionFactory.h b/src/Server/TCPServerConnectionFactory.h index ab9b0848ed7..18b30557b00 100644 --- a/src/Server/TCPServerConnectionFactory.h +++ b/src/Server/TCPServerConnectionFactory.h @@ -1,7 +1,7 @@ #pragma once #include -#include 
"Server/TCPProtocolStackData.h" +#include namespace Poco { From 3f9c9c9a77ed9906c186962c80935b43dc1babdf Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 30 Sep 2022 19:11:05 +0200 Subject: [PATCH 043/252] fix intersecting part if session expired during queue initialization --- src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index c7a7c18848f..46146a8eebb 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -41,7 +41,7 @@ ReplicatedMergeTreeQueue::ReplicatedMergeTreeQueue(StorageReplicatedMergeTree & void ReplicatedMergeTreeQueue::clear() { auto locks = lockQueue(); - assert(future_parts.empty()); + chassert(future_parts.empty()); current_parts.clear(); virtual_parts.clear(); queue.clear(); @@ -62,6 +62,7 @@ void ReplicatedMergeTreeQueue::setBrokenPartsToEnqueueFetchesOnLoading(Strings & void ReplicatedMergeTreeQueue::initialize(zkutil::ZooKeeperPtr zookeeper) { + clear(); std::lock_guard lock(state_mutex); LOG_TRACE(log, "Initializing parts in queue"); From a2c29076d604054f5b098f6256bcc25e3ef6a66b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 30 Sep 2022 20:09:18 +0200 Subject: [PATCH 044/252] avoid detached broken part duplicates --- .../MergeTree/DataPartStorageOnDisk.cpp | 62 ++++++++++++- .../MergeTree/DataPartStorageOnDisk.h | 5 +- src/Storages/MergeTree/IDataPartStorage.h | 6 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 21 +++-- src/Storages/MergeTree/IMergeTreeDataPart.h | 4 +- src/Storages/MergeTree/MergeTreeData.cpp | 4 +- .../MergeTree/MergeTreeDataPartInMemory.cpp | 2 +- .../test_partition/configs/testkeeper.xml | 6 ++ tests/integration/test_partition/test.py | 91 ++++++++++++++++++- 9 files changed, 186 insertions(+), 15 deletions(-) create mode 100644 tests/integration/test_partition/configs/testkeeper.xml diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp index e2a2f3f793f..7b95224d28a 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp @@ -406,7 +406,7 @@ void DataPartStorageOnDisk::clearDirectory( } } -std::string DataPartStorageOnDisk::getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached) const +std::optional DataPartStorageOnDisk::getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached, bool broken) const { String res; @@ -414,6 +414,9 @@ std::string DataPartStorageOnDisk::getRelativePathForPrefix(Poco::Logger * log, if (detached) full_relative_path /= "detached"; + std::optional original_checksums_content; + std::optional original_files_list; + for (int try_no = 0; try_no < 10; ++try_no) { res = (prefix.empty() ? "" : prefix + "_") + part_dir + (try_no ? "_try" + DB::toString(try_no) : ""); @@ -421,12 +424,69 @@ std::string DataPartStorageOnDisk::getRelativePathForPrefix(Poco::Logger * log, if (!volume->getDisk()->exists(full_relative_path / res)) return res; + if (broken && looksLikeBrokenDetachedPartHasTheSameContent(res, original_checksums_content, original_files_list)) + { + LOG_WARNING(log, "Directory {} (to detach to) already exists, " + "but its content looks similar to content of the broken part which we are going to detach. 
" + "Assuming it was already cloned to detached, will not do it again to avoid redundant copies of broken part.", res); + return {}; + } + LOG_WARNING(log, "Directory {} (to detach to) already exists. Will detach to directory with '_tryN' suffix.", res); } return res; } +bool DataPartStorageOnDisk::looksLikeBrokenDetachedPartHasTheSameContent(const String & detached_part_path, + std::optional & original_checksums_content, + std::optional & original_files_list) const +{ + /// We cannot know for sure that content of detached part is the same, + /// but in most cases it's enough to compare checksums.txt and list of files. + + if (!exists("checksums.txt")) + return false; + + auto detached_full_path = fs::path(root_path) / "detached" / detached_part_path; + auto disk = volume->getDisk(); + if (!disk->exists(detached_full_path / "checksums.txt")) + return false; + + if (!original_checksums_content) + { + auto in = disk->readFile(detached_full_path / "checksums.txt", /* settings */ {}, /* read_hint */ {}, /* file_size */ {}); + original_checksums_content.emplace(); + readStringUntilEOF(*original_checksums_content, *in); + } + + if (original_checksums_content->empty()) + return false; + + auto part_full_path = fs::path(root_path) / part_dir; + String detached_checksums_content; + { + auto in = readFile("checksums.txt", /* settings */ {}, /* read_hint */ {}, /* file_size */ {}); + readStringUntilEOF(detached_checksums_content, *in); + } + + if (original_checksums_content != detached_checksums_content) + return false; + + if (!original_files_list) + { + original_files_list.emplace(); + disk->listFiles(part_full_path, *original_files_list); + std::sort(original_files_list->begin(), original_files_list->end()); + } + + Strings detached_files_list; + disk->listFiles(detached_full_path, detached_files_list); + std::sort(detached_files_list.begin(), detached_files_list.end()); + + return original_files_list == detached_files_list; +} + void DataPartStorageBuilderOnDisk::setRelativePath(const std::string & path) { part_dir = path; diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.h b/src/Storages/MergeTree/DataPartStorageOnDisk.h index adf1b78cdfb..c457e4a7291 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.h +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.h @@ -52,7 +52,10 @@ public: MergeTreeDataPartState state, Poco::Logger * log) override; - std::string getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached) const override; + std::optional getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached, bool broken) const override; + + bool looksLikeBrokenDetachedPartHasTheSameContent(const String & detached_part_path, std::optional & original_checksums_content, + std::optional & original_files_list) const override; void setRelativePath(const std::string & path) override; void onRename(const std::string & new_root_path, const std::string & new_part_dir) override; diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index 17af6dd2909..b355cda1e19 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -129,7 +129,11 @@ public: /// Get a name like 'prefix_partdir_tryN' which does not exist in a root dir. /// TODO: remove it. 
- virtual std::string getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached) const = 0; + virtual std::optional getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached, bool broken) const = 0; + + virtual bool looksLikeBrokenDetachedPartHasTheSameContent(const String & detached_part_path, + std::optional & original_checksums_content, + std::optional & original_files_list) const = 0; /// Reset part directory, used for im-memory parts. /// TODO: remove it. diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 46323f12305..293b0d5c4c4 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1482,7 +1482,7 @@ void IMergeTreeDataPart::remove() const data_part_storage->remove(std::move(can_remove_callback), checksums, projection_checksums, is_temp, getState(), storage.log); } -String IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix, bool detached) const +std::optional IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix, bool detached, bool broken) const { String res; @@ -1495,22 +1495,24 @@ String IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix, bool if (detached && parent_part) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot detach projection"); - return data_part_storage->getRelativePathForPrefix(storage.log, prefix, detached); + return data_part_storage->getRelativePathForPrefix(storage.log, prefix, detached, broken); } -String IMergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix) const +std::optional IMergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix, bool broken) const { /// Do not allow underscores in the prefix because they are used as separators. assert(prefix.find_first_of('_') == String::npos); assert(prefix.empty() || std::find(DetachedPartInfo::DETACH_REASONS.begin(), DetachedPartInfo::DETACH_REASONS.end(), prefix) != DetachedPartInfo::DETACH_REASONS.end()); - return "detached/" + getRelativePathForPrefix(prefix, /* detached */ true); + if (auto path = getRelativePathForPrefix(prefix, /* detached */ true, broken)) + return "detached/" + *path; + return {}; } void IMergeTreeDataPart::renameToDetached(const String & prefix, DataPartStorageBuilderPtr builder) const { - renameTo(getRelativePathForDetachedPart(prefix), true, builder); + renameTo(*getRelativePathForDetachedPart(prefix, /* broken */ false), true, builder); part_is_probably_removed_from_disk = true; } @@ -1522,9 +1524,16 @@ void IMergeTreeDataPart::makeCloneInDetached(const String & prefix, const Storag /// because hardlinks tracking doesn't work for detached parts. bool copy_instead_of_hardlink = isStoredOnRemoteDiskWithZeroCopySupport() && storage.supportsReplication() && storage_settings->allow_remote_fs_zero_copy_replication; + /// Avoid unneeded duplicates of broken parts if we try to detach the same broken part multiple times. + /// Otherwise it may pollute detached/ with dirs with _tryN suffix and we will fail to remove broken part after 10 attempts. 
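    /// (The "10 attempts" mentioned above are the `try_no < 10` iterations in
    /// DataPartStorageOnDisk::getRelativePathForPrefix(): without this check,
    /// each detach of the same broken part would occupy one more _tryN suffix
    /// until all ten candidate names are exhausted.)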
+ bool broken = !prefix.empty(); + auto maybe_path_in_detached = getRelativePathForDetachedPart(prefix, broken); + if (!maybe_path_in_detached) + return; + data_part_storage->freeze( storage.relative_data_path, - getRelativePathForDetachedPart(prefix), + *maybe_path_in_detached, /*make_source_readonly*/ true, {}, copy_instead_of_hardlink, diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 32afa2a482d..6f034574fb4 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -347,7 +347,7 @@ public: /// Calculate column and secondary indices sizes on disk. void calculateColumnsAndSecondaryIndicesSizesOnDisk(); - String getRelativePathForPrefix(const String & prefix, bool detached = false) const; + std::optional getRelativePathForPrefix(const String & prefix, bool detached = false, bool broken = false) const; bool isProjectionPart() const { return parent_part != nullptr; } @@ -485,7 +485,7 @@ protected: /// disk using columns and checksums. virtual void calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const = 0; - String getRelativePathForDetachedPart(const String & prefix) const; + std::optional getRelativePathForDetachedPart(const String & prefix, bool broken) const; /// Checks that part can be actually removed from disk. /// In ordinary scenario always returns true, but in case of diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0f200a5a1c8..67101a97a75 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6210,7 +6210,7 @@ std::pair MergeTreeData::cloneAn if (auto src_part_in_memory = asInMemoryPart(src_part)) { auto flushed_part_path = src_part_in_memory->getRelativePathForPrefix(tmp_part_prefix); - src_part_storage = src_part_in_memory->flushToDisk(flushed_part_path, metadata_snapshot); + src_part_storage = src_part_in_memory->flushToDisk(*flushed_part_path, metadata_snapshot); } String with_copy; @@ -6394,7 +6394,7 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher( if (auto part_in_memory = asInMemoryPart(part)) { auto flushed_part_path = part_in_memory->getRelativePathForPrefix("tmp_freeze"); - data_part_storage = part_in_memory->flushToDisk(flushed_part_path, metadata_snapshot); + data_part_storage = part_in_memory->flushToDisk(*flushed_part_path, metadata_snapshot); } auto callback = [this, &part, &backup_part_path](const DiskPtr & disk) diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index c7c831c23ec..7a3c5f11c81 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -142,7 +142,7 @@ DataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & new_rel void MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const { - String detached_path = getRelativePathForDetachedPart(prefix); + String detached_path = *getRelativePathForDetachedPart(prefix, /* broken */ false); flushToDisk(detached_path, metadata_snapshot); } diff --git a/tests/integration/test_partition/configs/testkeeper.xml b/tests/integration/test_partition/configs/testkeeper.xml new file mode 100644 index 00000000000..5200b789a9b --- /dev/null +++ b/tests/integration/test_partition/configs/testkeeper.xml @@ -0,0 +1,6 @@ + + + + testkeeper 
+ + \ No newline at end of file diff --git a/tests/integration/test_partition/test.py b/tests/integration/test_partition/test.py index f3df66631a5..320209b5d7e 100644 --- a/tests/integration/test_partition/test.py +++ b/tests/integration/test_partition/test.py @@ -2,9 +2,15 @@ import pytest import logging from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV +from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance("instance") +instance = cluster.add_instance( + "instance", + main_configs=[ + "configs/testkeeper.xml", + ], +) q = instance.query path_to_data = "/var/lib/clickhouse/" @@ -478,3 +484,86 @@ def test_detached_part_dir_exists(started_cluster): == "all_1_1_0\nall_1_1_0_try1\nall_2_2_0\nall_2_2_0_try1\n" ) q("drop table detached_part_dir_exists") + + +def test_make_clone_in_detached(started_cluster): + q( + "create table clone_in_detached (n int, m String) engine=ReplicatedMergeTree('/clone_in_detached', '1') order by n" + ) + + path = path_to_data + "data/default/clone_in_detached/" + + # broken part already detached + q("insert into clone_in_detached values (42, '¯\_(ツ)_/¯')") + instance.exec_in_container(["rm", path + "all_0_0_0/data.bin"]) + instance.exec_in_container( + ["cp", "-r", path + "all_0_0_0", path + "detached/broken_all_0_0_0"] + ) + assert_eq_with_retry(instance, "select * from clone_in_detached", "\n") + assert ["broken_all_0_0_0",] == sorted( + instance.exec_in_container(["ls", path + "detached/"]).strip().split("\n") + ) + + # there's a directory with the same name, but different content + q("insert into clone_in_detached values (43, '¯\_(ツ)_/¯')") + instance.exec_in_container(["rm", path + "all_1_1_0/data.bin"]) + instance.exec_in_container( + ["cp", "-r", path + "all_1_1_0", path + "detached/broken_all_1_1_0"] + ) + instance.exec_in_container(["rm", path + "detached/broken_all_1_1_0/primary.idx"]) + instance.exec_in_container( + ["cp", "-r", path + "all_1_1_0", path + "detached/broken_all_1_1_0_try0"] + ) + instance.exec_in_container( + [ + "bash", + "-c", + "echo 'broken' > {}".format( + path + "detached/broken_all_1_1_0_try0/checksums.txt" + ), + ] + ) + assert_eq_with_retry(instance, "select * from clone_in_detached", "\n") + assert [ + "broken_all_0_0_0", + "broken_all_1_1_0", + "broken_all_1_1_0_try0", + "broken_all_1_1_0_try1", + ] == sorted( + instance.exec_in_container(["ls", path + "detached/"]).strip().split("\n") + ) + + # there are directories with the same name, but different content, and part already detached + q("insert into clone_in_detached values (44, '¯\_(ツ)_/¯')") + instance.exec_in_container(["rm", path + "all_2_2_0/data.bin"]) + instance.exec_in_container( + ["cp", "-r", path + "all_2_2_0", path + "detached/broken_all_2_2_0"] + ) + instance.exec_in_container(["rm", path + "detached/broken_all_2_2_0/primary.idx"]) + instance.exec_in_container( + ["cp", "-r", path + "all_2_2_0", path + "detached/broken_all_2_2_0_try0"] + ) + instance.exec_in_container( + [ + "bash", + "-c", + "echo 'broken' > {}".format( + path + "detached/broken_all_2_2_0_try0/checksums.txt" + ), + ] + ) + instance.exec_in_container( + ["cp", "-r", path + "all_2_2_0", path + "detached/broken_all_2_2_0_try1"] + ) + assert_eq_with_retry(instance, "select * from clone_in_detached", "\n") + assert [ + "broken_all_0_0_0", + "broken_all_1_1_0", + "broken_all_1_1_0_try0", + "broken_all_1_1_0_try1", + "broken_all_2_2_0", + "broken_all_2_2_0_try0", + "broken_all_2_2_0_try1", + ] == sorted( + 
instance.exec_in_container(["ls", path + "detached/"]).strip().split("\n") + ) From 42116050934164cc4b55a2db5891dc891a23e7c8 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 3 Oct 2022 13:13:29 +0200 Subject: [PATCH 045/252] disable zk fault injections in bc check --- docker/test/stress/run.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 6b9954c2431..4fa175e0823 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -47,7 +47,6 @@ function install_packages() function configure() { - export ZOOKEEPER_FAULT_INJECTION=1 # install test configs export USE_DATABASE_ORDINARY=1 export EXPORT_S3_STORAGE_POLICIES=1 @@ -203,6 +202,7 @@ quit install_packages package_folder +export ZOOKEEPER_FAULT_INJECTION=1 configure azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & @@ -243,6 +243,7 @@ stop # Let's enable S3 storage by default export USE_S3_STORAGE_FOR_MERGE_TREE=1 +export ZOOKEEPER_FAULT_INJECTION=1 configure # But we still need default disk because some tables loaded only into it @@ -375,6 +376,8 @@ else install_packages previous_release_package_folder # Start server from previous release + # Previous version may not be ready for fault injections + export ZOOKEEPER_FAULT_INJECTION=0 configure # Avoid "Setting s3_check_objects_after_upload is neither a builtin setting..." @@ -410,6 +413,7 @@ else # Start new server mv package_folder/clickhouse /usr/bin/ + export ZOOKEEPER_FAULT_INJECTION=1 configure start 500 clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \ From dab5e80c81c74b96bdb32bc97a7de6a06b71d4bf Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 3 Oct 2022 13:15:32 +0000 Subject: [PATCH 046/252] Add support of Date32 arguments --- src/Functions/dateDiff.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index b8bf3c11698..f328afbd4d1 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -69,11 +69,11 @@ public: throw Exception("First argument for function " + getName() + " (unit) must be String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - if (!isDate(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1])) + if (!isDate(arguments[1]) && !isDate32(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1])) throw Exception("Second argument for function " + getName() + " must be Date or DateTime", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - if (!isDate(arguments[2]) && !isDateTime(arguments[2]) && !isDateTime64(arguments[2])) + if (!isDate(arguments[2]) && !isDate32(arguments[2]) && !isDateTime(arguments[2]) && !isDateTime64(arguments[2])) throw Exception("Third argument for function " + getName() + " must be Date or DateTime", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -137,12 +137,16 @@ private: dispatchForSecondColumn(*x_vec_16, y, timezone_x, timezone_y, result); else if (const auto * x_vec_32 = checkAndGetColumn(&x)) dispatchForSecondColumn(*x_vec_32, y, timezone_x, timezone_y, result); + else if (const auto * x_vec_32_s = checkAndGetColumn(&x)) + dispatchForSecondColumn(*x_vec_32_s, y, timezone_x, timezone_y, result); else if (const auto * x_vec_64 = checkAndGetColumn(&x)) dispatchForSecondColumn(*x_vec_64, y, timezone_x, timezone_y, result); else if (const auto * x_const_16 = checkAndGetColumnConst(&x)) 
dispatchConstForSecondColumn(x_const_16->getValue(), y, timezone_x, timezone_y, result); else if (const auto * x_const_32 = checkAndGetColumnConst(&x)) dispatchConstForSecondColumn(x_const_32->getValue(), y, timezone_x, timezone_y, result); + else if (const auto * x_const_32_s = checkAndGetColumnConst(&x)) + dispatchConstForSecondColumn(x_const_32_s->getValue(), y, timezone_x, timezone_y, result); else if (const auto * x_const_64 = checkAndGetColumnConst(&x)) dispatchConstForSecondColumn(x_const_64->getValue>(), y, timezone_x, timezone_y, result); else @@ -159,12 +163,16 @@ private: vectorVector(x, *y_vec_16, timezone_x, timezone_y, result); else if (const auto * y_vec_32 = checkAndGetColumn(&y)) vectorVector(x, *y_vec_32, timezone_x, timezone_y, result); + else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) + vectorVector(x, *y_vec_32_s, timezone_x, timezone_y, result); else if (const auto * y_vec_64 = checkAndGetColumn(&y)) vectorVector(x, *y_vec_64, timezone_x, timezone_y, result); else if (const auto * y_const_16 = checkAndGetColumnConst(&y)) vectorConstant(x, y_const_16->getValue(), timezone_x, timezone_y, result); else if (const auto * y_const_32 = checkAndGetColumnConst(&y)) vectorConstant(x, y_const_32->getValue(), timezone_x, timezone_y, result); + else if (const auto * y_const_32_s = checkAndGetColumnConst(&y)) + vectorConstant(x, y_const_32_s->getValue(), timezone_x, timezone_y, result); else if (const auto * y_const_64 = checkAndGetColumnConst(&y)) vectorConstant(x, y_const_64->getValue>(), timezone_x, timezone_y, result); else From 087a0e939bdddbf8c2d5f8eda84f484a45f03486 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 4 Oct 2022 01:38:54 +0300 Subject: [PATCH 047/252] fix date csv parsing --- .../Serializations/SerializationDate.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationDate.cpp b/src/DataTypes/Serializations/SerializationDate.cpp index 60db191a9dc..3b78da97271 100644 --- a/src/DataTypes/Serializations/SerializationDate.cpp +++ b/src/DataTypes/Serializations/SerializationDate.cpp @@ -76,9 +76,22 @@ void SerializationDate::serializeTextCSV(const IColumn & column, size_t row_num, void SerializationDate::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { - LocalDate value; - readCSV(value, istr); - assert_cast(column).getData().push_back(value.getDayNum()); + DayNum x; + + if (istr.eof()) + throwReadAfterEOF(); + + char maybe_quote = *istr.position(); + + if (maybe_quote == '\'' || maybe_quote == '\"') + ++istr.position(); + + readDateText(x, istr); + + if (maybe_quote == '\'' || maybe_quote == '\"') + assertChar(maybe_quote, istr); + + assert_cast(column).getData().push_back(x); } } From 04f73d79ff33d45c16d5c53a65ecb414b0ce82ee Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 4 Oct 2022 10:52:59 +0300 Subject: [PATCH 048/252] added test --- .../0_stateless/02457_csv_parse_date_out_of_range.reference | 2 ++ .../0_stateless/02457_csv_parse_date_out_of_range.sql | 6 ++++++ 2 files changed, 8 insertions(+) create mode 100644 tests/queries/0_stateless/02457_csv_parse_date_out_of_range.reference create mode 100644 tests/queries/0_stateless/02457_csv_parse_date_out_of_range.sql diff --git a/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.reference b/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.reference new file mode 100644 index 00000000000..3c8426819b3 --- /dev/null +++ 
b/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.reference @@ -0,0 +1,2 @@ +\'above\' 2149-06-06 +\'below\' 1970-01-01 diff --git a/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.sql b/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.sql new file mode 100644 index 00000000000..9368986a2f5 --- /dev/null +++ b/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.sql @@ -0,0 +1,6 @@ +DROP TABLE IF EXISTS test_date_out_of_range sync; +CREATE TABLE test_date_out_of_range (f String, t Date) engine=Memory(); +INSERT INTO test_date_out_of_range format CSV 'above',2200-12-31 +INSERT INTO test_date_out_of_range format CSV 'below',1900-01-01 +SELECT * from test_date_out_of_range; +DROP TABLE test_date_out_of_range SYNC; From 738a34c83a603dcf65517b7328eea11728362c0c Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 4 Oct 2022 22:55:47 +0300 Subject: [PATCH 049/252] fix test --- .../02457_csv_parse_date_out_of_range.reference | 4 ++-- .../02457_csv_parse_date_out_of_range.sh | 15 +++++++++++++++ .../02457_csv_parse_date_out_of_range.sql | 6 ------ 3 files changed, 17 insertions(+), 8 deletions(-) create mode 100755 tests/queries/0_stateless/02457_csv_parse_date_out_of_range.sh delete mode 100644 tests/queries/0_stateless/02457_csv_parse_date_out_of_range.sql diff --git a/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.reference b/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.reference index 3c8426819b3..544e8100fa4 100644 --- a/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.reference +++ b/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.reference @@ -1,2 +1,2 @@ -\'above\' 2149-06-06 -\'below\' 1970-01-01 +above 2149-06-06 +below 1970-01-01 diff --git a/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.sh b/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.sh new file mode 100755 index 00000000000..297f7e31bab --- /dev/null +++ b/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test_date_out_of_range sync"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE test_date_out_of_range (f String, t Date) engine=Memory()"; + +printf '"above", 2200-12-31 +"below", 1900-01-01 +' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --query="INSERT INTO test_date_out_of_range FORMAT CSV"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM test_date_out_of_range"; +$CLICKHOUSE_CLIENT --query="DROP TABLE test_date_out_of_range"; \ No newline at end of file diff --git a/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.sql b/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.sql deleted file mode 100644 index 9368986a2f5..00000000000 --- a/tests/queries/0_stateless/02457_csv_parse_date_out_of_range.sql +++ /dev/null @@ -1,6 +0,0 @@ -DROP TABLE IF EXISTS test_date_out_of_range sync; -CREATE TABLE test_date_out_of_range (f String, t Date) engine=Memory(); -INSERT INTO test_date_out_of_range format CSV 'above',2200-12-31 -INSERT INTO test_date_out_of_range format CSV 'below',1900-01-01 -SELECT * from test_date_out_of_range; -DROP TABLE test_date_out_of_range SYNC; From 4ab9c1504822fd3346c7cb3ff151dcd496cd1586 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 5 Oct 2022 14:19:32 +0000 Subject: [PATCH 050/252] Add ToExtendedRelativeDayNumImpl --- src/Functions/dateDiff.cpp | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index f328afbd4d1..ecb34c88dfb 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -33,6 +33,30 @@ namespace ErrorCodes namespace { +struct ToExtendedRelativeDayNumImpl +{ + static constexpr auto name = "toExtendedRelativeDayNum"; + + static inline Int64 execute(Int64 t, const DateLUTImpl & time_zone) + { + return time_zone.toDayNum(t); + } + static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + { + return time_zone.toDayNum(static_cast(t)); + } + static inline Int32 execute(Int32 d, const DateLUTImpl &) + { + return static_cast(d); + } + static inline UInt16 execute(UInt16 d, const DateLUTImpl &) + { + return static_cast(d); + } + + using FactorTransform = ZeroTransform; +}; + /** dateDiff('unit', t1, t2, [timezone]) * t1 and t2 can be Date or DateTime * @@ -113,7 +137,8 @@ public: else if (unit == "week" || unit == "wk" || unit == "ww") dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "day" || unit == "dd" || unit == "d") - dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); + // dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "hour" || unit == "hh" || unit == "h") dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "minute" || unit == "mi" || unit == "n") From 4f268cb414dc626fa262e6f00912407de47f8eea Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 6 Oct 2022 18:02:51 +0200 Subject: [PATCH 051/252] Fix exception message on timeout (host was missing) Previously before the exception there is a loop for disconnecting, so dumpAddress* will not return anything, fix this, by saving the addresses before. 
Signed-off-by: Azat Khuzhin --- src/Client/MultiplexedConnections.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index 72cd4c46477..70c86b77c32 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -411,6 +411,7 @@ MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForRead /// And we also check if read_list is still empty just in case. if (n <= 0 || read_list.empty()) { + const auto & addresses = dumpAddressesUnlocked(); for (ReplicaState & state : replica_states) { Connection * connection = state.connection; @@ -423,7 +424,7 @@ MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForRead throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout ({} ms) exceeded while reading from {}", timeout.totalMilliseconds(), - dumpAddressesUnlocked()); + addresses); } } From 20dbf5eb3404b7473d70c6e6a48fc844d3d8b14c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 6 Oct 2022 16:43:58 +0200 Subject: [PATCH 052/252] Fix possible timeout exception for distributed queries with use_hedged_requests=0 In case of possible EINTR (i.e. query profiler) it is possible for select() from getReplicaForReading() (this is the stage when the initiator is waiting for Cancel packet from the remote shards, that can be sent in case of enough rows was read, or the query had been cancelled explicitly) to return without any sockets ready, and getReplicaForReading() will assume that the timeout happened. Here is a stacktrace example: [ 59205 ] {04f3d3a4-7346-4ef2-bf57-928f9e55ed89} TCPHandler: Code: 159. DB::Exception: Received from b8:9000. DB::Exception: Timeout (-1000 ms) exceeded while reading from . Stack trace: 0. Poco::Exception::Exception() @ 0x17e26eac in /usr/bin/clickhouse 1. DB::Exception::Exception() @ 0xb550b9a in /usr/bin/clickhouse 2. DB::Exception::Exception<>() @ 0x15ad1c81 in /usr/bin/clickhouse 3. DB::MultiplexedConnections::getReplicaForReading(bool) @ 0x15ad16fc in /usr/bin/clickhouse 4. DB::MultiplexedConnections::receivePacketUnlocked() @ 0x15ad02fd in /usr/bin/clickhouse 5. DB::MultiplexedConnections::drain() @ 0x15ad0df8 in /usr/bin/clickhouse 6. DB::ConnectionCollector::drainConnections(DB::IConnections&, bool) @ 0x1443c205 in /usr/bin/clickhouse 7. DB::RemoteQueryExecutor::finish() @ 0x1445ea6a in /usr/bin/clickhouse Signed-off-by: Azat Khuzhin --- src/Client/MultiplexedConnections.cpp | 37 +++++++++++++------ ...02466_distributed_query_profiler.reference | 10 +++++ .../02466_distributed_query_profiler.sql | 21 +++++++++++ 3 files changed, 56 insertions(+), 12 deletions(-) create mode 100644 tests/queries/0_stateless/02466_distributed_query_profiler.reference create mode 100644 tests/queries/0_stateless/02466_distributed_query_profiler.sql diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index 70c86b77c32..87eda765a7a 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -393,19 +393,32 @@ MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForRead Poco::Net::Socket::SocketList write_list; Poco::Net::Socket::SocketList except_list; - for (const ReplicaState & state : replica_states) - { - Connection * connection = state.connection; - if (connection != nullptr) - read_list.push_back(*connection->socket); - } - auto timeout = is_draining ? 
drain_timeout : receive_timeout; - int n = Poco::Net::Socket::select( - read_list, - write_list, - except_list, - timeout); + int n = 0; + + /// EINTR loop + while (true) + { + read_list.clear(); + for (const ReplicaState & state : replica_states) + { + Connection * connection = state.connection; + if (connection != nullptr) + read_list.push_back(*connection->socket); + } + + /// poco returns 0 on EINTR, let's reset errno to ensure that EINTR came from select(). + errno = 0; + + n = Poco::Net::Socket::select( + read_list, + write_list, + except_list, + timeout); + if (n <= 0 && errno == EINTR) + continue; + break; + } /// We treat any error as timeout for simplicity. /// And we also check if read_list is still empty just in case. diff --git a/tests/queries/0_stateless/02466_distributed_query_profiler.reference b/tests/queries/0_stateless/02466_distributed_query_profiler.reference new file mode 100644 index 00000000000..4521d575ff3 --- /dev/null +++ b/tests/queries/0_stateless/02466_distributed_query_profiler.reference @@ -0,0 +1,10 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/02466_distributed_query_profiler.sql b/tests/queries/0_stateless/02466_distributed_query_profiler.sql new file mode 100644 index 00000000000..9fc2fe7b4bd --- /dev/null +++ b/tests/queries/0_stateless/02466_distributed_query_profiler.sql @@ -0,0 +1,21 @@ +-- This is a regression test for EINTR handling in MultiplexedConnections::getReplicaForReading() + +select * from remote('127.{2,4}', view( + -- This is the emulation of the slow query, the server will return a line each 0.1 second + select sleep(0.1) from numbers(20) settings max_block_size=1) +) +-- LIMIT is to activate query cancellation in case of enough rows already read. +limit 10 +settings + -- This is to avoid draining in background and got the exception during query execution + drain_timeout=-1, + -- This is to activate as much signals as possible to trigger EINTR + query_profiler_real_time_period_ns=1, + -- This is to use MultiplexedConnections + use_hedged_requests=0, + -- This is to make the initiator waiting for cancel packet in MultiplexedConnections::getReplicaForReading() + -- + -- NOTE: that even smaller sleep will be enough to trigger this problem + -- with 100% probability, however just to make it more reliable, increase + -- it to 2 seconds. 
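    -- Why this combination reproduces the bug: while waiting for the delayed
    -- Cancel packet the initiator is parked in Poco::Net::Socket::select()
    -- inside MultiplexedConnections::getReplicaForReading(), and every
    -- profiler signal may interrupt that select() with EINTR, which was
    -- previously misreported as a timeout.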
+ sleep_in_receive_cancel_ms=2000; From bb78bf1c708162eb7b6d045005dd5149dd4e0b74 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 6 Oct 2022 21:09:12 +0200 Subject: [PATCH 053/252] improve replica recovery --- src/Storages/StorageReplicatedMergeTree.cpp | 42 +++-- .../02448_clone_replica_lost_part.reference | 13 ++ .../02448_clone_replica_lost_part.sql | 147 ++++++++++++++++++ 3 files changed, 190 insertions(+), 12 deletions(-) create mode 100644 tests/queries/0_stateless/02448_clone_replica_lost_part.reference create mode 100644 tests/queries/0_stateless/02448_clone_replica_lost_part.sql diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index c12e9d0270a..a7706d440fb 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2407,6 +2407,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo std::vector source_queue; ActiveDataPartSet get_part_set{format_version}; ActiveDataPartSet drop_range_set{format_version}; + std::unordered_set exact_part_names; { std::vector queue_get_futures; @@ -2444,14 +2445,22 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo info.parsed_entry->znode_name = source_queue_names[i]; if (info.parsed_entry->type == LogEntry::DROP_RANGE) + { drop_range_set.add(info.parsed_entry->new_part_name); - - if (info.parsed_entry->type == LogEntry::GET_PART) + } + else if (info.parsed_entry->type == LogEntry::GET_PART) { String maybe_covering_drop_range = drop_range_set.getContainingPart(info.parsed_entry->new_part_name); if (maybe_covering_drop_range.empty()) get_part_set.add(info.parsed_entry->new_part_name); } + else + { + /// We should keep local parts if they present in the queue of source replica. + /// There's a chance that we are the only replica that has these parts. + Strings entry_virtual_parts = info.parsed_entry->getVirtualPartNames(format_version); + std::move(entry_virtual_parts.begin(), entry_virtual_parts.end(), std::inserter(exact_part_names, exact_part_names.end())); + } } } @@ -2471,11 +2480,17 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo for (const auto & part : local_parts_in_zk) { - if (get_part_set.getContainingPart(part).empty()) - { - parts_to_remove_from_zk.emplace_back(part); - LOG_WARNING(log, "Source replica does not have part {}. Removing it from ZooKeeper.", part); - } + /// We look for exact match (and not for any covering part) + /// because our part might be dropped and covering part might be merged though gap. + /// (avoid resurrection of data that was removed a long time ago) + if (get_part_set.getContainingPart(part) == part) + continue; + + if (exact_part_names.contains(part)) + continue; + + parts_to_remove_from_zk.emplace_back(part); + LOG_WARNING(log, "Source replica does not have part {}. Removing it from ZooKeeper.", part); } { @@ -2497,11 +2512,14 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo for (const auto & part : local_active_parts) { - if (get_part_set.getContainingPart(part->name).empty()) - { - parts_to_remove_from_working_set.emplace_back(part); - LOG_WARNING(log, "Source replica does not have part {}. 
Removing it from working set.", part->name); - } + if (get_part_set.getContainingPart(part->name) == part->name) + continue; + + if (exact_part_names.contains(part->name)) + continue; + + parts_to_remove_from_working_set.emplace_back(part); + LOG_WARNING(log, "Source replica does not have part {}. Removing it from working set.", part->name); } if (getSettings()->detach_old_local_parts_when_cloning_replica) diff --git a/tests/queries/0_stateless/02448_clone_replica_lost_part.reference b/tests/queries/0_stateless/02448_clone_replica_lost_part.reference new file mode 100644 index 00000000000..af82f72c49e --- /dev/null +++ b/tests/queries/0_stateless/02448_clone_replica_lost_part.reference @@ -0,0 +1,13 @@ +1 [2,3,4,5] +2 [1,2,3,4,5] +3 [1,2,3,4,5] +4 [3,4,5] +5 [1,2,3,4,5] +6 [1,2,3,4,5] +7 [1,2,3,4,5,20,30,40,50] +8 [1,2,3,4,5,10,20,30,40,50] +9 [1,2,3,4,5,10,20,30,40,50] +['all_18_23_1','all_7_17_2_13'] +10 [1,2,3,4,5,10,20,30,40,50] +11 [1,2,3,4,5,10,20,30,40,50,100,300,400,500,600] +12 [1,2,3,4,5,10,20,30,40,50,100,300,400,500,600] diff --git a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql new file mode 100644 index 00000000000..d395caf41db --- /dev/null +++ b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql @@ -0,0 +1,147 @@ + +drop table if exists rmt1; +drop table if exists rmt2; +create table rmt1 (n int) engine=ReplicatedMergeTree('/test/02448/{database}/rmt', '1') order by tuple() + settings min_replicated_logs_to_keep=1, max_replicated_logs_to_keep=2, cleanup_delay_period=0, cleanup_delay_period_random_add=1, old_parts_lifetime=0, max_parts_to_merge_at_once=5; +create table rmt2 (n int) engine=ReplicatedMergeTree('/test/02448/{database}/rmt', '2') order by tuple() + settings min_replicated_logs_to_keep=1, max_replicated_logs_to_keep=2, cleanup_delay_period=0, cleanup_delay_period_random_add=1, old_parts_lifetime=0, max_parts_to_merge_at_once=5; + +-- insert part only on one replica +system stop replicated sends rmt1; +insert into rmt1 values (1); +detach table rmt1; -- make replica inactive +system start replicated sends rmt1; + +-- trigger log rotation, rmt1 will be lost +insert into rmt2 values (2); +insert into rmt2 values (3); +insert into rmt2 values (4); +insert into rmt2 values (5); +-- check that entry was not removed from the queue (part is not lost) +set receive_timeout=5; +system sync replica rmt2; -- {serverError TIMEOUT_EXCEEDED} +set receive_timeout=300; + +select 1, arraySort(groupArray(n)) from rmt2; + +-- rmt1 will mimic rmt2 +attach table rmt1; +system sync replica rmt1; +system sync replica rmt2; + +-- check that no parts are lost +select 2, arraySort(groupArray(n)) from rmt1; +select 3, arraySort(groupArray(n)) from rmt2; + + +truncate table rmt1; +truncate table rmt2; + + +-- insert parts only on one replica and merge them +system stop replicated sends rmt2; +insert into rmt2 values (1); +insert into rmt2 values (2); +system sync replica rmt2; +optimize table rmt2 final; +system sync replica rmt2; +-- give it a chance to remove source parts +select sleep(2) format Null; -- increases probability of reproducing the issue +detach table rmt2; +system start replicated sends rmt2; + + +-- trigger log rotation, rmt2 will be lost +insert into rmt1 values (3); +insert into rmt1 values (4); +insert into rmt1 values (5); +set receive_timeout=5; +-- check that entry was not removed from the queue (part is not lost) +system sync replica rmt1; -- {serverError TIMEOUT_EXCEEDED} +set 
receive_timeout=300; + +select 4, arraySort(groupArray(n)) from rmt1; + +-- rmt1 will mimic rmt2 +system stop fetches rmt1; +attach table rmt2; +system sync replica rmt2; +-- give rmt2 a chance to remove merged part (but it should not do it) +select sleep(2) format Null; -- increases probability of reproducing the issue +system start fetches rmt1; +system sync replica rmt1; + +-- check that no parts are lost +select 5, arraySort(groupArray(n)) from rmt1; +select 6, arraySort(groupArray(n)) from rmt2; + + +-- insert part only on one replica +system stop replicated sends rmt1; +insert into rmt1 values (123); +alter table rmt1 update n=10 where n=123 settings mutations_sync=1; +-- give it a chance to remove source part +select sleep(2) format Null; -- increases probability of reproducing the issue +detach table rmt1; -- make replica inactive +system start replicated sends rmt1; + +-- trigger log rotation, rmt1 will be lost +insert into rmt2 values (20); +insert into rmt2 values (30); +insert into rmt2 values (40); +insert into rmt2 values (50); +-- check that entry was not removed from the queue (part is not lost) +set receive_timeout=5; +system sync replica rmt2; -- {serverError TIMEOUT_EXCEEDED} +set receive_timeout=300; + +select 7, arraySort(groupArray(n)) from rmt2; + +-- rmt1 will mimic rmt2 +system stop fetches rmt2; +attach table rmt1; +system sync replica rmt1; +-- give rmt1 a chance to remove mutated part (but it should not do it) +select sleep(2) format Null; -- increases probability of reproducing the issue +system start fetches rmt2; +system sync replica rmt2; + +-- check that no parts are lost +select 8, arraySort(groupArray(n)) from rmt1; +select 9, arraySort(groupArray(n)) from rmt2; + +-- avoid arbitrary merges after inserting +optimize table rmt2 final; +-- insert parts (all_18_18_0, all_19_19_0) on both replicas (will be deduplicated, but it does not matter) +insert into rmt1 values (100); +insert into rmt2 values (100); +insert into rmt1 values (200); +insert into rmt2 values (200); +detach table rmt1; + +-- create a gap in block numbers buy dropping part +insert into rmt2 values (300); +alter table rmt2 drop part 'all_19_19_0'; +insert into rmt2 values (400); +insert into rmt2 values (500); +insert into rmt2 values (600); +system sync replica rmt2; +-- merge through gap +optimize table rmt2; +select arraySort(groupArrayDistinct(_part)) from rmt2; +-- give it a chance to cleanup log +select sleep(2) format Null; -- increases probability of reproducing the issue + +-- rmt1 will mimic rmt2, but will not be able to fetch parts for a while +system stop replicated sends rmt2; +attach table rmt1; +-- rmt1 should not show the value (100) from dropped part +select 10, arraySort(groupArray(n)) from rmt1; +select 11, arraySort(groupArray(n)) from rmt2; + +system start replicated sends rmt2; +system sync replica rmt1; +select 12, arraySort(groupArray(n)) from rmt1; + +drop table rmt1; +drop table rmt2; From fc1de0a56a192f62598d447137457724231f09ee Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 7 Oct 2022 01:10:33 +0000 Subject: [PATCH 054/252] move some functionality to Server::buildProtocolStackFromConfig --- programs/server/Server.cpp | 109 +++++++++++++++++++++---------------- programs/server/Server.h | 9 +++ 2 files changed, 70 insertions(+), 48 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 3154af81ae8..f0e15ea536d 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -85,7 +85,6 @@ #include #include 
#include -#include #include #include #include @@ -1858,27 +1857,13 @@ int Server::main(const std::vector & /*args*/) return Application::EXIT_OK; } - -void Server::createServers( - Poco::Util::AbstractConfiguration & config, - const Strings & listen_hosts, - const Strings & interserver_listen_hosts, - bool listen_try, - Poco::ThreadPool & server_pool, +std::unique_ptr Server::buildProtocolStackFromConfig( + const Poco::Util::AbstractConfiguration & config, + const std::string & protocol, + Poco::Net::HTTPServerParams::Ptr http_params, AsynchronousMetrics & async_metrics, - std::vector & servers, - bool start_servers) + bool & is_secure) { - const Settings & settings = global_context->getSettingsRef(); - - Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); - Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; - http_params->setTimeout(settings.http_receive_timeout); - http_params->setKeepAliveTimeout(keep_alive_timeout); - - Poco::Util::AbstractConfiguration::Keys protocols; - config.keys("protocols", protocols); - auto create_factory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr { if (type == "tcp") @@ -1914,6 +1899,61 @@ void Server::createServers( throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol configuration error, unknown protocol name '{}'", type); }; + std::string conf_name = "protocols." + protocol; + std::string prefix = conf_name + "."; + std::unordered_set pset {conf_name}; + + auto stack = std::make_unique(*this, conf_name); + + while (true) + { + // if there is no "type" - it's a reference to another protocol and this is just an endpoint + if (config.has(prefix + "type")) + { + std::string type = config.getString(prefix + "type"); + if (type == "tls") + { + if (is_secure) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); + is_secure = true; + } + + stack->append(create_factory(type, conf_name)); + } + + if (!config.has(prefix + "impl")) + break; + + conf_name = "protocols." + config.getString(prefix + "impl"); + prefix = conf_name + "."; + + if (!pset.insert(conf_name).second) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); + } + + return stack; +} + +void Server::createServers( + Poco::Util::AbstractConfiguration & config, + const Strings & listen_hosts, + const Strings & interserver_listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers) +{ + const Settings & settings = global_context->getSettingsRef(); + + Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(settings.http_receive_timeout); + http_params->setKeepAliveTimeout(keep_alive_timeout); + + Poco::Util::AbstractConfiguration::Keys protocols; + config.keys("protocols", protocols); + for (const auto & protocol : protocols) { std::vector hosts; @@ -1926,7 +1966,6 @@ void Server::createServers( { std::string conf_name = "protocols." 
+ protocol; std::string prefix = conf_name + "."; - std::unordered_set pset {conf_name}; if (!config.has(prefix + "port")) continue; @@ -1936,33 +1975,7 @@ void Server::createServers( description = config.getString(prefix + "description"); std::string port_name = prefix + "port"; bool is_secure = false; - auto stack = std::make_unique(*this, conf_name); - - while (true) - { - // if there is no "type" - it's a reference to another protocol and this is just an endpoint - if (config.has(prefix + "type")) - { - std::string type = config.getString(prefix + "type"); - if (type == "tls") - { - if (is_secure) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); - is_secure = true; - } - - stack->append(create_factory(type, conf_name)); - } - - if (!config.has(prefix + "impl")) - break; - - conf_name = "protocols." + config.getString(prefix + "impl"); - prefix = conf_name + "."; - - if (!pset.insert(conf_name).second) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); - } + auto stack = buildProtocolStackFromConfig(config, protocol, http_params, async_metrics, is_secure); if (stack->empty()) throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); diff --git a/programs/server/Server.h b/programs/server/Server.h index 44a5a441e43..53841b1fcd4 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -3,6 +3,8 @@ #include #include +#include +#include /** Server provides three interfaces: * 1. HTTP - simple interface for any applications. @@ -77,6 +79,13 @@ private: UInt16 port, [[maybe_unused]] bool secure = false) const; + std::unique_ptr buildProtocolStackFromConfig( + const Poco::Util::AbstractConfiguration & config, + const std::string & protocol, + Poco::Net::HTTPServerParams::Ptr http_params, + AsynchronousMetrics & async_metrics, + bool & is_secure); + using CreateServerFunc = std::function; void createServer( Poco::Util::AbstractConfiguration & config, From 997fa5e2953f6b811bd6f8ad5fe44118df598496 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 7 Oct 2022 01:16:22 +0000 Subject: [PATCH 055/252] review suggestions --- src/Server/TCPProtocolStackFactory.h | 2 +- tests/integration/test_composable_protocols/configs/users.xml | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Server/TCPProtocolStackFactory.h b/src/Server/TCPProtocolStackFactory.h index 448b019b849..16b57649a72 100644 --- a/src/Server/TCPProtocolStackFactory.h +++ b/src/Server/TCPProtocolStackFactory.h @@ -81,7 +81,7 @@ public: void append(TCPServerConnectionFactory::Ptr factory) { - stack.push_back(factory); + stack.push_back(std::move(factory)); } size_t size() { return stack.size(); } diff --git a/tests/integration/test_composable_protocols/configs/users.xml b/tests/integration/test_composable_protocols/configs/users.xml index 6f94d1696e3..da8425b3695 100644 --- a/tests/integration/test_composable_protocols/configs/users.xml +++ b/tests/integration/test_composable_protocols/configs/users.xml @@ -1,8 +1,6 @@ - 10000000000 - 64999 From cde0257d11a02425914e3aa947e06ab11a0d013c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 7 Oct 2022 14:36:51 +0000 Subject: [PATCH 056/252] Docs: Remove obsolete pages Removes page https://clickhouse.com/docs/en/sql-reference/statements/misc It indexes "miscellaneous" SQL statements but these are already easily reachable by the sidebar. 
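The core of buildProtocolStackFromConfig, introduced in patch 054 above, is a chain walk: follow each protocol's "impl" reference until none is left, and refuse to visit the same name twice. A minimal, self-contained sketch of that walk follows, with a plain std::map standing in for the Poco configuration tree; the names and the simplified behaviour here are illustrative only, not the actual ClickHouse API.

#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <unordered_set>
#include <vector>

// Resolve a protocol chain: each entry may reference another entry via "impl";
// seeing a name twice means the configuration contains a loop.
std::vector<std::string> resolveChain(const std::map<std::string, std::string> & impl_of, const std::string & start)
{
    std::vector<std::string> chain;
    std::unordered_set<std::string> seen;
    std::string name = start;
    while (true)
    {
        if (!seen.insert(name).second)
            throw std::runtime_error("protocol '" + start + "' configuration contains a loop on '" + name + "'");
        chain.push_back(name);
        auto it = impl_of.find(name);
        if (it == impl_of.end())
            break;          // no "impl" reference: the stack ends here
        name = it->second;  // descend into the referenced protocol
    }
    return chain;
}

int main()
{
    // tls_tcp -> tls -> tcp, mirroring a <protocols> section linked via "impl".
    const std::map<std::string, std::string> impl_of{{"tls_tcp", "tls"}, {"tls", "tcp"}};
    for (const auto & layer : resolveChain(impl_of, "tls_tcp"))
        std::cout << layer << '\n';
}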
---
 docs/en/sql-reference/statements/misc.md | 21 ---------------------
 docs/ru/sql-reference/statements/misc.md | 21 ---------------------
 2 files changed, 42 deletions(-)
 delete mode 100644 docs/en/sql-reference/statements/misc.md
 delete mode 100644 docs/ru/sql-reference/statements/misc.md

diff --git a/docs/en/sql-reference/statements/misc.md b/docs/en/sql-reference/statements/misc.md
deleted file mode 100644
index d812dd2008a..00000000000
--- a/docs/en/sql-reference/statements/misc.md
+++ /dev/null
@@ -1,21 +0,0 @@
----
-slug: /en/sql-reference/statements/misc
-toc_hidden: true
-sidebar_position: 70
----
-
-# Miscellaneous Statements
-
-- [ATTACH](../../sql-reference/statements/attach.md)
-- [CHECK TABLE](../../sql-reference/statements/check-table.md)
-- [DESCRIBE TABLE](../../sql-reference/statements/describe-table.md)
-- [DETACH](../../sql-reference/statements/detach.md)
-- [DROP](../../sql-reference/statements/drop.md)
-- [EXISTS](../../sql-reference/statements/exists.md)
-- [KILL](../../sql-reference/statements/kill.md)
-- [OPTIMIZE](../../sql-reference/statements/optimize.md)
-- [RENAME](../../sql-reference/statements/rename.md)
-- [SET](../../sql-reference/statements/set.md)
-- [SET ROLE](../../sql-reference/statements/set-role.md)
-- [TRUNCATE](../../sql-reference/statements/truncate.md)
-- [USE](../../sql-reference/statements/use.md)
diff --git a/docs/ru/sql-reference/statements/misc.md b/docs/ru/sql-reference/statements/misc.md
deleted file mode 100644
index 437215f20ce..00000000000
--- a/docs/ru/sql-reference/statements/misc.md
+++ /dev/null
@@ -1,21 +0,0 @@
----
-slug: /ru/sql-reference/statements/misc
-sidebar_position: 41
---

-
-# Прочие виды запросов {#prochie-vidy-zaprosov}
-
-- [ATTACH](../../sql-reference/statements/attach.md)
-- [CHECK TABLE](../../sql-reference/statements/check-table.md)
-- [DESCRIBE TABLE](../../sql-reference/statements/describe-table.md)
-- [DETACH](../../sql-reference/statements/detach.md)
-- [DROP](../../sql-reference/statements/drop.md)
-- [EXISTS](../../sql-reference/statements/exists.md)
-- [KILL](../../sql-reference/statements/kill.md)
-- [OPTIMIZE](../../sql-reference/statements/optimize.md)
-- [RENAME](../../sql-reference/statements/rename.md)
-- [SET](../../sql-reference/statements/set.md)
-- [SET ROLE](../../sql-reference/statements/set-role.md)
-- [TRUNCATE](../../sql-reference/statements/truncate.md)
-- [USE](../../sql-reference/statements/use.md)
-

From ed76584876a646d906cdc3e8f9108680a1cbea71 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Fri, 7 Oct 2022 16:52:04 +0200
Subject: [PATCH 057/252] fix

---
 src/Interpreters/DDLWorker.cpp | 2 +-
 .../queries/0_stateless/02267_file_globs_schema_inference.sql | 3 ++-
 .../0_stateless/02448_clone_replica_lost_part.reference | 2 --
 tests/queries/0_stateless/02448_clone_replica_lost_part.sql | 3 +--
 4 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 8873d851de1..0e4c658a1ee 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -114,7 +114,7 @@ DDLWorker::DDLWorker(
 void DDLWorker::startup()
 {
     [[maybe_unused]] bool prev_stop_flag = stop_flag.exchange(false);
-    chassert(true);
+    chassert(prev_stop_flag);
     main_thread = ThreadFromGlobalPool(&DDLWorker::runMainThread, this);
     cleanup_thread = ThreadFromGlobalPool(&DDLWorker::runCleanupThread, this);
 }
diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql
b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql index 6862d6f0602..2a4d1402ac2 100644 --- a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql +++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql @@ -1,4 +1,5 @@ --- Tags: no-fasttest, no-parallel +-- Tags: no-fasttest, no-parallel, disabled +-- FIXME https://github.com/ClickHouse/ClickHouse/issues/41934 insert into function file('02267_data2.jsonl') select NULL as x; insert into function file('02267_data3.jsonl') select * from numbers(0); diff --git a/tests/queries/0_stateless/02448_clone_replica_lost_part.reference b/tests/queries/0_stateless/02448_clone_replica_lost_part.reference index af82f72c49e..26c6cbf438b 100644 --- a/tests/queries/0_stateless/02448_clone_replica_lost_part.reference +++ b/tests/queries/0_stateless/02448_clone_replica_lost_part.reference @@ -7,7 +7,5 @@ 7 [1,2,3,4,5,20,30,40,50] 8 [1,2,3,4,5,10,20,30,40,50] 9 [1,2,3,4,5,10,20,30,40,50] -['all_18_23_1','all_7_17_2_13'] -10 [1,2,3,4,5,10,20,30,40,50] 11 [1,2,3,4,5,10,20,30,40,50,100,300,400,500,600] 12 [1,2,3,4,5,10,20,30,40,50,100,300,400,500,600] diff --git a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql index d395caf41db..2db12a3dcfd 100644 --- a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql +++ b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql @@ -128,7 +128,6 @@ insert into rmt2 values (600); system sync replica rmt2; -- merge through gap optimize table rmt2; -select arraySort(groupArrayDistinct(_part)) from rmt2; -- give it a chance to cleanup log select sleep(2) format Null; -- increases probability of reproducing the issue @@ -136,7 +135,7 @@ select sleep(2) format Null; -- increases probability of reproducing the issue system stop replicated sends rmt2; attach table rmt1; -- rmt1 should not show the value (100) from dropped part -select 10, arraySort(groupArray(n)) from rmt1; +select throwIf(n = 100) from rmt1 format Null; select 11, arraySort(groupArray(n)) from rmt2; system start replicated sends rmt2; From 1a78e8a8179eac13377a982106c486b81c1ce486 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Oct 2022 20:22:03 +0200 Subject: [PATCH 058/252] fix --- src/Storages/MergeTree/MergeTreeData.cpp | 5 ++++- tests/queries/0_stateless/02448_clone_replica_lost_part.sql | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 4a7d2b2dd63..54b44679321 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3222,7 +3222,10 @@ void MergeTreeData::outdateBrokenPartAndCloneToDetached(const DataPartPtr & part LOG_INFO(log, "Cloning part {} to {}_{} and making it obsolete.", part_to_detach->data_part_storage->getPartDirectory(), prefix, part_to_detach->name); part_to_detach->makeCloneInDetached(prefix, metadata_snapshot); - removePartsFromWorkingSet(NO_TRANSACTION_RAW, {part_to_detach}, true); + + DataPartsLock lock = lockParts(); + if (part_to_detach->getState() == DataPartState::Active) + removePartsFromWorkingSet(NO_TRANSACTION_RAW, {part_to_detach}, true, &lock); } void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeTreeData::DataPartPtr & part_to_detach, const String & prefix, bool restore_covered) diff --git a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql 
index 2db12a3dcfd..14b75d4c322 100644 --- a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql +++ b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql @@ -1,3 +1,4 @@ +-- Tags: long drop table if exists rmt1; drop table if exists rmt2; From 6bb62d4d03d66b6cf3c01ecc1a4e0e4ac8a0050f Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 8 Oct 2022 00:01:58 +0000 Subject: [PATCH 059/252] add test for PROXYv1 --- .../configs/config.xml | 6 ++++++ .../test_composable_protocols/test.py | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/tests/integration/test_composable_protocols/configs/config.xml b/tests/integration/test_composable_protocols/configs/config.xml index 553128d4386..35673c3e7e5 100644 --- a/tests/integration/test_composable_protocols/configs/config.xml +++ b/tests/integration/test_composable_protocols/configs/config.xml @@ -34,6 +34,12 @@ 9001 native protocol endpoint (tcp) + + proxy1 + tcp + 9100 + native protocol with PROXYv1 (tcp_proxy) + http 8123 diff --git a/tests/integration/test_composable_protocols/test.py b/tests/integration/test_composable_protocols/test.py index c0c0e5e0a83..a2e30c4480b 100644 --- a/tests/integration/test_composable_protocols/test.py +++ b/tests/integration/test_composable_protocols/test.py @@ -5,6 +5,8 @@ import os from helpers.cluster import ClickHouseCluster from helpers.client import Client import urllib.request, urllib.parse +import subprocess +import socket SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -44,6 +46,19 @@ def execute_query_http(host, port, query): response = urllib.request.urlopen(request).read() return response.decode("utf-8") +def netcat(hostname, port, content): + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.connect((hostname, port)) + s.sendall(content) + s.shutdown(socket.SHUT_WR) + data = [] + while 1: + d = s.recv(1024) + if len(d) == 0: + break + data.append(d) + s.close() + return b''.join(data) def test_connections(): @@ -67,3 +82,6 @@ def test_connections(): assert execute_query_https(server.ip_address, 8443, "SELECT 1") == "1\n" assert execute_query_https(server.ip_address, 8444, "SELECT 1") == "1\n" + + data = "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007default\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\vClickHouse \024\r\253\251\003\0\001\0\0\0\002\001\025SELECT \'Hello, world\'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0" + assert netcat(server.ip_address, 9100, bytearray(data, "latin-1")).find(bytearray("Hello, world", "latin-1")) >= 0 From 3bed015a1f4994fb77dcc32385fae7c456e2f1e3 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 8 Oct 2022 00:09:24 +0000 Subject: [PATCH 060/252] Automatic style fix --- tests/integration/test_composable_protocols/test.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_composable_protocols/test.py b/tests/integration/test_composable_protocols/test.py index a2e30c4480b..d861af929c3 100644 --- a/tests/integration/test_composable_protocols/test.py +++ b/tests/integration/test_composable_protocols/test.py @@ -46,6 +46,7 @@ def execute_query_http(host, port, query): response = urllib.request.urlopen(request).read() return response.decode("utf-8") + def netcat(hostname, port, content): s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.connect((hostname, port)) @@ -58,7 +59,8 @@ def netcat(hostname, 
port, content):
             break
         data.append(d)
     s.close()
-    return b''.join(data)
+    return b"".join(data)
+

 def test_connections():

@@ -83,5 +85,10 @@ def test_connections():
     assert execute_query_https(server.ip_address, 8444, "SELECT 1") == "1\n"

-    data = "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007default\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\vClickHouse \024\r\253\251\003\0\001\0\0\0\002\001\025SELECT \'Hello, world\'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0"
-    assert netcat(server.ip_address, 9100, bytearray(data, "latin-1")).find(bytearray("Hello, world", "latin-1")) >= 0
+    data = "PROXY TCP4 255.255.255.255 255.255.255.255 65535 65535\r\n\0\021ClickHouse client\024\r\253\251\003\0\007default\0\004\001\0\001\0\0\t0.0.0.0:0\001\tmilovidov\021milovidov-desktop\vClickHouse \024\r\253\251\003\0\001\0\0\0\002\001\025SELECT 'Hello, world'\002\0\247\203\254l\325\\z|\265\254F\275\333\206\342\024\202\024\0\0\0\n\0\0\0\240\01\0\02\377\377\377\377\0\0\0"
+    assert (
+        netcat(server.ip_address, 9100, bytearray(data, "latin-1")).find(
+            bytearray("Hello, world", "latin-1")
+        )
+        >= 0
+    )

From 1cb91e72fbb07c072e4c4f2409061a7fdaf1da3c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 9 Oct 2022 04:46:32 +0200
Subject: [PATCH 061/252] Add a test for #658

---
 tests/queries/1_stateful/00097_constexpr_in_index.reference | 1 +
 tests/queries/1_stateful/00097_constexpr_in_index.sql | 3 +++
 2 files changed, 4 insertions(+)
 create mode 100644 tests/queries/1_stateful/00097_constexpr_in_index.reference
 create mode 100644 tests/queries/1_stateful/00097_constexpr_in_index.sql

diff --git a/tests/queries/1_stateful/00097_constexpr_in_index.reference b/tests/queries/1_stateful/00097_constexpr_in_index.reference
new file mode 100644
index 00000000000..5080d6d4cd8
--- /dev/null
+++ b/tests/queries/1_stateful/00097_constexpr_in_index.reference
@@ -0,0 +1 @@
+1803
diff --git a/tests/queries/1_stateful/00097_constexpr_in_index.sql b/tests/queries/1_stateful/00097_constexpr_in_index.sql
new file mode 100644
index 00000000000..b5cac75c767
--- /dev/null
+++ b/tests/queries/1_stateful/00097_constexpr_in_index.sql
@@ -0,0 +1,3 @@
+-- Even in the presence of OR, we evaluate the "0 IN (1, 2, 3)" as a constant expression, therefore it does not prevent the index analysis.
+ +SELECT count() FROM test.hits WHERE CounterID IN (14917930, 33034174) OR 0 IN (1, 2, 3) SETTINGS max_rows_to_read = 1000000, force_primary_key = 1; From 95c5c8a7442bf57643c2816fcdf9770eca05f43f Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Mon, 10 Oct 2022 14:50:09 +0200 Subject: [PATCH 062/252] Fail fast on empty URL --- src/Storages/HDFS/StorageHDFS.cpp | 2 ++ tests/queries/0_stateless/02458_empty_hdfs_url.reference | 0 tests/queries/0_stateless/02458_empty_hdfs_url.sql | 1 + 3 files changed, 3 insertions(+) create mode 100644 tests/queries/0_stateless/02458_empty_hdfs_url.reference create mode 100644 tests/queries/0_stateless/02458_empty_hdfs_url.sql diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 2170b4142e8..a1204877e00 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -712,6 +712,8 @@ void registerStorageHDFS(StorageFactory & factory) engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext()); String url = checkAndGetLiteralArgument(engine_args[0], "url"); + if (url.empty()) + throw Exception("Storage HDFS requires valid URL to be set, empty string provided", ErrorCodes::BAD_ARGUMENTS); String format_name = "auto"; if (engine_args.size() > 1) diff --git a/tests/queries/0_stateless/02458_empty_hdfs_url.reference b/tests/queries/0_stateless/02458_empty_hdfs_url.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02458_empty_hdfs_url.sql b/tests/queries/0_stateless/02458_empty_hdfs_url.sql new file mode 100644 index 00000000000..8c2cd142365 --- /dev/null +++ b/tests/queries/0_stateless/02458_empty_hdfs_url.sql @@ -0,0 +1 @@ +SELECT * FROM hdfsCluster('test_cluster', '', 'TSV'); -- { serverError BAD_ARGUMENTS } \ No newline at end of file From 6a02d50b4765c77627d9728b14221061902161cc Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Mon, 10 Oct 2022 16:55:23 +0200 Subject: [PATCH 063/252] no hdfs in fast test --- tests/queries/0_stateless/02458_empty_hdfs_url.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02458_empty_hdfs_url.sql b/tests/queries/0_stateless/02458_empty_hdfs_url.sql index 8c2cd142365..98e81252fc6 100644 --- a/tests/queries/0_stateless/02458_empty_hdfs_url.sql +++ b/tests/queries/0_stateless/02458_empty_hdfs_url.sql @@ -1 +1,2 @@ +-- Tags: no-fasttest SELECT * FROM hdfsCluster('test_cluster', '', 'TSV'); -- { serverError BAD_ARGUMENTS } \ No newline at end of file From 8a066b5eef7258caf965ee670a32b8ba801d06ea Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 11 Oct 2022 01:28:36 +0200 Subject: [PATCH 064/252] Fix test --- base/base/safeExit.cpp | 1 - .../1_stateful/00168_parallel_processing_on_replicas_part_1.sh | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/base/base/safeExit.cpp b/base/base/safeExit.cpp index 027ad4c8c4d..44d92643e91 100644 --- a/base/base/safeExit.cpp +++ b/base/base/safeExit.cpp @@ -1,7 +1,6 @@ #if defined(OS_LINUX) # include #endif -#include #include #include #include diff --git a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh index ecd0d281b53..a0e2442ae9e 100755 --- a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh +++ b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh @@ -30,6 
+30,7 @@ SkipList=( "00013_sorting_of_nested.sql" # It contains FINAL, which is not allowed together with parallel reading "00061_storage_buffer.sql" + "00097_constexpr_in_index.sql" "00095_hyperscan_profiler.sql" # too long in debug (there is a --no-debug tag inside a test) "00140_rename.sql" # Multiple renames are not allowed with DatabaseReplicated and tags are not forwarded through this test From 5e6a7c1678ea464275d09d226a098be5a14a60eb Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 11 Oct 2022 09:27:46 +0000 Subject: [PATCH 065/252] Use multiread where possible --- .../ReplicatedMergeTreeCleanupThread.cpp | 16 ++++++---- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 26 +++++++++------- .../MergeTree/ReplicatedMergeTreeSink.cpp | 21 +++++++------ src/Storages/StorageKeeperMap.cpp | 14 +++------ src/Storages/StorageReplicatedMergeTree.cpp | 31 ++++++++++--------- 5 files changed, 59 insertions(+), 49 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 3936ee61b70..7993840f1d9 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -419,14 +419,14 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper & LOG_TRACE(log, "Checking {} blocks ({} are not cached){}", stat.numChildren, not_cached_blocks, " to clear old ones from ZooKeeper."); } - zkutil::AsyncResponses exists_futures; + std::vector exists_paths; for (const String & block : blocks) { auto it = cached_block_stats.find(block); if (it == cached_block_stats.end()) { /// New block. Fetch its stat asynchronously. - exists_futures.emplace_back(block, zookeeper.asyncExists(storage.zookeeper_path + "/blocks/" + block)); + exists_paths.emplace_back(storage.zookeeper_path + "/blocks/" + block); } else { @@ -436,14 +436,18 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper & } } + auto exists_size = exists_paths.size(); + auto exists_results = zookeeper.exists(exists_paths); + /// Put fetched stats into the cache - for (auto & elem : exists_futures) + for (size_t i = 0; i < exists_size; ++i) { - auto status = elem.second.get(); + auto status = exists_results[i]; if (status.error != Coordination::Error::ZNONODE) { - cached_block_stats.emplace(elem.first, std::make_pair(status.stat.ctime, status.stat.version)); - timed_blocks.emplace_back(elem.first, status.stat.ctime, status.stat.version); + auto node_name = fs::path(exists_paths[i]).filename(); + cached_block_stats.emplace(node_name, std::make_pair(status.stat.ctime, status.stat.version)); + timed_blocks.emplace_back(node_name, status.stat.ctime, status.stat.version); } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 0305ce440f9..9f9ef1ab0cb 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -153,17 +153,19 @@ bool ReplicatedMergeTreeQueue::load(zkutil::ZooKeeperPtr zookeeper) ::sort(children.begin(), children.end()); - zkutil::AsyncResponses futures; - futures.reserve(children.size()); + auto children_num = children.size(); + std::vector paths; + paths.reserve(children_num); for (const String & child : children) - futures.emplace_back(child, zookeeper->asyncGet(fs::path(queue_path) / child)); + paths.emplace_back(fs::path(queue_path) / child); - for (auto & future : futures) + auto 
results = zookeeper->get(paths); + for (size_t i = 0; i < children_num; ++i) { - Coordination::GetResponse res = future.second.get(); + auto res = results[i]; LogEntryPtr entry = LogEntry::parse(res.data, res.stat); - entry->znode_name = future.first; + entry->znode_name = children[i]; std::lock_guard lock(state_mutex); @@ -641,11 +643,11 @@ int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper LOG_DEBUG(log, "Pulling {} entries to queue: {} - {}", (end - begin), *begin, *last); - zkutil::AsyncResponses futures; - futures.reserve(end - begin); + Strings get_paths; + get_paths.reserve(end - begin); for (auto it = begin; it != end; ++it) - futures.emplace_back(*it, zookeeper->asyncGet(fs::path(zookeeper_path) / "log" / *it)); + get_paths.emplace_back(fs::path(zookeeper_path) / "log" / *it); /// Simultaneously add all new entries to the queue and move the pointer to the log. @@ -655,9 +657,11 @@ int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper std::optional min_unprocessed_insert_time_changed; - for (auto & future : futures) + auto get_results = zookeeper->get(get_paths); + auto get_num = get_results.size(); + for (size_t i = 0; i < get_num; ++i) { - Coordination::GetResponse res = future.second.get(); + auto res = get_results[i]; copied_entries.emplace_back(LogEntry::parse(res.data, res.stat)); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index b9bd027cde2..f81303b3f7b 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -99,19 +99,22 @@ size_t ReplicatedMergeTreeSink::checkQuorumPrecondition(zkutil::ZooKeeperPtr & z quorum_info.status_path = storage.zookeeper_path + "/quorum/status"; Strings replicas = zookeeper->getChildren(fs::path(storage.zookeeper_path) / "replicas"); - std::vector> replicas_status_futures; - replicas_status_futures.reserve(replicas.size()); + + Strings exists_paths; for (const auto & replica : replicas) if (replica != storage.replica_name) - replicas_status_futures.emplace_back(zookeeper->asyncExists(fs::path(storage.zookeeper_path) / "replicas" / replica / "is_active")); + exists_paths.emplace_back(fs::path(storage.zookeeper_path) / "replicas" / replica / "is_active"); - std::future is_active_future = zookeeper->asyncTryGet(storage.replica_path + "/is_active"); - std::future host_future = zookeeper->asyncTryGet(storage.replica_path + "/host"); + auto exists_result = zookeeper->exists(exists_paths); + auto get_results = zookeeper->get(Strings{storage.replica_path + "/is_active", storage.replica_path + "/host"}); size_t active_replicas = 1; /// Assume current replica is active (will check below) - for (auto & status : replicas_status_futures) - if (status.get().error == Coordination::Error::ZOK) + for (size_t i = 0; i < exists_paths.size(); ++i) + { + auto status = exists_result[i]; + if (status.error == Coordination::Error::ZOK) ++active_replicas; + } size_t replicas_number = replicas.size(); size_t quorum_size = getQuorumSize(replicas_number); @@ -135,8 +138,8 @@ size_t ReplicatedMergeTreeSink::checkQuorumPrecondition(zkutil::ZooKeeperPtr & z /// Both checks are implicitly made also later (otherwise there would be a race condition). 
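// The pattern this patch applies everywhere, in isolation: gather all paths
// first, issue a single batched call, then walk the responses by index. This
// is a free-standing C++ sketch against a stubbed client, not the real
// ZooKeeper API; the path names below are only examples.
#include <cassert>
#include <cstddef>
#include <string>
#include <unordered_set>
#include <vector>

struct ExistsResponse { bool exists = false; };

struct StubClient
{
    std::unordered_set<std::string> nodes;

    // One round trip for N paths instead of N futures awaited one by one.
    std::vector<ExistsResponse> exists(const std::vector<std::string> & paths) const
    {
        std::vector<ExistsResponse> out;
        out.reserve(paths.size());
        for (const auto & path : paths)
            out.push_back({nodes.contains(path)});
        return out;
    }
};

int main()
{
    const StubClient client{{"/replicas/r1/is_active", "/replicas/r3/is_active"}};
    const std::vector<std::string> paths{"/replicas/r1/is_active", "/replicas/r2/is_active", "/replicas/r3/is_active"};

    std::size_t active_replicas = 1; // the current replica is assumed active, as in the code above
    const auto results = client.exists(paths);
    for (std::size_t i = 0; i < paths.size(); ++i)
        if (results[i].exists)
            ++active_replicas;
    assert(active_replicas == 3);
}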
- auto is_active = is_active_future.get(); - auto host = host_future.get(); + auto is_active = get_results[0]; + auto host = get_results[1]; if (is_active.error == Coordination::Error::ZNONODE || host.error == Coordination::Error::ZNONODE) throw Exception("Replica is not active right now", ErrorCodes::READONLY); diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index e62874490f8..8dce6a7eb10 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -682,24 +682,20 @@ Chunk StorageKeeperMap::getBySerializedKeys(const std::span k auto client = getClient(); - std::vector> values; - values.reserve(keys.size()); + Strings full_key_paths; + full_key_paths.reserve(keys.size()); for (const auto & key : keys) { - const auto full_path = fullPathForKey(key); - values.emplace_back(client->asyncTryGet(full_path)); + full_key_paths.emplace_back(fullPathForKey(key)); } - auto wait_until = std::chrono::system_clock::now() + std::chrono::milliseconds(Coordination::DEFAULT_OPERATION_TIMEOUT_MS); + auto values = client->get(full_key_paths); for (size_t i = 0; i < keys.size(); ++i) { - auto & value = values[i]; - if (value.wait_until(wait_until) != std::future_status::ready) - throw DB::Exception(ErrorCodes::KEEPER_EXCEPTION, "Failed to fetch values: timeout"); + auto response = values[i]; - auto response = value.get(); Coordination::Error code = response.error; if (code == Coordination::Error::ZOK) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index b55c59a3d6e..a8c88d49ed3 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3206,16 +3206,17 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c int32_t log_version, MergeType merge_type) { - std::vector> exists_futures; - exists_futures.reserve(parts.size()); + Strings exists_paths; + exists_paths.reserve(parts.size()); for (const auto & part : parts) - exists_futures.emplace_back(zookeeper->asyncExists(fs::path(replica_path) / "parts" / part->name)); + exists_paths.emplace_back(fs::path(replica_path) / "parts" / part->name); + auto exists_results = zookeeper->exists(exists_paths); bool all_in_zk = true; for (size_t i = 0; i < parts.size(); ++i) { /// If there is no information about part in ZK, we will not merge it. 
- if (exists_futures[i].get().error == Coordination::Error::ZNONODE) + if (exists_results[i].error == Coordination::Error::ZNONODE) { all_in_zk = false; @@ -6228,19 +6229,20 @@ void StorageReplicatedMergeTree::removePartsFromZooKeeperWithRetries(const Strin auto zookeeper = getZooKeeper(); - std::vector> exists_futures; - exists_futures.reserve(part_names.size()); + Strings exists_paths; + exists_paths.reserve(part_names.size()); for (const String & part_name : part_names) { - String part_path = fs::path(replica_path) / "parts" / part_name; - exists_futures.emplace_back(zookeeper->asyncExists(part_path)); + exists_paths.emplace_back(fs::path(replica_path) / "parts" / part_name); } + auto exists_results = zookeeper->exists(exists_paths); + std::vector> remove_futures; remove_futures.reserve(part_names.size()); for (size_t i = 0; i < part_names.size(); ++i) { - Coordination::ExistsResponse exists_resp = exists_futures[i].get(); + Coordination::ExistsResponse exists_resp = exists_results[i]; if (exists_resp.error == Coordination::Error::ZOK) { Coordination::Requests ops; @@ -6286,9 +6288,9 @@ void StorageReplicatedMergeTree::removePartsFromZooKeeperWithRetries(const Strin void StorageReplicatedMergeTree::removePartsFromZooKeeper( zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names, NameSet * parts_should_be_retried) { - std::vector> exists_futures; + Strings exists_paths; std::vector> remove_futures; - exists_futures.reserve(part_names.size()); + exists_paths.reserve(part_names.size()); remove_futures.reserve(part_names.size()); try { @@ -6296,13 +6298,14 @@ void StorageReplicatedMergeTree::removePartsFromZooKeeper( /// if zk session will be dropped for (const String & part_name : part_names) { - String part_path = fs::path(replica_path) / "parts" / part_name; - exists_futures.emplace_back(zookeeper->asyncExists(part_path)); + exists_paths.emplace_back(fs::path(replica_path) / "parts" / part_name); } + auto exists_results = zookeeper->exists(exists_paths); + for (size_t i = 0; i < part_names.size(); ++i) { - Coordination::ExistsResponse exists_resp = exists_futures[i].get(); + auto exists_resp = exists_results[i]; if (exists_resp.error == Coordination::Error::ZOK) { Coordination::Requests ops; From 28c7a224280701d30307e7bebdca2489e0cb130c Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Tue, 11 Oct 2022 11:59:48 +0200 Subject: [PATCH 066/252] address review comments --- src/Storages/HDFS/StorageHDFS.cpp | 13 ++++++++++--- tests/queries/0_stateless/02458_empty_hdfs_url.sql | 5 ++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index a1204877e00..804c424bf59 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -121,7 +121,16 @@ namespace std::pair getPathFromUriAndUriWithoutPath(const String & uri) { const size_t begin_of_path = uri.find('/', uri.find("//") + 2); - return {uri.substr(begin_of_path), uri.substr(0, begin_of_path)}; + + auto pos = url.find("//"); + if (pos != std::string::npos && std::advance(pos, 2) != url.end()) + { + pos = find('/', pos + 2); + if (pos != std::string::npos ) + return {uri.substr(pos), uri.substr(0, pos)}; + } + + throw Exception("Storage HDFS requires valid URL to be set, empty string provided", ErrorCodes::BAD_ARGUMENTS); } std::vector getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context, std::unordered_map * last_mod_times = nullptr) @@ -712,8 
+721,6 @@ void registerStorageHDFS(StorageFactory & factory) engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext()); String url = checkAndGetLiteralArgument(engine_args[0], "url"); - if (url.empty()) - throw Exception("Storage HDFS requires valid URL to be set, empty string provided", ErrorCodes::BAD_ARGUMENTS); String format_name = "auto"; if (engine_args.size() > 1) diff --git a/tests/queries/0_stateless/02458_empty_hdfs_url.sql b/tests/queries/0_stateless/02458_empty_hdfs_url.sql index 98e81252fc6..6cbd402c4b2 100644 --- a/tests/queries/0_stateless/02458_empty_hdfs_url.sql +++ b/tests/queries/0_stateless/02458_empty_hdfs_url.sql @@ -1,2 +1,5 @@ -- Tags: no-fasttest -SELECT * FROM hdfsCluster('test_cluster', '', 'TSV'); -- { serverError BAD_ARGUMENTS } \ No newline at end of file +SELECT * FROM hdfsCluster('test_cluster', '', 'TSV'); -- { serverError BAD_ARGUMENTS } +SELECT * FROM hdfsCluster('test_cluster', ' ', 'TSV'); -- { serverError BAD_ARGUMENTS } +SELECT * FROM hdfsCluster('test_cluster', '/', 'TSV'); -- { serverError BAD_ARGUMENTS } +SELECT * FROM hdfsCluster('test_cluster', 'http/', 'TSV'); -- { serverError BAD_ARGUMENTS } \ No newline at end of file From 3201c08bb566228c8b4aa3414745fd726baf7086 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Tue, 11 Oct 2022 13:54:03 +0200 Subject: [PATCH 067/252] style --- src/Storages/HDFS/StorageHDFS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 804c424bf59..98ee578493b 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -126,7 +126,7 @@ namespace if (pos != std::string::npos && std::advance(pos, 2) != url.end()) { pos = find('/', pos + 2); - if (pos != std::string::npos ) + if (pos != std::string::npos) return {uri.substr(pos), uri.substr(0, pos)}; } From f3833e3a536242a7fba875870bc8df912adc5b9c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 11 Oct 2022 15:50:06 +0200 Subject: [PATCH 068/252] Introduce StaticDirectoryIterator Signed-off-by: Azat Khuzhin --- .../ObjectStorages/StaticDirectoryIterator.h | 32 +++++++++++++++++++ ...etadataStorageFromStaticFilesWebServer.cpp | 25 ++------------- 2 files changed, 35 insertions(+), 22 deletions(-) create mode 100644 src/Disks/ObjectStorages/StaticDirectoryIterator.h diff --git a/src/Disks/ObjectStorages/StaticDirectoryIterator.h b/src/Disks/ObjectStorages/StaticDirectoryIterator.h new file mode 100644 index 00000000000..891bdb688f0 --- /dev/null +++ b/src/Disks/ObjectStorages/StaticDirectoryIterator.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +class StaticDirectoryIterator final : public IDirectoryIterator +{ +public: + explicit StaticDirectoryIterator(std::vector && dir_file_paths_) + : dir_file_paths(std::move(dir_file_paths_)) + , iter(dir_file_paths.begin()) + {} + + void next() override { ++iter; } + + bool isValid() const override { return iter != dir_file_paths.end(); } + + std::string path() const override { return iter->string(); } + + std::string name() const override { return iter->filename(); } + +private: + std::vector dir_file_paths; + std::vector::iterator iter; +}; + +} diff --git a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp index f13a51ddc03..06e36a2ddd8 100644 --- 
a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp +++ b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp @@ -1,5 +1,6 @@ #include "MetadataStorageFromStaticFilesWebServer.h" #include +#include #include #include #include @@ -16,26 +17,6 @@ namespace ErrorCodes extern const int NETWORK_ERROR; } -class DiskWebServerDirectoryIterator final : public IDirectoryIterator -{ -public: - explicit DiskWebServerDirectoryIterator(std::vector && dir_file_paths_) - : dir_file_paths(std::move(dir_file_paths_)), iter(dir_file_paths.begin()) {} - - void next() override { ++iter; } - - bool isValid() const override { return iter != dir_file_paths.end(); } - - String path() const override { return iter->string(); } - - String name() const override { return iter->filename(); } - -private: - std::vector dir_file_paths; - std::vector::iterator iter; -}; - - MetadataStorageFromStaticFilesWebServer::MetadataStorageFromStaticFilesWebServer( const WebObjectStorage & object_storage_) : object_storage(object_storage_) @@ -169,7 +150,7 @@ DirectoryIteratorPtr MetadataStorageFromStaticFilesWebServer::iterateDirectory(c if (!initializeIfNeeded(path)) { - return std::make_unique(std::move(dir_file_paths)); + return std::make_unique(std::move(dir_file_paths)); } assertExists(path); @@ -181,7 +162,7 @@ DirectoryIteratorPtr MetadataStorageFromStaticFilesWebServer::iterateDirectory(c } LOG_TRACE(object_storage.log, "Iterate directory {} with {} files", path, dir_file_paths.size()); - return std::make_unique(std::move(dir_file_paths)); + return std::make_unique(std::move(dir_file_paths)); } std::string MetadataStorageFromStaticFilesWebServer::readFileToString(const std::string &) const From 688edafcc67e59bd3b5d4ab3fcf115d5a0fc3012 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 11 Oct 2022 17:21:05 +0200 Subject: [PATCH 069/252] Disks/ObjectStorages: add comments for some classes Signed-off-by: Azat Khuzhin --- src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.h | 1 + src/Disks/ObjectStorages/LocalObjectStorage.h | 1 + src/Disks/ObjectStorages/MetadataStorageFromDisk.h | 2 ++ 3 files changed, 4 insertions(+) diff --git a/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.h b/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.h index b1c8340ef1b..65cf012ddab 100644 --- a/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.h +++ b/src/Disks/ObjectStorages/FakeMetadataStorageFromDisk.h @@ -9,6 +9,7 @@ namespace DB { +/// Store metadata in the disk itself. class FakeMetadataStorageFromDisk final : public IMetadataStorage { private: diff --git a/src/Disks/ObjectStorages/LocalObjectStorage.h b/src/Disks/ObjectStorages/LocalObjectStorage.h index 644c5249d8f..0e4c71b4a47 100644 --- a/src/Disks/ObjectStorages/LocalObjectStorage.h +++ b/src/Disks/ObjectStorages/LocalObjectStorage.h @@ -12,6 +12,7 @@ class Logger; namespace DB { +/// Treat local disk as an object storage (for interface compatibility). class LocalObjectStorage : public IObjectStorage { public: diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h index 8c1fb6edd14..b06a2a41f2b 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h @@ -10,6 +10,8 @@ namespace DB { +/// Store metadata on a separate disk +/// (used for object storages, like S3 and related). 
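// Usage of the vector-backed iterator from patch 068 above, restated as a
// free-standing sketch. The interface is reduced to what the patch shows, and
// the class is renamed to make clear this is an illustration, not the header.
#include <filesystem>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

class StaticDirectoryIteratorSketch
{
public:
    explicit StaticDirectoryIteratorSketch(std::vector<std::filesystem::path> && paths_)
        : paths(std::move(paths_)), iter(paths.begin()) {}

    void next() { ++iter; }
    bool isValid() const { return iter != paths.end(); }
    std::string path() const { return iter->string(); }
    std::string name() const { return iter->filename().string(); }

private:
    // Declaration order matters: "paths" must be fully constructed before
    // "iter" is initialized from paths.begin(), exactly as in the patch.
    std::vector<std::filesystem::path> paths;
    std::vector<std::filesystem::path>::iterator iter;
};

int main()
{
    StaticDirectoryIteratorSketch it({"/store/abc/all_1_1_0", "/store/abc/all_2_2_0"});
    for (; it.isValid(); it.next())
        std::cout << it.name() << " at " << it.path() << '\n';
}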
 class MetadataStorageFromDisk final : public IMetadataStorage
 {
 private:

From daaf01c344c0509ca5e74fccb5d0f23fa5ae5b53 Mon Sep 17 00:00:00 2001
From: xinhuitian
Date: Wed, 12 Oct 2022 12:18:17 +0800
Subject: [PATCH 070/252] early return

---
 src/Storages/StorageMergeTree.cpp | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index aea853b6c39..c721f39043a 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -993,14 +993,6 @@ std::shared_ptr StorageMergeTree::selectPartsToMutate(
     const StorageMetadataPtr & metadata_snapshot, String * /* disable_reason */, TableLockHolder & /* table_lock_holder */,
     std::unique_lock & /*currently_processing_in_background_mutex_lock*/)
 {
-    size_t max_ast_elements = getContext()->getSettingsRef().max_expanded_ast_elements;
-
-    auto future_part = std::make_shared();
-    if (storage_settings.get()->assign_part_uuids)
-        future_part->uuid = UUIDHelpers::generateV4();
-
-    CurrentlyMergingPartsTaggerPtr tagger;
-
     if (current_mutations_by_version.empty())
         return {};

@@ -1014,6 +1006,14 @@ std::shared_ptr StorageMergeTree::selectPartsToMutate(
         return {};
     }

+    size_t max_ast_elements = getContext()->getSettingsRef().max_expanded_ast_elements;
+
+    auto future_part = std::make_shared();
+    if (storage_settings.get()->assign_part_uuids)
+        future_part->uuid = UUIDHelpers::generateV4();
+
+    CurrentlyMergingPartsTaggerPtr tagger;
+
     auto mutations_end_it = current_mutations_by_version.end();
     for (const auto & part : getDataPartsVectorForInternalUsage())
     {
@@ -1152,7 +1152,8 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign
         return false;

     merge_entry = selectPartsToMerge(metadata_snapshot, false, {}, false, nullptr, share_lock, lock, txn);
-    if (!merge_entry)
+
+    if (!merge_entry && !current_mutations_by_version.empty())
         mutate_entry = selectPartsToMutate(metadata_snapshot, nullptr, share_lock, lock);

     has_mutations = !current_mutations_by_version.empty();

From 74b32429e2c5ddc56531d6a7121aa9ca06ec1998 Mon Sep 17 00:00:00 2001
From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com>
Date: Wed, 12 Oct 2022 11:31:57 +0200
Subject: [PATCH 071/252] no hdfs for arm

---
 tests/queries/0_stateless/02458_empty_hdfs_url.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02458_empty_hdfs_url.sql b/tests/queries/0_stateless/02458_empty_hdfs_url.sql
index 6cbd402c4b2..7e91043255d 100644
--- a/tests/queries/0_stateless/02458_empty_hdfs_url.sql
+++ b/tests/queries/0_stateless/02458_empty_hdfs_url.sql
@@ -1,4 +1,4 @@
--- Tags: no-fasttest
+-- Tags: no-fasttest, no-cpu-aarch64
 SELECT * FROM hdfsCluster('test_cluster', '', 'TSV'); -- { serverError BAD_ARGUMENTS }
 SELECT * FROM hdfsCluster('test_cluster', ' ', 'TSV'); -- { serverError BAD_ARGUMENTS }
 SELECT * FROM hdfsCluster('test_cluster', '/', 'TSV'); -- { serverError BAD_ARGUMENTS }

From 590ed7b640ef4fdcbdfa827538f50b0257b7b88e Mon Sep 17 00:00:00 2001
From: Roman Vasin
Date: Wed, 12 Oct 2022 15:16:09 +0000
Subject: [PATCH 072/252] Add transforms for other arguments like year, quarter, etc.

---
 src/Functions/dateDiff.cpp | 184 +++++++++++++++++++++++++++++++++++--
 1 file changed, 176 insertions(+), 8 deletions(-)

diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp
index ecb34c88dfb..82ad0322c4b 100644
--- a/src/Functions/dateDiff.cpp
+++ b/src/Functions/dateDiff.cpp
@@ -33,6 +33,102 @@
namespace { +struct ToExtendedRelativeYearNumImpl +{ + static constexpr auto name = "toExtendedRelativeYearNum"; + + static inline Int16 execute(Int64 t, const DateLUTImpl & time_zone) + { + return time_zone.toYear(t); + } + static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + { + return time_zone.toYear(static_cast(t)); + } + static inline Int16 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toYear(ExtendedDayNum(d)); + } + static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + { + return time_zone.toYear(DayNum(d)); + } + + using FactorTransform = ZeroTransform; +}; + +struct ToExtendedRelativeQuarterNumImpl +{ + static constexpr auto name = "toExtendedRelativeQuarterNum"; + + static inline Int32 execute(Int64 t, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeQuarterNum(t); + } + static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeQuarterNum(static_cast(t)); + } + static inline Int32 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeQuarterNum(ExtendedDayNum(d)); + } + static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeQuarterNum(DayNum(d)); + } + + using FactorTransform = ZeroTransform; +}; + +struct ToExtendedRelativeMonthNumImpl +{ + static constexpr auto name = "toExtendedRelativeMonthNum"; + + static inline Int32 execute(Int64 t, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeMonthNum(t); + } + static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeMonthNum(static_cast(t)); + } + static inline Int32 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeMonthNum(ExtendedDayNum(d)); + } + static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeMonthNum(DayNum(d)); + } + + using FactorTransform = ZeroTransform; +}; + +struct ToExtendedRelativeWeekNumImpl +{ + static constexpr auto name = "toExtendedRelativeWeekNum"; + + static inline Int32 execute(Int64 t, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeWeekNum(t); + } + static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeWeekNum(static_cast(t)); + } + static inline Int32 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeWeekNum(ExtendedDayNum(d)); + } + static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeWeekNum(DayNum(d)); + } + + using FactorTransform = ZeroTransform; +}; + struct ToExtendedRelativeDayNumImpl { static constexpr auto name = "toExtendedRelativeDayNum"; @@ -57,6 +153,79 @@ struct ToExtendedRelativeDayNumImpl using FactorTransform = ZeroTransform; }; +struct ToExtendedRelativeHourNumImpl +{ + static constexpr auto name = "toExtendedRelativeHourNum"; + + static inline Int64 execute(Int64 t, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeHourNum(t); + } + static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeHourNum(static_cast(t)); + } + static inline Int64 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeHourNum(ExtendedDayNum(d)); + } + static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeHourNum(DayNum(d)); + } + + using FactorTransform = ZeroTransform; +}; + +struct 
ToExtendedRelativeMinuteNumImpl +{ + static constexpr auto name = "toExtendedRelativeMinuteNum"; + + static inline Int64 execute(Int64 t, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeMinuteNum(t); + } + static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeMinuteNum(static_cast(t)); + } + static inline Int64 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeMinuteNum(ExtendedDayNum(d)); + } + static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) + { + return time_zone.toRelativeMinuteNum(DayNum(d)); + } + + using FactorTransform = ZeroTransform; +}; + +struct ToExtendedRelativeSecondNumImpl +{ + static constexpr auto name = "toExtendedRelativeSecondNum"; + + static inline Int64 execute(Int64 t, const DateLUTImpl &) + { + return t; + } + static inline UInt32 execute(UInt32 t, const DateLUTImpl &) + { + return t; + } + static inline Int64 execute(Int32 d, const DateLUTImpl & time_zone) + { + return time_zone.fromDayNum(ExtendedDayNum(d)); + } + static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) + { + return time_zone.fromDayNum(DayNum(d)); + } + + using FactorTransform = ZeroTransform; +}; + + /** dateDiff('unit', t1, t2, [timezone]) * t1 and t2 can be Date or DateTime * @@ -129,22 +298,21 @@ public: const auto & timezone_y = extractTimeZoneFromFunctionArguments(arguments, 3, 2); if (unit == "year" || unit == "yy" || unit == "yyyy") - dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "quarter" || unit == "qq" || unit == "q") - dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "month" || unit == "mm" || unit == "m") - dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "week" || unit == "wk" || unit == "ww") - dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "day" || unit == "dd" || unit == "d") - // dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "hour" || unit == "hh" || unit == "h") - dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "minute" || unit == "mi" || unit == "n") - dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "second" || unit == "ss" || unit == "s") - dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns(x, y, timezone_x, timezone_y, res->getData()); else throw Exception("Function " + getName() + " does not support '" + unit + "' unit", ErrorCodes::BAD_ARGUMENTS); From e7ca30cd114b8bdd0b2a2409fbd558566cf44762 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Thu, 13 Oct 2022 10:10:13 +0200 Subject: [PATCH 073/252] fix typo --- src/Storages/HDFS/StorageHDFS.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 98ee578493b..d71cbe630b0 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp 
+++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -122,8 +122,8 @@ namespace { const size_t begin_of_path = uri.find('/', uri.find("//") + 2); - auto pos = url.find("//"); - if (pos != std::string::npos && std::advance(pos, 2) != url.end()) + auto pos = uri.find("//"); + if (pos != std::string::npos && std::advance(pos, 2) != uri.end()) { pos = find('/', pos + 2); if (pos != std::string::npos) From 13920f83d9b8f1de1136a396961643765faaa884 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 13 Oct 2022 08:24:51 +0000 Subject: [PATCH 074/252] Add test 02457_datediff_via_unix_epoch --- .../02457_datediff_via_unix_epoch.reference | 16 +++++++++++++ .../02457_datediff_via_unix_epoch.sql | 23 +++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 tests/queries/0_stateless/02457_datediff_via_unix_epoch.reference create mode 100644 tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql diff --git a/tests/queries/0_stateless/02457_datediff_via_unix_epoch.reference b/tests/queries/0_stateless/02457_datediff_via_unix_epoch.reference new file mode 100644 index 00000000000..c2498781ac7 --- /dev/null +++ b/tests/queries/0_stateless/02457_datediff_via_unix_epoch.reference @@ -0,0 +1,16 @@ +year 1 +year 1 +quarter 1 +quarter 1 +month 1 +month 1 +week 1 +week 1 +day 11 +day 11 +hour 240 +hour 240 +minute 1440 +minute 20 +second 86400 +second 1200 diff --git a/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql b/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql new file mode 100644 index 00000000000..796b4cc6e8f --- /dev/null +++ b/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql @@ -0,0 +1,23 @@ +select 'year', date_diff('year', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'year', date_diff('year', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); + +select 'quarter', date_diff('quarter', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'quarter', date_diff('quarter', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); + +select 'month', date_diff('month', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'month', date_diff('month', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); + +select 'week', date_diff('week', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'week', date_diff('week', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); + +select 'day', date_diff('day', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'day', date_diff('day', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); + +select 'hour', date_diff('hour', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'hour', date_diff('hour', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); + +select 'minute', date_diff('minute', toDate32('1969-12-31'), toDate32('1970-01-01')); +select 'minute', date_diff('minute', toDateTime64('1969-12-31 23:50:00.000', 3), toDateTime64('1970-01-01 00:10:00.000', 3)); + +select 'second', date_diff('second', toDate32('1969-12-31'), toDate32('1970-01-01')); +select 'second', date_diff('second', toDateTime64('1969-12-31 23:50:00.000', 3), toDateTime64('1970-01-01 00:10:00.000', 3)); From 986c6fbaaa85d43cbaafe25b59bc11b434ecebbc Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 13 Oct 2022 08:57:38 +0000 Subject: [PATCH 075/252] Set UTC for 02457_datediff_via_unix_epoch --- 
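A quick arithmetic cross-check of some reference values added in patch 074 above: the fixed-size units (day, minute, second) fall out of plain epoch arithmetic, sketched below in standard C++. The calendar-relative units (week and larger, plus the hour case, which goes through the relative-number transforms) are deliberately not reproduced here.

#include <cassert>
#include <cstdint>

int main()
{
    // Date32 stores days relative to the epoch: 1969-12-31 is day -1,
    // 1970-01-01 is day 0.
    const std::int64_t day_a = -1;
    const std::int64_t day_b = 0;
    assert((day_b - day_a) * 24 * 60 == 1440);  // "minute 1440"
    assert((day_b - day_a) * 86400 == 86400);   // "second 86400"

    // Seconds relative to the epoch for the DateTime64 pair in the test:
    const std::int64_t t_a = -10 * 60;          // 1969-12-31 23:50:00 UTC
    const std::int64_t t_b = +10 * 60;          // 1970-01-01 00:10:00 UTC
    assert((t_b - t_a) / 60 == 20);             // "minute 20"
    assert(t_b - t_a == 1200);                  // "second 1200"

    // And the Date32 pair 1969-12-25 .. 1970-01-05 used for the day case:
    assert(4 - (-7) == 11);                     // "day 11": day -7 to day 4
}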
.../02457_datediff_via_unix_epoch.sql | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql b/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql index 796b4cc6e8f..b8c76626ec8 100644 --- a/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql +++ b/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql @@ -1,23 +1,23 @@ select 'year', date_diff('year', toDate32('1969-12-25'), toDate32('1970-01-05')); -select 'year', date_diff('year', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); +select 'year', date_diff('year', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); select 'quarter', date_diff('quarter', toDate32('1969-12-25'), toDate32('1970-01-05')); -select 'quarter', date_diff('quarter', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); +select 'quarter', date_diff('quarter', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); select 'month', date_diff('month', toDate32('1969-12-25'), toDate32('1970-01-05')); -select 'month', date_diff('month', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); +select 'month', date_diff('month', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); select 'week', date_diff('week', toDate32('1969-12-25'), toDate32('1970-01-05')); -select 'week', date_diff('week', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); +select 'week', date_diff('week', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); select 'day', date_diff('day', toDate32('1969-12-25'), toDate32('1970-01-05')); -select 'day', date_diff('day', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); +select 'day', date_diff('day', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); select 'hour', date_diff('hour', toDate32('1969-12-25'), toDate32('1970-01-05')); -select 'hour', date_diff('hour', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); +select 'hour', date_diff('hour', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); select 'minute', date_diff('minute', toDate32('1969-12-31'), toDate32('1970-01-01')); -select 'minute', date_diff('minute', toDateTime64('1969-12-31 23:50:00.000', 3), toDateTime64('1970-01-01 00:10:00.000', 3)); +select 'minute', date_diff('minute', toDateTime64('1969-12-31 23:50:00.000', 3, 'UTC'), toDateTime64('1970-01-01 00:10:00.000', 3, 'UTC')); select 'second', date_diff('second', toDate32('1969-12-31'), toDate32('1970-01-01')); -select 'second', date_diff('second', toDateTime64('1969-12-31 23:50:00.000', 3), toDateTime64('1970-01-01 00:10:00.000', 3)); +select 'second', date_diff('second', toDateTime64('1969-12-31 23:50:00.000', 3, 'UTC'), toDateTime64('1970-01-01 00:10:00.000', 3, 'UTC')); From bbf88b66a0c5527456251d096782f0d9e307efe1 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Thu, 13 Oct 2022 11:42:35 +0200 Subject: [PATCH 076/252] more --- src/Storages/HDFS/StorageHDFS.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git 
a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index d71cbe630b0..208a8aa6845 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -120,17 +120,15 @@ namespace std::pair getPathFromUriAndUriWithoutPath(const String & uri) { - const size_t begin_of_path = uri.find('/', uri.find("//") + 2); - auto pos = uri.find("//"); - if (pos != std::string::npos && std::advance(pos, 2) != uri.end()) + if (pos != std::string::npos && pos + 2 != uri.end()) { - pos = find('/', pos + 2); + pos = uri.find('/', pos + 2); if (pos != std::string::npos) return {uri.substr(pos), uri.substr(0, pos)}; } - throw Exception("Storage HDFS requires valid URL to be set, empty string provided", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Storage HDFS requires valid URL to be set", ErrorCodes::BAD_ARGUMENTS); } std::vector getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context, std::unordered_map * last_mod_times = nullptr) From 4cf29130545c48317f1d372e8e1b564fab03c307 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 13 Oct 2022 12:58:22 +0000 Subject: [PATCH 077/252] Fix message about allowed argument types --- src/Functions/dateDiff.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 82ad0322c4b..b7cd511a381 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -263,11 +263,11 @@ public: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); if (!isDate(arguments[1]) && !isDate32(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1])) - throw Exception("Second argument for function " + getName() + " must be Date or DateTime", + throw Exception("Second argument for function " + getName() + " must be Date, Date32, DateTime or DateTime64", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); if (!isDate(arguments[2]) && !isDate32(arguments[2]) && !isDateTime(arguments[2]) && !isDateTime64(arguments[2])) - throw Exception("Third argument for function " + getName() + " must be Date or DateTime", + throw Exception("Third argument for function " + getName() + " must be Date, Date32, DateTime or DateTime64", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); if (arguments.size() == 4 && !isString(arguments[3])) From 14ba9fa51da3d505c3e040460600b07687400432 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 13 Oct 2022 13:35:14 +0000 Subject: [PATCH 078/252] Fix Date32 argument in dispatchConstForSecondColumn --- src/Functions/dateDiff.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index b7cd511a381..dabf70e2f6f 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -343,7 +343,7 @@ private: else if (const auto * x_const_64 = checkAndGetColumnConst(&x)) dispatchConstForSecondColumn(x_const_64->getValue>(), y, timezone_x, timezone_y, result); else - throw Exception("Illegal column for first argument of function " + getName() + ", must be Date, DateTime or DateTime64", ErrorCodes::ILLEGAL_COLUMN); + throw Exception("Illegal column for first argument of function " + getName() + ", must be Date, Date32, DateTime or DateTime64", ErrorCodes::ILLEGAL_COLUMN); } template @@ -369,7 +369,7 @@ private: else if (const auto * y_const_64 = checkAndGetColumnConst(&y)) vectorConstant(x, y_const_64->getValue>(), timezone_x, timezone_y, result); else - throw Exception("Illegal column for second argument of function " + getName() + ", must be Date or 
DateTime", ErrorCodes::ILLEGAL_COLUMN); + throw Exception("Illegal column for second argument of function " + getName() + ", must be Date, Date32, DateTime or DateTime64", ErrorCodes::ILLEGAL_COLUMN); } template @@ -382,10 +382,12 @@ private: constantVector(x, *y_vec_16, timezone_x, timezone_y, result); else if (const auto * y_vec_32 = checkAndGetColumn(&y)) constantVector(x, *y_vec_32, timezone_x, timezone_y, result); + else if (const auto * y_vec_32_s = checkAndGetColumn(&y)) + constantVector(x, *y_vec_32_s, timezone_x, timezone_y, result); else if (const auto * y_vec_64 = checkAndGetColumn(&y)) constantVector(x, *y_vec_64, timezone_x, timezone_y, result); else - throw Exception("Illegal column for second argument of function " + getName() + ", must be Date or DateTime", ErrorCodes::ILLEGAL_COLUMN); + throw Exception("Illegal column for second argument of function " + getName() + ", must be Date, Date32, DateTime or DateTime64", ErrorCodes::ILLEGAL_COLUMN); } template From 64ef83649d50a83d92cacc1c509672d5b616cc83 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 13 Oct 2022 13:38:32 +0000 Subject: [PATCH 079/252] Add 02458_datediff_date32 test --- .../02458_datediff_date32.reference | 169 ++++++++++++++++++ .../0_stateless/02458_datediff_date32.sql | 101 +++++++++++ 2 files changed, 270 insertions(+) create mode 100644 tests/queries/0_stateless/02458_datediff_date32.reference create mode 100644 tests/queries/0_stateless/02458_datediff_date32.sql diff --git a/tests/queries/0_stateless/02458_datediff_date32.reference b/tests/queries/0_stateless/02458_datediff_date32.reference new file mode 100644 index 00000000000..58551402dd3 --- /dev/null +++ b/tests/queries/0_stateless/02458_datediff_date32.reference @@ -0,0 +1,169 @@ +-- { echo } + +-- Date32 vs Date32 +SELECT dateDiff('second', toDate32('1900-01-01'), toDate32('1900-01-02')); +86400 +SELECT dateDiff('minute', toDate32('1900-01-01'), toDate32('1900-01-02')); +1440 +SELECT dateDiff('hour', toDate32('1900-01-01'), toDate32('1900-01-02')); +24 +SELECT dateDiff('day', toDate32('1900-01-01'), toDate32('1900-01-02')); +1 +SELECT dateDiff('week', toDate32('1900-01-01'), toDate32('1900-01-08')); +1 +SELECT dateDiff('month', toDate32('1900-01-01'), toDate32('1900-02-01')); +1 +SELECT dateDiff('quarter', toDate32('1900-01-01'), toDate32('1900-04-01')); +1 +SELECT dateDiff('year', toDate32('1900-01-01'), toDate32('1901-01-01')); +1 +-- With DateTime64 +-- Date32 vs DateTime64 +SELECT dateDiff('second', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +86400 +SELECT dateDiff('minute', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +1440 +SELECT dateDiff('hour', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +24 +SELECT dateDiff('day', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +1 +SELECT dateDiff('week', toDate32('1900-01-01'), toDateTime64('1900-01-08 00:00:00', 3, 'UTC')); +1 +SELECT dateDiff('month', toDate32('1900-01-01'), toDateTime64('1900-02-01 00:00:00', 3, 'UTC')); +1 +SELECT dateDiff('quarter', toDate32('1900-01-01'), toDateTime64('1900-04-01 00:00:00', 3, 'UTC')); +1 +SELECT dateDiff('year', toDate32('1900-01-01'), toDateTime64('1901-01-01 00:00:00', 3, 'UTC')); +1 +-- DateTime64 vs Date32 +SELECT dateDiff('second', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02')); +86400 +SELECT dateDiff('minute', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02')); +1440 +SELECT dateDiff('hour', 
toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02')); +24 +SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02')); +1 +SELECT dateDiff('week', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-08')); +1 +SELECT dateDiff('month', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-02-01')); +1 +SELECT dateDiff('quarter', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-04-01')); +1 +SELECT dateDiff('year', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1901-01-01')); +1 +-- With DateTime +-- Date32 vs DateTime +SELECT dateDiff('second', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC')); +86400 +SELECT dateDiff('minute', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC')); +1440 +SELECT dateDiff('hour', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC')); +24 +SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC')); +1 +SELECT dateDiff('week', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-25 00:00:00', 'UTC')); +1 +SELECT dateDiff('month', toDate32('2015-08-18', 'UTC'), toDateTime('2015-09-18 00:00:00', 'UTC')); +1 +SELECT dateDiff('quarter', toDate32('2015-08-18', 'UTC'), toDateTime('2015-11-18 00:00:00', 'UTC')); +1 +SELECT dateDiff('year', toDate32('2015-08-18', 'UTC'), toDateTime('2016-08-18 00:00:00', 'UTC')); +1 +-- DateTime vs Date32 +SELECT dateDiff('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC')); +86400 +SELECT dateDiff('minute', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC')); +1440 +SELECT dateDiff('hour', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC')); +24 +SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC')); +1 +SELECT dateDiff('week', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-25', 'UTC')); +1 +SELECT dateDiff('month', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-09-18', 'UTC')); +1 +SELECT dateDiff('quarter', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-11-18', 'UTC')); +1 +SELECT dateDiff('year', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2016-08-18', 'UTC')); +1 +-- With Date +-- Date32 vs Date +SELECT dateDiff('second', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC')); +86400 +SELECT dateDiff('minute', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC')); +1440 +SELECT dateDiff('hour', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC')); +24 +SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC')); +1 +SELECT dateDiff('week', toDate32('2015-08-18', 'UTC'), toDate('2015-08-25', 'UTC')); +1 +SELECT dateDiff('month', toDate32('2015-08-18', 'UTC'), toDate('2015-09-18', 'UTC')); +1 +SELECT dateDiff('quarter', toDate32('2015-08-18', 'UTC'), toDate('2015-11-18', 'UTC')); +1 +SELECT dateDiff('year', toDate32('2015-08-18', 'UTC'), toDate('2016-08-18', 'UTC')); +1 +-- Date vs Date32 +SELECT dateDiff('second', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC')); +86400 +SELECT dateDiff('minute', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC')); +1440 +SELECT dateDiff('hour', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC')); +24 +SELECT dateDiff('day', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC')); +1 +SELECT dateDiff('week', toDate('2015-08-18', 'UTC'), toDate32('2015-08-25', 'UTC')); +1 +SELECT 
dateDiff('month', toDate('2015-08-18', 'UTC'), toDate32('2015-09-18', 'UTC')); +1 +SELECT dateDiff('quarter', toDate('2015-08-18', 'UTC'), toDate32('2015-11-18', 'UTC')); +1 +SELECT dateDiff('year', toDate('2015-08-18', 'UTC'), toDate32('2016-08-18', 'UTC')); +1 +-- Const vs non-const columns +SELECT dateDiff('day', toDate32('1900-01-01'), materialize(toDate32('1900-01-02'))); +1 +SELECT dateDiff('day', toDate32('1900-01-01'), materialize(toDateTime64('1900-01-02 00:00:00', 3, 'UTC'))); +1 +SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), materialize(toDate32('1900-01-02'))); +1 +SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), materialize(toDateTime('2015-08-19 00:00:00', 'UTC'))); +1 +SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDate32('2015-08-19', 'UTC'))); +1 +SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), materialize(toDate('2015-08-19', 'UTC'))); +1 +SELECT dateDiff('day', toDate('2015-08-18', 'UTC'), materialize(toDate32('2015-08-19', 'UTC'))); +1 +-- Non-const vs const columns +SELECT dateDiff('day', materialize(toDate32('1900-01-01')), toDate32('1900-01-02')); +1 +SELECT dateDiff('day', materialize(toDate32('1900-01-01')), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +1 +SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3, 'UTC')), toDate32('1900-01-02')); +1 +SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), toDateTime('2015-08-19 00:00:00', 'UTC')); +1 +SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDate32('2015-08-19', 'UTC')); +1 +SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), toDate('2015-08-19', 'UTC')); +1 +SELECT dateDiff('day', materialize(toDate('2015-08-18', 'UTC')), toDate32('2015-08-19', 'UTC')); +1 +-- Non-const vs non-const columns +SELECT dateDiff('day', materialize(toDate32('1900-01-01')), materialize(toDate32('1900-01-02'))); +1 +SELECT dateDiff('day', materialize(toDate32('1900-01-01')), materialize(toDateTime64('1900-01-02 00:00:00', 3, 'UTC'))); +1 +SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3, 'UTC')), materialize(toDate32('1900-01-02'))); +1 +SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDateTime('2015-08-19 00:00:00', 'UTC'))); +1 +SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDate32('2015-08-19', 'UTC'))); +1 +SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDate('2015-08-19', 'UTC'))); +1 +SELECT dateDiff('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDate32('2015-08-19', 'UTC'))); +1 diff --git a/tests/queries/0_stateless/02458_datediff_date32.sql b/tests/queries/0_stateless/02458_datediff_date32.sql new file mode 100644 index 00000000000..dfe0d55ee97 --- /dev/null +++ b/tests/queries/0_stateless/02458_datediff_date32.sql @@ -0,0 +1,101 @@ +-- { echo } + +-- Date32 vs Date32 +SELECT dateDiff('second', toDate32('1900-01-01'), toDate32('1900-01-02')); +SELECT dateDiff('minute', toDate32('1900-01-01'), toDate32('1900-01-02')); +SELECT dateDiff('hour', toDate32('1900-01-01'), toDate32('1900-01-02')); +SELECT dateDiff('day', toDate32('1900-01-01'), toDate32('1900-01-02')); +SELECT dateDiff('week', toDate32('1900-01-01'), toDate32('1900-01-08')); +SELECT dateDiff('month', toDate32('1900-01-01'), toDate32('1900-02-01')); +SELECT dateDiff('quarter', toDate32('1900-01-01'), toDate32('1900-04-01')); +SELECT dateDiff('year', toDate32('1900-01-01'), 
toDate32('1901-01-01')); + +-- With DateTime64 +-- Date32 vs DateTime64 +SELECT dateDiff('second', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('minute', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('hour', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('day', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('week', toDate32('1900-01-01'), toDateTime64('1900-01-08 00:00:00', 3, 'UTC')); +SELECT dateDiff('month', toDate32('1900-01-01'), toDateTime64('1900-02-01 00:00:00', 3, 'UTC')); +SELECT dateDiff('quarter', toDate32('1900-01-01'), toDateTime64('1900-04-01 00:00:00', 3, 'UTC')); +SELECT dateDiff('year', toDate32('1900-01-01'), toDateTime64('1901-01-01 00:00:00', 3, 'UTC')); + +-- DateTime64 vs Date32 +SELECT dateDiff('second', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02')); +SELECT dateDiff('minute', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02')); +SELECT dateDiff('hour', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02')); +SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02')); +SELECT dateDiff('week', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-08')); +SELECT dateDiff('month', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-02-01')); +SELECT dateDiff('quarter', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-04-01')); +SELECT dateDiff('year', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1901-01-01')); + +-- With DateTime +-- Date32 vs DateTime +SELECT dateDiff('second', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC')); +SELECT dateDiff('minute', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC')); +SELECT dateDiff('hour', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC')); +SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC')); +SELECT dateDiff('week', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-25 00:00:00', 'UTC')); +SELECT dateDiff('month', toDate32('2015-08-18', 'UTC'), toDateTime('2015-09-18 00:00:00', 'UTC')); +SELECT dateDiff('quarter', toDate32('2015-08-18', 'UTC'), toDateTime('2015-11-18 00:00:00', 'UTC')); +SELECT dateDiff('year', toDate32('2015-08-18', 'UTC'), toDateTime('2016-08-18 00:00:00', 'UTC')); + +-- DateTime vs Date32 +SELECT dateDiff('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('minute', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('hour', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('week', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-25', 'UTC')); +SELECT dateDiff('month', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-09-18', 'UTC')); +SELECT dateDiff('quarter', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-11-18', 'UTC')); +SELECT dateDiff('year', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2016-08-18', 'UTC')); + +-- With Date +-- Date32 vs Date +SELECT dateDiff('second', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC')); +SELECT dateDiff('minute', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC')); +SELECT 
dateDiff('hour', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC')); +SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC')); +SELECT dateDiff('week', toDate32('2015-08-18', 'UTC'), toDate('2015-08-25', 'UTC')); +SELECT dateDiff('month', toDate32('2015-08-18', 'UTC'), toDate('2015-09-18', 'UTC')); +SELECT dateDiff('quarter', toDate32('2015-08-18', 'UTC'), toDate('2015-11-18', 'UTC')); +SELECT dateDiff('year', toDate32('2015-08-18', 'UTC'), toDate('2016-08-18', 'UTC')); + +-- Date vs Date32 +SELECT dateDiff('second', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('minute', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('hour', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('day', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('week', toDate('2015-08-18', 'UTC'), toDate32('2015-08-25', 'UTC')); +SELECT dateDiff('month', toDate('2015-08-18', 'UTC'), toDate32('2015-09-18', 'UTC')); +SELECT dateDiff('quarter', toDate('2015-08-18', 'UTC'), toDate32('2015-11-18', 'UTC')); +SELECT dateDiff('year', toDate('2015-08-18', 'UTC'), toDate32('2016-08-18', 'UTC')); + +-- Const vs non-const columns +SELECT dateDiff('day', toDate32('1900-01-01'), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', toDate32('1900-01-01'), materialize(toDateTime64('1900-01-02 00:00:00', 3, 'UTC'))); +SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), materialize(toDateTime('2015-08-19 00:00:00', 'UTC'))); +SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDate32('2015-08-19', 'UTC'))); +SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), materialize(toDate('2015-08-19', 'UTC'))); +SELECT dateDiff('day', toDate('2015-08-18', 'UTC'), materialize(toDate32('2015-08-19', 'UTC'))); + +-- Non-const vs const columns +SELECT dateDiff('day', materialize(toDate32('1900-01-01')), toDate32('1900-01-02')); +SELECT dateDiff('day', materialize(toDate32('1900-01-01')), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3, 'UTC')), toDate32('1900-01-02')); +SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), toDateTime('2015-08-19 00:00:00', 'UTC')); +SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), toDate('2015-08-19', 'UTC')); +SELECT dateDiff('day', materialize(toDate('2015-08-18', 'UTC')), toDate32('2015-08-19', 'UTC')); + +-- Non-const vs non-const columns +SELECT dateDiff('day', materialize(toDate32('1900-01-01')), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', materialize(toDate32('1900-01-01')), materialize(toDateTime64('1900-01-02 00:00:00', 3, 'UTC'))); +SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3, 'UTC')), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDateTime('2015-08-19 00:00:00', 'UTC'))); +SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDate32('2015-08-19', 'UTC'))); +SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDate('2015-08-19', 'UTC'))); +SELECT dateDiff('day', materialize(toDate('2015-08-18', 'UTC')), 
materialize(toDate32('2015-08-19', 'UTC'))); From 90f9bd24a376a13d7db190da19f1485ccb23add7 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 14 Oct 2022 08:02:38 +0000 Subject: [PATCH 080/252] Update documentation --- docs/en/sql-reference/functions/date-time-functions.md | 4 ++-- docs/ru/sql-reference/functions/date-time-functions.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 76f66db924f..5b814ab1b1f 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -671,9 +671,9 @@ Aliases: `dateDiff`, `DATE_DIFF`. - `quarter` - `year` -- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 897c4b3e86a..97ca0915eb4 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -721,9 +721,9 @@ date_diff('unit', startdate, enddate, [timezone]) - `quarter` - `year` -- `startdate` — Ð¿ÐµÑ€Ð²Ð°Ñ Ð´Ð°Ñ‚Ð° или дата Ñо временем, ÐºÐ¾Ñ‚Ð¾Ñ€Ð°Ñ Ð²Ñ‹Ñ‡Ð¸Ñ‚Ð°ÐµÑ‚ÑÑ Ð¸Ð· `enddate`. [Date](../../sql-reference/data-types/date.md) или [DateTime](../../sql-reference/data-types/datetime.md). +- `startdate` — Ð¿ÐµÑ€Ð²Ð°Ñ Ð´Ð°Ñ‚Ð° или дата Ñо временем, ÐºÐ¾Ñ‚Ð¾Ñ€Ð°Ñ Ð²Ñ‹Ñ‡Ð¸Ñ‚Ð°ÐµÑ‚ÑÑ Ð¸Ð· `enddate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). -- `enddate` — Ð²Ñ‚Ð¾Ñ€Ð°Ñ Ð´Ð°Ñ‚Ð° или дата Ñо временем, из которой вычитаетÑÑ `startdate`. [Date](../../sql-reference/data-types/date.md) или [DateTime](../../sql-reference/data-types/datetime.md). +- `enddate` — Ð²Ñ‚Ð¾Ñ€Ð°Ñ Ð´Ð°Ñ‚Ð° или дата Ñо временем, из которой вычитаетÑÑ `startdate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). 
- `timezone` — [чаÑовой поÑÑ](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (необÑзательно). ЕÑли Ñтот аргумент указан, то он применÑетÑÑ ÐºÐ°Ðº Ð´Ð»Ñ `startdate`, так и Ð´Ð»Ñ `enddate`. ЕÑли Ñтот аргумент не указан, то иÑпользуютÑÑ Ñ‡Ð°Ñовые поÑÑа аргументов `startdate` и `enddate`. ЕÑли чаÑовые поÑÑа аргументов `startdate` и `enddate` не Ñовпадают, то результат не определен. [String](../../sql-reference/data-types/string.md). From 040d61e047ecfe634f1d0470a41671ae5dc794c3 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 14 Oct 2022 08:13:04 +0000 Subject: [PATCH 081/252] Use {} in error message formatting --- src/Functions/dateDiff.cpp | 46 +++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index dabf70e2f6f..773340a7758 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -254,25 +254,30 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.size() != 3 && arguments.size() != 4) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 3 or 4", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 3 or 4", + getName(), toString(arguments.size())); if (!isString(arguments[0])) - throw Exception("First argument for function " + getName() + " (unit) must be String", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} (unit) must be String", + getName()); if (!isDate(arguments[1]) && !isDate32(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1])) - throw Exception("Second argument for function " + getName() + " must be Date, Date32, DateTime or DateTime64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Second argument for function {} must be Date, Date32, DateTime or DateTime64", + getName()); if (!isDate(arguments[2]) && !isDate32(arguments[2]) && !isDateTime(arguments[2]) && !isDateTime64(arguments[2])) - throw Exception("Third argument for function " + getName() + " must be Date, Date32, DateTime or DateTime64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Third argument for function {} must be Date, Date32, DateTime or DateTime64", + getName() + ); if (arguments.size() == 4 && !isString(arguments[3])) - throw Exception("Fourth argument for function " + getName() + " (timezone) must be String", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Fourth argument for function {} (timezone) must be String", + getName()); return std::make_shared(); } @@ -284,7 +289,9 @@ public: { const auto * unit_column = checkAndGetColumnConst(arguments[0].column.get()); if (!unit_column) - throw Exception("First argument for function " + getName() + " must be constant String", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "First argument for function {} must be constant String", + getName()); String unit = Poco::toLower(unit_column->getValue()); @@ -314,7 +321,8 @@ public: else if (unit == "second" || unit == "ss" || unit == "s") dispatchForColumns(x, y, timezone_x, timezone_y, 
res->getData()); else - throw Exception("Function " + getName() + " does not support '" + unit + "' unit", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Function {} does not support '{}' unit", getName(), unit); return res; } @@ -343,7 +351,9 @@ private: else if (const auto * x_const_64 = checkAndGetColumnConst(&x)) dispatchConstForSecondColumn(x_const_64->getValue>(), y, timezone_x, timezone_y, result); else - throw Exception("Illegal column for first argument of function " + getName() + ", must be Date, Date32, DateTime or DateTime64", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column for first argument of function {}, must be Date, Date32, DateTime or DateTime64", + getName()); } template @@ -369,7 +379,9 @@ private: else if (const auto * y_const_64 = checkAndGetColumnConst(&y)) vectorConstant(x, y_const_64->getValue>(), timezone_x, timezone_y, result); else - throw Exception("Illegal column for second argument of function " + getName() + ", must be Date, Date32, DateTime or DateTime64", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", + getName()); } template @@ -387,7 +399,9 @@ private: else if (const auto * y_vec_64 = checkAndGetColumn(&y)) constantVector(x, *y_vec_64, timezone_x, timezone_y, result); else - throw Exception("Illegal column for second argument of function " + getName() + ", must be Date, Date32, DateTime or DateTime64", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64", + getName()); } template From 8830f0608df2da4df264d65b0e37872b6b6ac212 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 11 Oct 2022 17:28:20 +0200 Subject: [PATCH 082/252] Support BACKUP to S3 with as-is path/data structure Right now backup to S3 does not make a lot of sense, since: - it has random names, and to decoding them - requires metadata from local disk (/var/lib/disks/DISK/BACKUP_NAME) - or send_metadata (but it is also tricky even with it) So this patch adds simpler interface for S3, it is only suitable for BACKUP/RESTORE, so don't try to use it for MergeTree engine. It is done by adding separate disk - `s3_plain` for this, that: - does not support any extended features, like renames/hardlinks/attrs/... 
(so basically everything that MergeTree requires) - only write/read/unlink/list files Signed-off-by: Azat Khuzhin --- docs/en/operations/backup.md | 49 ++++ .../MetadataStorageFromPlainObjectStorage.cpp | 229 ++++++++++++++++++ .../MetadataStorageFromPlainObjectStorage.h | 129 ++++++++++ src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 32 ++- .../ObjectStorages/S3/registerDiskS3.cpp | 34 ++- .../test_backup_restore_s3/__init__.py | 0 .../configs/storage_conf.xml | 42 ++++ .../test_backup_restore_s3/test.py | 71 ++++++ 8 files changed, 576 insertions(+), 10 deletions(-) create mode 100644 src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp create mode 100644 src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h create mode 100644 tests/integration/test_backup_restore_s3/__init__.py create mode 100644 tests/integration/test_backup_restore_s3/configs/storage_conf.xml create mode 100644 tests/integration/test_backup_restore_s3/test.py diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index d26d8f27820..a755e3ef9a6 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -171,6 +171,55 @@ end_time: 2022-08-30 09:21:46 1 row in set. Elapsed: 0.002 sec. ``` +## Backup to S3 + +It is possible to `BACKUP`/`RESTORE` to S3, but this disk should be configured +in a proper way, since by default you will need to backup metadata from local +disk to make backup full. + +First of all, you need to configure S3 disk in a special way: + +```xml + + + + + s3_plain + + + + + + + + +
+                        <disk>s3</disk>
+                    </main>
+                </volumes>
+            </s3>
+        </policies>
+    </storage_configuration>
+
+    <backups>
+        <allowed_disk>s3_plain</allowed_disk>
+    </backups>
+</clickhouse>
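+
+<!-- BACKUP/RESTORE to a disk is refused unless that disk is listed under
+     <backups><allowed_disk> above. -->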
+``` + +And then `BACKUP`/`RESTORE` as usual: + +```sql +BACKUP TABLE data TO Disk('s3_plain', 'cloud_backup'); +RESTORE TABLE data AS data_restored FROM Disk('s3_plain', 'cloud_backup'); +``` + +:::note +But keep in mind that: +- This disk should not be used for `MergeTree` itself, only for `BACKUP`/`RESTORE` +- It has excessive API calls +::: + ## Alternatives ClickHouse stores data on disk, and there are many ways to backup disks. These are some alternatives that have been used in the past, and that may fit in well in your environment. diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp new file mode 100644 index 00000000000..35cd3be15d2 --- /dev/null +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -0,0 +1,229 @@ +#include "MetadataStorageFromPlainObjectStorage.h" +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; +} + +MetadataStorageFromPlainObjectStorage::MetadataStorageFromPlainObjectStorage( + ObjectStoragePtr object_storage_, + const std::string & object_storage_root_path_) + : object_storage(object_storage_) + , object_storage_root_path(object_storage_root_path_) +{ +} + +MetadataTransactionPtr MetadataStorageFromPlainObjectStorage::createTransaction() const +{ + return std::make_shared(*this); +} + +const std::string & MetadataStorageFromPlainObjectStorage::getPath() const +{ + return object_storage_root_path; +} + +bool MetadataStorageFromPlainObjectStorage::exists(const std::string & path) const +{ + auto object = StoredObject::create(*object_storage, fs::path(object_storage_root_path) / path); + return object_storage->exists(object); +} + +bool MetadataStorageFromPlainObjectStorage::isFile(const std::string & path) const +{ + /// NOTE: This check is inaccurate and has excessive API calls + return !isDirectory(path) && exists(path); +} + +bool MetadataStorageFromPlainObjectStorage::isDirectory(const std::string & path) const +{ + std::string directory = path; + trimRight(directory); + directory += "/"; + + /// NOTE: This check is far from ideal, since it work only if the directory + /// really has files, and has excessive API calls + RelativePathsWithSize children; + object_storage->listPrefix(directory, children); + return !children.empty(); +} + +Poco::Timestamp MetadataStorageFromPlainObjectStorage::getLastModified(const std::string &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "getLastModified is not implemented for MetadataStorageFromPlainObjectStorage"); +} + +struct stat MetadataStorageFromPlainObjectStorage::stat(const std::string &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "stat is not implemented for MetadataStorageFromPlainObjectStorage"); +} + +time_t MetadataStorageFromPlainObjectStorage::getLastChanged(const std::string &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "getLastChanged is not implemented for MetadataStorageFromPlainObjectStorage"); +} + +uint64_t MetadataStorageFromPlainObjectStorage::getFileSize(const String & path) const +{ + RelativePathsWithSize children; + object_storage->listPrefix(path, children); + if (children.empty()) + return 0; + if (children.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "listPrefix() return multiple paths ({}) for {}", children.size(), path); + return children.front().bytes_size; +} + +std::vector 
MetadataStorageFromPlainObjectStorage::listDirectory(const std::string & path) const +{ + RelativePathsWithSize children; + object_storage->listPrefix(path, children); + + std::vector result; + for (const auto & path_size : children) + { + result.push_back(path_size.relative_path); + } + return result; +} + +DirectoryIteratorPtr MetadataStorageFromPlainObjectStorage::iterateDirectory(const std::string & path) const +{ + /// NOTE: this is not required for BACKUP/RESTORE, but this is a first step + /// towards MergeTree on plain S3. + auto paths = listDirectory(path); + std::vector fs_paths(paths.begin(), paths.end()); + return std::make_unique(std::move(fs_paths)); +} + +std::string MetadataStorageFromPlainObjectStorage::readFileToString(const std::string &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "readFileToString is not implemented for MetadataStorageFromPlainObjectStorage"); +} + +std::unordered_map MetadataStorageFromPlainObjectStorage::getSerializedMetadata(const std::vector &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "getSerializedMetadata is not implemented for MetadataStorageFromPlainObjectStorage"); +} + +StoredObjects MetadataStorageFromPlainObjectStorage::getStorageObjects(const std::string & path) const +{ + std::string blob_name = object_storage->generateBlobNameForPath(path); + + std::string object_path = fs::path(object_storage_root_path) / blob_name; + size_t object_size = getFileSize(object_path); + + auto object = StoredObject::create(*object_storage, object_path, object_size, /* exists */true); + return {std::move(object)}; +} + +uint32_t MetadataStorageFromPlainObjectStorage::getHardlinkCount(const std::string &) const +{ + return 1; +} + +const IMetadataStorage & MetadataStorageFromPlainObjectStorageTransaction::getStorageForNonTransactionalReads() const +{ + return metadata_storage; +} + +void MetadataStorageFromPlainObjectStorageTransaction::writeStringToFile(const std::string &, const std::string & /* data */) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "writeStringToFile is not implemented for MetadataStorageFromPlainObjectStorage"); +} + +void MetadataStorageFromPlainObjectStorageTransaction::setLastModified(const std::string &, const Poco::Timestamp &) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "setLastModified is not implemented for MetadataStorageFromPlainObjectStorage"); +} + +void MetadataStorageFromPlainObjectStorageTransaction::unlinkFile(const std::string & path) +{ + auto object = StoredObject::create(*metadata_storage.object_storage, fs::path(metadata_storage.object_storage_root_path) / path); + metadata_storage.object_storage->removeObject(object); +} + +void MetadataStorageFromPlainObjectStorageTransaction::removeRecursive(const std::string &) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "removeRecursive is not implemented for MetadataStorageFromPlainObjectStorage"); +} + +void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std::string &) +{ + /// Noop. It is an Object Storage not a filesystem. +} + +void MetadataStorageFromPlainObjectStorageTransaction::createDirectoryRecursive(const std::string &) +{ + /// Noop. It is an Object Storage not a filesystem. 
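+    /// (S3-style object stores have a flat keyspace: a "directory" exists
+    /// implicitly as soon as any object key uses it as a prefix, hence
+    /// nothing to create.)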
+} + +void MetadataStorageFromPlainObjectStorageTransaction::removeDirectory(const std::string &) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "removeDirectory is not implemented for MetadataStorageFromPlainObjectStorage"); +} + +void MetadataStorageFromPlainObjectStorageTransaction::moveFile(const std::string & /* path_from */, const std::string & /* path_to */) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "moveFile is not implemented for MetadataStorageFromPlainObjectStorage"); +} + +void MetadataStorageFromPlainObjectStorageTransaction::moveDirectory(const std::string & /* path_from */, const std::string & /* path_to */) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "moveDirectory is not implemented for MetadataStorageFromPlainObjectStorage"); +} + +void MetadataStorageFromPlainObjectStorageTransaction::replaceFile(const std::string & /* path_from */, const std::string & /* path_to */) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "replaceFile is not implemented for MetadataStorageFromPlainObjectStorage"); +} + +void MetadataStorageFromPlainObjectStorageTransaction::chmod(const String &, mode_t) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "chmod is not implemented for MetadataStorageFromPlainObjectStorage"); +} + +void MetadataStorageFromPlainObjectStorageTransaction::setReadOnly(const std::string &) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "setReadOnly is not implemented for MetadataStorageFromPlainObjectStorage"); +} + +void MetadataStorageFromPlainObjectStorageTransaction::createHardLink(const std::string & /* path_from */, const std::string & /* path_to */) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "createHardLink is not implemented for MetadataStorageFromPlainObjectStorage"); +} + +void MetadataStorageFromPlainObjectStorageTransaction::createEmptyMetadataFile(const std::string &) +{ + /// Noop, no separate metadata. +} + +void MetadataStorageFromPlainObjectStorageTransaction::createMetadataFile( + const std::string &, const std::string & /* blob_name */, uint64_t /* size_in_bytes */) +{ + /// Noop, no separate metadata. +} + +void MetadataStorageFromPlainObjectStorageTransaction::addBlobToMetadata( + const std::string &, const std::string & /* blob_name */, uint64_t /* size_in_bytes */) +{ + /// Noop, local metadata files is only one file, it is the metadata file itself. +} + +void MetadataStorageFromPlainObjectStorageTransaction::unlinkMetadata(const std::string &) +{ + /// Noop, no separate metadata. +} + +} diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h new file mode 100644 index 00000000000..bd993918413 --- /dev/null +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h @@ -0,0 +1,129 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +/// Object storage is used as a filesystem, in a limited form: +/// - no directory concept, files only +/// - no stat/chmod/... +/// - no move/... +/// - limited unlink support +/// +/// Also it has excessive API calls. +/// +/// It is used to allow BACKUP/RESTORE to ObjectStorage (S3/...) with the same +/// structure as on disk MergeTree, and does not requires metadata from local +/// disk to restore. 
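+///
+/// Illustrative layout, following the store-as-is idea above: a backup file
+/// "cloud_backup/metadata/data.sql" is written under the object key
+/// "<root>/cloud_backup/metadata/data.sql" verbatim, instead of under a
+/// random blob name tracked by separate local metadata files.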
+class MetadataStorageFromPlainObjectStorage final : public IMetadataStorage +{ +private: + friend class MetadataStorageFromPlainObjectStorageTransaction; + + ObjectStoragePtr object_storage; + std::string object_storage_root_path; + +public: + MetadataStorageFromPlainObjectStorage( + ObjectStoragePtr object_storage_, + const std::string & object_storage_root_path_); + + MetadataTransactionPtr createTransaction() const override; + + const std::string & getPath() const override; + + bool exists(const std::string & path) const override; + + bool isFile(const std::string & path) const override; + + bool isDirectory(const std::string & path) const override; + + uint64_t getFileSize(const String & path) const override; + + Poco::Timestamp getLastModified(const std::string & path) const override; + + time_t getLastChanged(const std::string & path) const override; + + bool supportsChmod() const override { return false; } + + bool supportsStat() const override { return false; } + + struct stat stat(const String & path) const override; + + std::vector listDirectory(const std::string & path) const override; + + DirectoryIteratorPtr iterateDirectory(const std::string & path) const override; + + std::string readFileToString(const std::string & path) const override; + + std::unordered_map getSerializedMetadata(const std::vector & file_paths) const override; + + uint32_t getHardlinkCount(const std::string & path) const override; + + DiskPtr getDisk() const { return {}; } + + StoredObjects getStorageObjects(const std::string & path) const override; + + std::string getObjectStorageRootPath() const override { return object_storage_root_path; } +}; + +class MetadataStorageFromPlainObjectStorageTransaction final : public IMetadataTransaction +{ +private: + const MetadataStorageFromPlainObjectStorage & metadata_storage; + + std::vector operations; +public: + MetadataStorageFromPlainObjectStorageTransaction(const MetadataStorageFromPlainObjectStorage & metadata_storage_) + : metadata_storage(metadata_storage_) + {} + + ~MetadataStorageFromPlainObjectStorageTransaction() override = default; + + const IMetadataStorage & getStorageForNonTransactionalReads() const final; + + void commit() final {} + + void writeStringToFile(const std::string & path, const std::string & data) override; + + void createEmptyMetadataFile(const std::string & path) override; + + void createMetadataFile(const std::string & path, const std::string & blob_name, uint64_t size_in_bytes) override; + + void addBlobToMetadata(const std::string & path, const std::string & blob_name, uint64_t size_in_bytes) override; + + void setLastModified(const std::string & path, const Poco::Timestamp & timestamp) override; + + bool supportsChmod() const override { return false; } + + void chmod(const String & path, mode_t mode) override; + + void setReadOnly(const std::string & path) override; + + void unlinkFile(const std::string & path) override; + + void createDirectory(const std::string & path) override; + + void createDirectoryRecursive(const std::string & path) override; + + void removeDirectory(const std::string & path) override; + + void removeRecursive(const std::string & path) override; + + void createHardLink(const std::string & path_from, const std::string & path_to) override; + + void moveFile(const std::string & path_from, const std::string & path_to) override; + + void moveDirectory(const std::string & path_from, const std::string & path_to) override; + + void replaceFile(const std::string & path_from, const std::string & path_to) override; + 
+ void unlinkMetadata(const std::string & path) override; +}; + +} diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index ce5235d4323..a193653db9a 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -43,8 +43,11 @@ struct S3ObjectStorageSettings class S3ObjectStorage : public IObjectStorage { -public: +private: + friend class S3PlainObjectStorage; + S3ObjectStorage( + const char * logger_name, std::unique_ptr && client_, std::unique_ptr && s3_settings_, String version_id_, @@ -61,6 +64,15 @@ public: data_source_description.description = connection_string; data_source_description.is_cached = false; data_source_description.is_encrypted = false; + + log = &Poco::Logger::get(logger_name); + } + +public: + template + S3ObjectStorage(std::unique_ptr && client_, Args && ...args) + : S3ObjectStorage("S3ObjectStorage", std::move(client_), std::forward(args)...) + { } DataSourceDescription getDataSourceDescription() const override @@ -181,10 +193,26 @@ private: const String version_id; - Poco::Logger * log = &Poco::Logger::get("S3ObjectStorage"); + Poco::Logger * log; DataSourceDescription data_source_description; }; +/// Do not encode keys, store as-is, and do not require separate disk for metadata. +/// But because of this does not support renames/hardlinks/attrs/... +/// +/// NOTE: This disk has excessive API calls. +class S3PlainObjectStorage : public S3ObjectStorage +{ +public: + std::string generateBlobNameForPath(const std::string & path) override { return path; } + std::string getName() const override { return "S3PlainObjectStorage"; } + + template + S3PlainObjectStorage(Args && ...args) + : S3ObjectStorage("S3PlainObjectStorage", std::forward(args)...) 
+ {} +}; + } #endif diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index 62cacde3f14..e09aef22122 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -122,15 +123,31 @@ void registerDiskS3(DiskFactory & factory) if (uri.key.back() != '/') throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 path must ends with '/', but '{}' doesn't.", uri.key); - auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context); - - auto metadata_storage = std::make_shared(metadata_disk, uri.key); S3Capabilities s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); + std::shared_ptr s3_storage; - auto s3_storage = std::make_unique( - getClient(config, config_prefix, context), - getSettings(config, config_prefix, context), - uri.version_id, s3_capabilities, uri.bucket, uri.endpoint); + String type = config.getString(config_prefix + ".type"); + chassert(type == "s3" || type == "s3_plain"); + + MetadataStoragePtr metadata_storage; + if (type == "s3_plain") + { + s3_storage = std::make_shared( + getClient(config, config_prefix, context), + getSettings(config, config_prefix, context), + uri.version_id, s3_capabilities, uri.bucket, uri.endpoint); + metadata_storage = std::make_shared(s3_storage, uri.key); + } + else + { + s3_storage = std::make_shared( + getClient(config, config_prefix, context), + getSettings(config, config_prefix, context), + uri.version_id, s3_capabilities, uri.bucket, uri.endpoint); + + auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context); + metadata_storage = std::make_shared(metadata_disk, uri.key); + } bool skip_access_check = config.getBool(config_prefix + ".skip_access_check", false); @@ -156,7 +173,7 @@ void registerDiskS3(DiskFactory & factory) std::shared_ptr s3disk = std::make_shared( name, uri.key, - "DiskS3", + type == "s3" ? "DiskS3" : "DiskS3Plain", std::move(metadata_storage), std::move(s3_storage), send_metadata, @@ -177,6 +194,7 @@ void registerDiskS3(DiskFactory & factory) return std::make_shared(disk_result); }; factory.registerDiskType("s3", creator); + factory.registerDiskType("s3_plain", creator); } } diff --git a/tests/integration/test_backup_restore_s3/__init__.py b/tests/integration/test_backup_restore_s3/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_backup_restore_s3/configs/storage_conf.xml b/tests/integration/test_backup_restore_s3/configs/storage_conf.xml new file mode 100644 index 00000000000..0402be720c4 --- /dev/null +++ b/tests/integration/test_backup_restore_s3/configs/storage_conf.xml @@ -0,0 +1,42 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + 33554432 + + + s3_plain + http://minio1:9001/root/data/ + minio + minio123 + 33554432 + + + local + / + + + + + +
+                        <disk>s3</disk>
+                    </main>
+                </volumes>
+            </s3>
+        </policies>
+    </storage_configuration>
+
+    <backups>
+        <allowed_disk>default</allowed_disk>
+        <allowed_disk>s3</allowed_disk>
+        <allowed_disk>s3_plain</allowed_disk>
+        <allowed_path>/backups/</allowed_path>
+    </backups>
+</clickhouse>
diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py new file mode 100644 index 00000000000..e18b3800fc0 --- /dev/null +++ b/tests/integration/test_backup_restore_s3/test.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 +# pylint: disable=unused-argument + +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/storage_conf.xml"], + with_minio=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.mark.parametrize( + "storage_policy,to_disk", + [ + pytest.param( + "default", + "default", + id="from_local_to_local", + ), + pytest.param( + "s3", + "default", + id="from_s3_to_local", + ), + pytest.param( + "default", + "s3", + id="from_local_to_s3", + ), + pytest.param( + "s3", + "s3_plain", + id="from_s3_to_s3_plain", + ), + pytest.param( + "default", + "s3_plain", + id="from_local_to_s3_plain", + ), + ], +) +def test_backup_restore(start_cluster, storage_policy, to_disk): + backup_name = storage_policy + "_" + to_disk + node.query( + f""" + DROP TABLE IF EXISTS data NO DELAY; + CREATE TABLE data (key Int, value String, array Array(String)) Engine=MergeTree() ORDER BY tuple() SETTINGS storage_policy='{storage_policy}'; + INSERT INTO data SELECT * FROM generateRandom('key Int, value String, array Array(String)') LIMIT 1000; + BACKUP TABLE data TO Disk('{to_disk}', '{backup_name}'); + RESTORE TABLE data AS data_restored FROM Disk('{to_disk}', '{backup_name}'); + SELECT throwIf( + (SELECT groupArray(tuple(*)) FROM data) != + (SELECT groupArray(tuple(*)) FROM data_restored), + 'Data does not matched after BACKUP/RESTORE' + ); + DROP TABLE data NO DELAY; + DROP TABLE data_restored NO DELAY; + """ + ) From ad255206f4fd1e38c7a7349a9a08063f4c7223be Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 10 Oct 2022 11:26:07 +0000 Subject: [PATCH 083/252] Exclude comments from style-check defined extern --- src/Functions/throwIf.cpp | 9 ++------- src/Processors/Transforms/ArrayJoinTransform.cpp | 5 ----- utils/check-style/check-style | 4 +++- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/src/Functions/throwIf.cpp b/src/Functions/throwIf.cpp index 692faf1883c..357c5e0651a 100644 --- a/src/Functions/throwIf.cpp +++ b/src/Functions/throwIf.cpp @@ -22,11 +22,6 @@ namespace ErrorCodes namespace { -/// The regex-based code style check script in CI complains when it sees "ErrorCodes:: ErrorCode" (space added to avoid another match). -/// Because this expression is only used in this file, don't add some suppression mechanism to the already complex style checker, instead -/// work around by creating a namespace alias. -namespace ErrorCodeAlias = ErrorCodes; - /// Throw an exception if the argument is non zero. 
class FunctionThrowIf : public IFunction { @@ -93,7 +88,7 @@ public: custom_message = message_column->getValue(); } - std::optional custom_error_code; + std::optional custom_error_code; if (allow_custom_error_code_argument && arguments.size() == 3) { if (!isColumnConst(*(arguments[2].column))) @@ -125,7 +120,7 @@ public: private: template - ColumnPtr execute(const IColumn * in_untyped, const std::optional & message, const std::optional & error_code) const + ColumnPtr execute(const IColumn * in_untyped, const std::optional & message, const std::optional & error_code) const { const auto * in = checkAndGetColumn>(in_untyped); diff --git a/src/Processors/Transforms/ArrayJoinTransform.cpp b/src/Processors/Transforms/ArrayJoinTransform.cpp index 9058d7df2a0..eea1469c7a6 100644 --- a/src/Processors/Transforms/ArrayJoinTransform.cpp +++ b/src/Processors/Transforms/ArrayJoinTransform.cpp @@ -4,11 +4,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - Block ArrayJoinTransform::transformHeader(Block header, const ArrayJoinActionPtr & array_join) { array_join->execute(header); diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 772f48ad088..9c34328e919 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -87,6 +87,8 @@ EXTERN_TYPES_EXCLUDES=( CurrentMetrics::Metric CurrentMetrics::values CurrentMetrics::Value + + ErrorCodes::ErrorCode ) for extern_type in ${!EXTERN_TYPES[@]}; do type_of_extern=${EXTERN_TYPES[$extern_type]} @@ -114,7 +116,7 @@ for extern_type in ${!EXTERN_TYPES[@]}; do find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | { grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::[$allowed_chars]+" } | while read file; do - grep -P "$extern_type::[$allowed_chars]+" $file | sed -r -e "s/^.*?$extern_type::([$allowed_chars]+).*?$/\1/" | while read val; do + grep -P "$extern_type::[$allowed_chars]+" $file | grep -P -v '^\s*//' | sed -r -e "s/^.*?$extern_type::([$allowed_chars]+).*?$/\1/" | while read val; do if ! grep -q "extern const $type_of_extern $val" $file; then if ! 
in_array "$extern_type::$val" "${EXTERN_TYPES_EXCLUDES[@]}"; then echo "$extern_type::$val is used in file $file but not defined" From 631b8e1abb0f866add8585f64b5e9708febb30c2 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 14 Oct 2022 13:32:49 +0200 Subject: [PATCH 084/252] check-style: fix pattern of allowed chars for ErrorCodes checks Signed-off-by: Azat Khuzhin --- utils/check-style/check-style | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 9c34328e919..5805fcd1d43 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -60,7 +60,7 @@ EXTERN_TYPES[ErrorCodes]=int EXTERN_TYPES[ProfileEvents]=Event EXTERN_TYPES[CurrentMetrics]=Metric declare -A EXTERN_ALLOWED_CHARS -EXTERN_ALLOWED_CHARS[ErrorCodes]='_A-Z' +EXTERN_ALLOWED_CHARS[ErrorCodes]='_A-Za-z' EXTERN_ALLOWED_CHARS[ProfileEvents]='_A-Za-z' EXTERN_ALLOWED_CHARS[CurrentMetrics]='_A-Za-z' EXTERN_TYPES_EXCLUDES=( From 3361b27965ee4f5b2261eace62cbee54b3178f3f Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Fri, 14 Oct 2022 14:27:52 +0200 Subject: [PATCH 085/252] less --- src/Storages/HDFS/StorageHDFS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 208a8aa6845..138c92ea62d 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -121,7 +121,7 @@ namespace std::pair getPathFromUriAndUriWithoutPath(const String & uri) { auto pos = uri.find("//"); - if (pos != std::string::npos && pos + 2 != uri.end()) + if (pos != std::string::npos && pos + 2 < uri.length()) { pos = uri.find('/', pos + 2); if (pos != std::string::npos) From 5a36b99933423c117a1223f1ffb46e1f460f215a Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 14 Oct 2022 12:29:09 +0000 Subject: [PATCH 086/252] Add UTC for toDate32 in tests --- .../02457_datediff_via_unix_epoch.sql | 16 ++--- .../02458_datediff_date32.reference | 66 +++++++++---------- .../0_stateless/02458_datediff_date32.sql | 66 +++++++++---------- 3 files changed, 74 insertions(+), 74 deletions(-) diff --git a/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql b/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql index b8c76626ec8..036bb64a2d4 100644 --- a/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql +++ b/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql @@ -1,23 +1,23 @@ -select 'year', date_diff('year', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'year', date_diff('year', toDate32('1969-12-25', 'UTC'), toDate32('1970-01-05', 'UTC')); select 'year', date_diff('year', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); -select 'quarter', date_diff('quarter', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'quarter', date_diff('quarter', toDate32('1969-12-25', 'UTC'), toDate32('1970-01-05', 'UTC')); select 'quarter', date_diff('quarter', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); -select 'month', date_diff('month', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'month', date_diff('month', toDate32('1969-12-25', 'UTC'), toDate32('1970-01-05', 'UTC')); select 'month', date_diff('month', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); -select 'week', date_diff('week', toDate32('1969-12-25'), 
toDate32('1970-01-05')); +select 'week', date_diff('week', toDate32('1969-12-25', 'UTC'), toDate32('1970-01-05', 'UTC')); select 'week', date_diff('week', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); -select 'day', date_diff('day', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'day', date_diff('day', toDate32('1969-12-25', 'UTC'), toDate32('1970-01-05', 'UTC')); select 'day', date_diff('day', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); -select 'hour', date_diff('hour', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'hour', date_diff('hour', toDate32('1969-12-25', 'UTC'), toDate32('1970-01-05', 'UTC')); select 'hour', date_diff('hour', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); -select 'minute', date_diff('minute', toDate32('1969-12-31'), toDate32('1970-01-01')); +select 'minute', date_diff('minute', toDate32('1969-12-31', 'UTC'), toDate32('1970-01-01', 'UTC')); select 'minute', date_diff('minute', toDateTime64('1969-12-31 23:50:00.000', 3, 'UTC'), toDateTime64('1970-01-01 00:10:00.000', 3, 'UTC')); -select 'second', date_diff('second', toDate32('1969-12-31'), toDate32('1970-01-01')); +select 'second', date_diff('second', toDate32('1969-12-31', 'UTC'), toDate32('1970-01-01', 'UTC')); select 'second', date_diff('second', toDateTime64('1969-12-31 23:50:00.000', 3, 'UTC'), toDateTime64('1970-01-01 00:10:00.000', 3, 'UTC')); diff --git a/tests/queries/0_stateless/02458_datediff_date32.reference b/tests/queries/0_stateless/02458_datediff_date32.reference index 58551402dd3..482b3be633b 100644 --- a/tests/queries/0_stateless/02458_datediff_date32.reference +++ b/tests/queries/0_stateless/02458_datediff_date32.reference @@ -1,56 +1,56 @@ -- { echo } -- Date32 vs Date32 -SELECT dateDiff('second', toDate32('1900-01-01'), toDate32('1900-01-02')); +SELECT dateDiff('second', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-02', 'UTC')); 86400 -SELECT dateDiff('minute', toDate32('1900-01-01'), toDate32('1900-01-02')); +SELECT dateDiff('minute', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-02', 'UTC')); 1440 -SELECT dateDiff('hour', toDate32('1900-01-01'), toDate32('1900-01-02')); +SELECT dateDiff('hour', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-02', 'UTC')); 24 -SELECT dateDiff('day', toDate32('1900-01-01'), toDate32('1900-01-02')); +SELECT dateDiff('day', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-02', 'UTC')); 1 -SELECT dateDiff('week', toDate32('1900-01-01'), toDate32('1900-01-08')); +SELECT dateDiff('week', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-08', 'UTC')); 1 -SELECT dateDiff('month', toDate32('1900-01-01'), toDate32('1900-02-01')); +SELECT dateDiff('month', toDate32('1900-01-01', 'UTC'), toDate32('1900-02-01', 'UTC')); 1 -SELECT dateDiff('quarter', toDate32('1900-01-01'), toDate32('1900-04-01')); +SELECT dateDiff('quarter', toDate32('1900-01-01', 'UTC'), toDate32('1900-04-01', 'UTC')); 1 -SELECT dateDiff('year', toDate32('1900-01-01'), toDate32('1901-01-01')); +SELECT dateDiff('year', toDate32('1900-01-01', 'UTC'), toDate32('1901-01-01', 'UTC')); 1 -- With DateTime64 -- Date32 vs DateTime64 -SELECT dateDiff('second', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('second', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); 86400 -SELECT dateDiff('minute', toDate32('1900-01-01'), toDateTime64('1900-01-02 
00:00:00', 3, 'UTC')); +SELECT dateDiff('minute', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); 1440 -SELECT dateDiff('hour', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('hour', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); 24 -SELECT dateDiff('day', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('day', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); 1 -SELECT dateDiff('week', toDate32('1900-01-01'), toDateTime64('1900-01-08 00:00:00', 3, 'UTC')); +SELECT dateDiff('week', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-08 00:00:00', 3, 'UTC')); 1 -SELECT dateDiff('month', toDate32('1900-01-01'), toDateTime64('1900-02-01 00:00:00', 3, 'UTC')); +SELECT dateDiff('month', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-02-01 00:00:00', 3, 'UTC')); 1 -SELECT dateDiff('quarter', toDate32('1900-01-01'), toDateTime64('1900-04-01 00:00:00', 3, 'UTC')); +SELECT dateDiff('quarter', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-04-01 00:00:00', 3, 'UTC')); 1 -SELECT dateDiff('year', toDate32('1900-01-01'), toDateTime64('1901-01-01 00:00:00', 3, 'UTC')); +SELECT dateDiff('year', toDate32('1900-01-01', 'UTC'), toDateTime64('1901-01-01 00:00:00', 3, 'UTC')); 1 -- DateTime64 vs Date32 -SELECT dateDiff('second', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02')); +SELECT dateDiff('second', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02', 'UTC')); 86400 -SELECT dateDiff('minute', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02')); +SELECT dateDiff('minute', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02', 'UTC')); 1440 -SELECT dateDiff('hour', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02')); +SELECT dateDiff('hour', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02', 'UTC')); 24 -SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02')); +SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02', 'UTC')); 1 -SELECT dateDiff('week', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-08')); +SELECT dateDiff('week', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-08', 'UTC')); 1 -SELECT dateDiff('month', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-02-01')); +SELECT dateDiff('month', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-02-01', 'UTC')); 1 -SELECT dateDiff('quarter', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-04-01')); +SELECT dateDiff('quarter', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-04-01', 'UTC')); 1 -SELECT dateDiff('year', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1901-01-01')); +SELECT dateDiff('year', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1901-01-01', 'UTC')); 1 -- With DateTime -- Date32 vs DateTime @@ -123,11 +123,11 @@ SELECT dateDiff('quarter', toDate('2015-08-18', 'UTC'), toDate32('2015-11-18', ' SELECT dateDiff('year', toDate('2015-08-18', 'UTC'), toDate32('2016-08-18', 'UTC')); 1 -- Const vs non-const columns -SELECT dateDiff('day', toDate32('1900-01-01'), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', toDate32('1900-01-01', 'UTC'), materialize(toDate32('1900-01-02', 'UTC'))); 1 -SELECT dateDiff('day', toDate32('1900-01-01'), 
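-- Editor's note (illustrative, not part of the reference output): materialize() wraps a
-- constant expression into a full non-const column, so the four blocks of this test
-- (const/const, const/non-const, non-const/const, non-const/non-const) exercise the
-- different argument-constness paths of dateDiff. A sketch of the pattern:
-- SELECT dateDiff('day', materialize(toDate32('1900-01-01', 'UTC')), toDate32('1900-01-02', 'UTC')); -- non-const vs const, still 1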
materialize(toDateTime64('1900-01-02 00:00:00', 3, 'UTC'))); +SELECT dateDiff('day', toDate32('1900-01-01', 'UTC'), materialize(toDateTime64('1900-01-02 00:00:00', 3, 'UTC'))); 1 -SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), materialize(toDate32('1900-01-02', 'UTC'))); 1 SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), materialize(toDateTime('2015-08-19 00:00:00', 'UTC'))); 1 @@ -138,11 +138,11 @@ SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), materialize(toDate('2015-0 SELECT dateDiff('day', toDate('2015-08-18', 'UTC'), materialize(toDate32('2015-08-19', 'UTC'))); 1 -- Non-const vs const columns -SELECT dateDiff('day', materialize(toDate32('1900-01-01')), toDate32('1900-01-02')); +SELECT dateDiff('day', materialize(toDate32('1900-01-01', 'UTC')), toDate32('1900-01-02', 'UTC')); 1 -SELECT dateDiff('day', materialize(toDate32('1900-01-01')), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('day', materialize(toDate32('1900-01-01', 'UTC')), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); 1 -SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3, 'UTC')), toDate32('1900-01-02')); +SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3, 'UTC')), toDate32('1900-01-02', 'UTC')); 1 SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), toDateTime('2015-08-19 00:00:00', 'UTC')); 1 @@ -153,11 +153,11 @@ SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), toDate('2015- SELECT dateDiff('day', materialize(toDate('2015-08-18', 'UTC')), toDate32('2015-08-19', 'UTC')); 1 -- Non-const vs non-const columns -SELECT dateDiff('day', materialize(toDate32('1900-01-01')), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', materialize(toDate32('1900-01-01', 'UTC')), materialize(toDate32('1900-01-02', 'UTC'))); 1 -SELECT dateDiff('day', materialize(toDate32('1900-01-01')), materialize(toDateTime64('1900-01-02 00:00:00', 3, 'UTC'))); +SELECT dateDiff('day', materialize(toDate32('1900-01-01', 'UTC')), materialize(toDateTime64('1900-01-02 00:00:00', 3, 'UTC'))); 1 -SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3, 'UTC')), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3, 'UTC')), materialize(toDate32('1900-01-02', 'UTC'))); 1 SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDateTime('2015-08-19 00:00:00', 'UTC'))); 1 diff --git a/tests/queries/0_stateless/02458_datediff_date32.sql b/tests/queries/0_stateless/02458_datediff_date32.sql index dfe0d55ee97..a5a0809fc39 100644 --- a/tests/queries/0_stateless/02458_datediff_date32.sql +++ b/tests/queries/0_stateless/02458_datediff_date32.sql @@ -1,35 +1,35 @@ -- { echo } -- Date32 vs Date32 -SELECT dateDiff('second', toDate32('1900-01-01'), toDate32('1900-01-02')); -SELECT dateDiff('minute', toDate32('1900-01-01'), toDate32('1900-01-02')); -SELECT dateDiff('hour', toDate32('1900-01-01'), toDate32('1900-01-02')); -SELECT dateDiff('day', toDate32('1900-01-01'), toDate32('1900-01-02')); -SELECT dateDiff('week', toDate32('1900-01-01'), toDate32('1900-01-08')); -SELECT dateDiff('month', toDate32('1900-01-01'), toDate32('1900-02-01')); -SELECT dateDiff('quarter', toDate32('1900-01-01'), toDate32('1900-04-01')); -SELECT dateDiff('year', toDate32('1900-01-01'), toDate32('1901-01-01')); +SELECT dateDiff('second', 
toDate32('1900-01-01', 'UTC'), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('minute', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('hour', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('day', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('week', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-08', 'UTC')); +SELECT dateDiff('month', toDate32('1900-01-01', 'UTC'), toDate32('1900-02-01', 'UTC')); +SELECT dateDiff('quarter', toDate32('1900-01-01', 'UTC'), toDate32('1900-04-01', 'UTC')); +SELECT dateDiff('year', toDate32('1900-01-01', 'UTC'), toDate32('1901-01-01', 'UTC')); -- With DateTime64 -- Date32 vs DateTime64 -SELECT dateDiff('second', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); -SELECT dateDiff('minute', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); -SELECT dateDiff('hour', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); -SELECT dateDiff('day', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); -SELECT dateDiff('week', toDate32('1900-01-01'), toDateTime64('1900-01-08 00:00:00', 3, 'UTC')); -SELECT dateDiff('month', toDate32('1900-01-01'), toDateTime64('1900-02-01 00:00:00', 3, 'UTC')); -SELECT dateDiff('quarter', toDate32('1900-01-01'), toDateTime64('1900-04-01 00:00:00', 3, 'UTC')); -SELECT dateDiff('year', toDate32('1900-01-01'), toDateTime64('1901-01-01 00:00:00', 3, 'UTC')); +SELECT dateDiff('second', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('minute', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('hour', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('day', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('week', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-08 00:00:00', 3, 'UTC')); +SELECT dateDiff('month', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-02-01 00:00:00', 3, 'UTC')); +SELECT dateDiff('quarter', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-04-01 00:00:00', 3, 'UTC')); +SELECT dateDiff('year', toDate32('1900-01-01', 'UTC'), toDateTime64('1901-01-01 00:00:00', 3, 'UTC')); -- DateTime64 vs Date32 -SELECT dateDiff('second', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02')); -SELECT dateDiff('minute', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02')); -SELECT dateDiff('hour', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02')); -SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02')); -SELECT dateDiff('week', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-08')); -SELECT dateDiff('month', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-02-01')); -SELECT dateDiff('quarter', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-04-01')); -SELECT dateDiff('year', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1901-01-01')); +SELECT dateDiff('second', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('minute', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('hour', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), 
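-- Editor's note (illustrative, not part of the patch): dateDiff(unit, start, end) counts
-- unit boundaries crossed going from start to end, so the result is signed; reversing the
-- two arguments in any of these statements would negate the expected value, e.g.:
-- SELECT dateDiff('day', toDate32('1900-01-02', 'UTC'), toDate32('1900-01-01', 'UTC')); -- -1 (sketch, not part of this test)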
toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('week', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-08', 'UTC')); +SELECT dateDiff('month', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-02-01', 'UTC')); +SELECT dateDiff('quarter', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-04-01', 'UTC')); +SELECT dateDiff('year', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1901-01-01', 'UTC')); -- With DateTime -- Date32 vs DateTime @@ -74,27 +74,27 @@ SELECT dateDiff('quarter', toDate('2015-08-18', 'UTC'), toDate32('2015-11-18', ' SELECT dateDiff('year', toDate('2015-08-18', 'UTC'), toDate32('2016-08-18', 'UTC')); -- Const vs non-const columns -SELECT dateDiff('day', toDate32('1900-01-01'), materialize(toDate32('1900-01-02'))); -SELECT dateDiff('day', toDate32('1900-01-01'), materialize(toDateTime64('1900-01-02 00:00:00', 3, 'UTC'))); -SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', toDate32('1900-01-01', 'UTC'), materialize(toDate32('1900-01-02', 'UTC'))); +SELECT dateDiff('day', toDate32('1900-01-01', 'UTC'), materialize(toDateTime64('1900-01-02 00:00:00', 3, 'UTC'))); +SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), materialize(toDate32('1900-01-02', 'UTC'))); SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), materialize(toDateTime('2015-08-19 00:00:00', 'UTC'))); SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDate32('2015-08-19', 'UTC'))); SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), materialize(toDate('2015-08-19', 'UTC'))); SELECT dateDiff('day', toDate('2015-08-18', 'UTC'), materialize(toDate32('2015-08-19', 'UTC'))); -- Non-const vs const columns -SELECT dateDiff('day', materialize(toDate32('1900-01-01')), toDate32('1900-01-02')); -SELECT dateDiff('day', materialize(toDate32('1900-01-01')), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); -SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3, 'UTC')), toDate32('1900-01-02')); +SELECT dateDiff('day', materialize(toDate32('1900-01-01', 'UTC')), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('day', materialize(toDate32('1900-01-01', 'UTC')), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3, 'UTC')), toDate32('1900-01-02', 'UTC')); SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), toDateTime('2015-08-19 00:00:00', 'UTC')); SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDate32('2015-08-19', 'UTC')); SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), toDate('2015-08-19', 'UTC')); SELECT dateDiff('day', materialize(toDate('2015-08-18', 'UTC')), toDate32('2015-08-19', 'UTC')); -- Non-const vs non-const columns -SELECT dateDiff('day', materialize(toDate32('1900-01-01')), materialize(toDate32('1900-01-02'))); -SELECT dateDiff('day', materialize(toDate32('1900-01-01')), materialize(toDateTime64('1900-01-02 00:00:00', 3, 'UTC'))); -SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3, 'UTC')), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', materialize(toDate32('1900-01-01', 'UTC')), materialize(toDate32('1900-01-02', 'UTC'))); +SELECT dateDiff('day', materialize(toDate32('1900-01-01', 'UTC')), materialize(toDateTime64('1900-01-02 00:00:00', 3, 'UTC'))); +SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3, 'UTC')), 
materialize(toDate32('1900-01-02', 'UTC'))); SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDateTime('2015-08-19 00:00:00', 'UTC'))); SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDate32('2015-08-19', 'UTC'))); SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDate('2015-08-19', 'UTC'))); From 980f79f9a72aa994f26fac8a6a1e3d0791b5a182 Mon Sep 17 00:00:00 2001 From: Dale McDiarmid Date: Fri, 14 Oct 2022 15:32:34 +0100 Subject: [PATCH 087/252] go update --- programs/diagnostics/go.mod | 2 +- programs/diagnostics/go.sum | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/programs/diagnostics/go.mod b/programs/diagnostics/go.mod index 19fc2ec8202..fb1568ea491 100644 --- a/programs/diagnostics/go.mod +++ b/programs/diagnostics/go.mod @@ -1,6 +1,6 @@ module github.com/ClickHouse/ClickHouse/programs/diagnostics -go 1.17 +go 1.19 require ( github.com/ClickHouse/clickhouse-go/v2 v2.0.12 diff --git a/programs/diagnostics/go.sum b/programs/diagnostics/go.sum index dd1b18ce0c7..aa69472e9c2 100644 --- a/programs/diagnostics/go.sum +++ b/programs/diagnostics/go.sum @@ -65,7 +65,6 @@ github.com/Azure/go-autorest/logger v0.2.0/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZ github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/ClickHouse/clickhouse-go v1.5.3 h1:Vok8zUb/wlqc9u8oEqQzBMBRDoFd8NxPRqgYEqMnV88= github.com/ClickHouse/clickhouse-go v1.5.3/go.mod h1:EaI/sW7Azgz9UATzd5ZdZHRUhHgv5+JMS9NSr2smCJI= github.com/ClickHouse/clickhouse-go/v2 v2.0.12 h1:Nbl/NZwoM6LGJm7smNBgvtdr/rxjlIssSW3eG/Nmb9E= github.com/ClickHouse/clickhouse-go/v2 v2.0.12/go.mod h1:u4RoNQLLM2W6hNSPYrIESLJqaWSInZVmfM+MlaAhXcg= @@ -457,7 +456,6 @@ github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgf github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/hashicorp/consul/api v1.11.0/go.mod h1:XjsvQN+RJGWI2TWy1/kqaE16HrR2J/FWgkYjdZQsX9M= -github.com/hashicorp/consul/api v1.12.0/go.mod h1:6pVBMo0ebnYdt2S3H87XhekM/HHrUoTD2XXb/VrZVy0= github.com/hashicorp/consul/sdk v0.8.0/go.mod h1:GBvyrGALthsZObzUGsfgHZQDXjg4lOjagTIwIR1vPms= github.com/hashicorp/errwrap v0.0.0-20141028054710-7554cd9344ce/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= @@ -663,9 +661,7 @@ github.com/paulmach/protoscan v0.2.1-0.20210522164731-4e53c6875432/go.mod h1:2sV github.com/pelletier/go-toml v1.9.4 h1:tjENF6MfZAg8e4ZmZTeWaWiT2vXtsoO6+iuOjFhECwM= github.com/pelletier/go-toml v1.9.4/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= -github.com/pierrec/lz4 v2.0.5+incompatible h1:2xWsjqPFWcplujydGg4WmhC/6fZqK42wMM8aXeqhl0I= github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= -github.com/pierrec/lz4/v4 v4.1.12/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pierrec/lz4/v4 v4.1.14 h1:+fL8AQEZtz/ijeNnpduH0bROTu0O3NZAlPjQxGn8LwE= 
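Editor's note: the go.sum deletions in this patch look like the pruning `go mod tidy`
performs once the go directive in go.mod is raised (here from 1.17 to 1.19) and entries no
longer reachable from the module graph drop out; no source change is implied. A typical
refresh, assuming the diagnostics module root:

    cd programs/diagnostics
    go mod tidy    # rewrites go.mod/go.sum, removing stale hashes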
github.com/pierrec/lz4/v4 v4.1.14/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -717,7 +713,6 @@ github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/safchain/ethtool v0.0.0-20190326074333-42ed695e3de8/go.mod h1:Z0q5wiBQGYcxhMZ6gUqHn6pYNLypFAvaL3UvgZLR0U4= github.com/sagikazarmark/crypt v0.3.0/go.mod h1:uD/D+6UF4SrIR1uGEv7bBNkNqLGqUr43MRiaGWX1Nig= -github.com/sagikazarmark/crypt v0.4.0/go.mod h1:ALv2SRj7GxYV4HO9elxH9nS6M9gW+xDNxqmyJ6RfDFM= github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo= @@ -1083,7 +1078,6 @@ golang.org/x/sys v0.0.0-20211109184856-51b60fd695b3/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211210111614-af8b64212486/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9 h1:XfKQ4OlFl8okEOr5UvAqFRVj8pY/4yfcXrddB8qAbU0= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= @@ -1202,7 +1196,6 @@ google.golang.org/api v0.57.0/go.mod h1:dVPlbZyBo2/OjBpmvNdpn2GRm6rPy75jyU7bmhdr google.golang.org/api v0.59.0/go.mod h1:sT2boj7M9YJxZzgeZqXogmhfmRWDtPzT31xkieUbuZU= google.golang.org/api v0.61.0/go.mod h1:xQRti5UdCmoCEqFxcz93fTl338AVqDgyaDRuOZ3hg9I= google.golang.org/api v0.62.0/go.mod h1:dKmwPCydfsad4qCH08MSdgWjfHOyfpd4VtDGgRFdavw= -google.golang.org/api v0.63.0/go.mod h1:gs4ij2ffTRXwuzzgJl/56BdwJaA194ijkfn++9tDuPo= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= From e0c25184166ee31271f490c5e47ef703b40d9160 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 14 Oct 2022 16:50:48 +0200 Subject: [PATCH 088/252] Fix another trash in zero-copy replication --- src/Storages/StorageReplicatedMergeTree.cpp | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index b55c59a3d6e..6eec6178a1d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7673,14 +7673,14 @@ namespace /// But sometimes we need an opposite. When we deleting all_0_0_0_1 it can be non replicated to other replicas, so we are the only owner of this part. /// In this case when we will drop all_0_0_0_1 we will drop blobs for all_0_0_0. But it will lead to dataloss. For such case we need to check that other replicas /// still need parent part. 
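/// Editor's note (illustrative summary, not part of the patch): the function below changes
/// its return type from NameSet to std::pair<bool, NameSet>, where the bool records whether
/// a locked parent part was found at all. unlockSharedDataByID() uses that flag to stay
/// conservative: if the replica's own lock node is already gone (ZNONODE) *and* a parent
/// lock exists, this part may still share hardlinked blobs with the parent, so blob removal
/// is refused instead of risking data loss. The caller-side decision, in sketch form:
///
///     auto [has_parent, keep_files] = getParentLockedBlobs(zookeeper_ptr, prefix, part_name, format_version, log);
///     if (ec == Coordination::Error::ZNONODE && has_parent)
///         return {false, {}};    // ownership unknown and blobs may be shared -> keep them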
-NameSet getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr, const std::string & zero_copy_part_path_prefix, const std::string & part_info_str, MergeTreeDataFormatVersion format_version, Poco::Logger * log) +std::pair getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr, const std::string & zero_copy_part_path_prefix, const std::string & part_info_str, MergeTreeDataFormatVersion format_version, Poco::Logger * log) { NameSet files_not_to_remove; MergeTreePartInfo part_info = MergeTreePartInfo::fromPartName(part_info_str, format_version); /// No mutations -- no hardlinks -- no issues if (part_info.mutation == 0) - return files_not_to_remove; + return {false, files_not_to_remove}; /// Getting all zero copy parts Strings parts_str; @@ -7725,10 +7725,10 @@ NameSet getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr, const std::stri LOG_TRACE(log, "Found files not to remove from parent part {}: [{}]", part_candidate_info_str, fmt::join(files_not_to_remove, ", ")); } - break; + return {true, files_not_to_remove}; } } - return files_not_to_remove; + return {false, files_not_to_remove}; } } @@ -7754,7 +7754,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( if (!files_not_to_remove_str.empty()) boost::split(files_not_to_remove, files_not_to_remove_str, boost::is_any_of("\n ")); - auto parent_not_to_remove = getParentLockedBlobs(zookeeper_ptr, fs::path(zc_zookeeper_path).parent_path(), part_name, data_format_version, logger); + auto [has_parent, parent_not_to_remove] = getParentLockedBlobs(zookeeper_ptr, fs::path(zc_zookeeper_path).parent_path(), part_name, data_format_version, logger); files_not_to_remove.insert(parent_not_to_remove.begin(), parent_not_to_remove.end()); String zookeeper_part_uniq_node = fs::path(zc_zookeeper_path) / part_id; @@ -7764,8 +7764,17 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( LOG_TRACE(logger, "Remove zookeeper lock {} for part {}", zookeeper_part_replica_node, part_name); - if (auto ec = zookeeper_ptr->tryRemove(zookeeper_part_replica_node); ec != Coordination::Error::ZOK && ec != Coordination::Error::ZNONODE) + if (auto ec = zookeeper_ptr->tryRemove(zookeeper_part_replica_node); ec != Coordination::Error::ZOK) { + /// Very complex case. It means that lock already doesn't exist when we tried to remove it. + /// So we don't know are we owner of this part or not. Maybe we just mutated it, renamed on disk and failed to lock in ZK. + /// But during mutation we can have hardlinks to another part. So it's not Ok to remove blobs of this part if it was mutated. + if (ec == Coordination::Error::ZNONODE && has_parent) + { + LOG_INFO(logger, "Lock on path {} for part {} doesn't exist, refuse to remove blobs", zookeeper_part_replica_node, part_name); + return {false, {}}; + } + throw zkutil::KeeperException(ec, zookeeper_part_replica_node); } From b98b1759c8b2a95df03f69d5e8e9190bd2b045a2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 14 Oct 2022 17:04:42 +0000 Subject: [PATCH 089/252] Fix additional_table_filters for Distributed. 
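The gist of the change: the additional_table_filters setting was not propagated to the
remote shards of a Distributed table. updateSettingsForCluster() now serializes
query_info->additional_filter_ast back into additional_table_filters, keyed by the
underlying table's full name, so each shard re-applies the filter when it reads.
Illustrative usage, matching the added test (cluster and table names from that test):

    SELECT * FROM distr_table
    SETTINGS additional_table_filters = {'distr_table' : 'x = 2'};
    -- with test_cluster_two_shards both shards return the filtered row, hence '2 bb' twice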
--- src/Interpreters/ClusterProxy/executeQuery.cpp | 13 +++++++++++-- src/Interpreters/ClusterProxy/executeQuery.h | 2 +- src/Storages/getStructureOfRemoteTable.cpp | 4 ++-- .../0_stateless/02346_additional_filters.reference | 8 ++++++++ .../0_stateless/02346_additional_filters.sql | 6 ++++++ 5 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index d974721627e..39a7970683a 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -26,7 +27,7 @@ namespace ErrorCodes namespace ClusterProxy { -ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, Poco::Logger * log) +ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info, Poco::Logger * log) { Settings new_settings = settings; new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time); @@ -96,6 +97,14 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c new_settings.limit.changed = false; } + if (query_info && query_info->additional_filter_ast) + { + Tuple tuple; + tuple.push_back(main_table.getFullTableName()); + tuple.push_back(queryToString(query_info->additional_filter_ast)); + new_settings.additional_table_filters.value.push_back(std::move(tuple)); + } + auto new_context = Context::createCopy(context); new_context->setSettings(new_settings); return new_context; @@ -121,7 +130,7 @@ void executeQuery( std::vector plans; SelectStreamFactory::Shards remote_shards; - auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, log); + auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, main_table, &query_info, log); new_context->getClientInfo().distributed_depth += 1; diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 1a5035015a7..ac88752ce74 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -35,7 +35,7 @@ class SelectStreamFactory; /// /// @return new Context with adjusted settings ContextMutablePtr updateSettingsForCluster( - const Cluster & cluster, ContextPtr context, const Settings & settings, Poco::Logger * log = nullptr); + const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info = nullptr, Poco::Logger * log = nullptr); /// Execute a distributed query, creating a query plan, from which the query pipeline can be built. 
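/// Editor's note (illustrative, not part of the patch): updateSettingsForCluster() now
/// always takes the main table's StorageID, with the SelectQueryInfo pointer optional, so
/// callers that only need adjusted settings pass the table alone, as
/// getStructureOfRemoteTable.cpp does below:
///
///     auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), table_id);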
/// `stream_factory` object encapsulates the logic of creating plans for a different type of query diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 3d104ada0b6..a93a480adb0 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -58,7 +58,7 @@ ColumnsDescription getStructureOfRemoteTableInShard( } ColumnsDescription res; - auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef()); + auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), table_id); /// Expect only needed columns from the result of DESC TABLE. NOTE 'comment' column is ignored for compatibility reasons. Block sample_block @@ -169,7 +169,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables( const auto & shards_info = cluster.getShardsInfo(); auto query = "DESC TABLE " + remote_table_id.getFullTableName(); - auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef()); + auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), remote_table_id); new_context->setSetting("describe_extend_object_types", true); /// Expect only needed columns from the result of DESC TABLE. diff --git a/tests/queries/0_stateless/02346_additional_filters.reference b/tests/queries/0_stateless/02346_additional_filters.reference index 22d53173e71..0a08995223d 100644 --- a/tests/queries/0_stateless/02346_additional_filters.reference +++ b/tests/queries/0_stateless/02346_additional_filters.reference @@ -60,6 +60,14 @@ select * from remote('127.0.0.{1,2}', system.one) settings additional_table_filt 0 0 select * from remote('127.0.0.{1,2}', system.one) settings additional_table_filters={'system.one' : 'dummy != 0'}; +select * from distr_table settings additional_table_filters={'distr_table' : 'x = 2'}; +2 bb +2 bb +select * from distr_table settings additional_table_filters={'distr_table' : 'x != 2 and x != 3'}; +1 a +4 dddd +1 a +4 dddd select * from system.numbers limit 5; 0 1 diff --git a/tests/queries/0_stateless/02346_additional_filters.sql b/tests/queries/0_stateless/02346_additional_filters.sql index 9e0bee4549b..f6b665713ec 100644 --- a/tests/queries/0_stateless/02346_additional_filters.sql +++ b/tests/queries/0_stateless/02346_additional_filters.sql @@ -1,3 +1,4 @@ +-- Tags: distributed drop table if exists table_1; drop table if exists table_2; drop table if exists v_numbers; @@ -6,6 +7,8 @@ drop table if exists mv_table; create table table_1 (x UInt32, y String) engine = MergeTree order by x; insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +CREATE TABLE distr_table (x UInt32, y String) ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), 'table_1'); + -- { echoOn } select * from table_1; @@ -29,6 +32,9 @@ select x from table_1 prewhere x != 2 where x != 2 settings additional_table_fil select * from remote('127.0.0.{1,2}', system.one) settings additional_table_filters={'system.one' : 'dummy = 0'}; select * from remote('127.0.0.{1,2}', system.one) settings additional_table_filters={'system.one' : 'dummy != 0'}; +select * from distr_table settings additional_table_filters={'distr_table' : 'x = 2'}; +select * from distr_table settings additional_table_filters={'distr_table' : 'x != 2 and x != 3'}; + select * from system.numbers limit 5; select * from system.numbers as t limit 5 settings additional_table_filters={'t' 
: 'number % 2 != 0'}; select * from system.numbers limit 5 settings additional_table_filters={'system.numbers' : 'number != 3'}; From 52427e602808c1d319d06eb09baac3e0e91236a9 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 14 Oct 2022 18:07:02 +0000 Subject: [PATCH 090/252] Remove code duplication --- .../AggregateFunctionDistinct.h | 4 ++-- src/DataTypes/DataTypeAggregateFunction.cpp | 18 +++++++++++++++++ src/DataTypes/DataTypeAggregateFunction.h | 9 +++++++-- src/Formats/NativeReader.cpp | 13 +----------- src/Formats/NativeWriter.cpp | 20 +------------------ src/Interpreters/InterpreterCreateQuery.cpp | 15 ++------------ src/Storages/MergeTree/IMergeTreeDataPart.cpp | 11 +--------- 7 files changed, 32 insertions(+), 58 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index 46ded55f3e4..9884e92f425 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -196,7 +196,7 @@ public: this->data(place).deserialize(buf, arena); } - template + template void insertResultIntoImpl(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const { auto arguments = this->data(place).getArguments(this->argument_types); @@ -206,7 +206,7 @@ public: assert(!arguments.empty()); nested_func->addBatchSinglePlace(0, arguments[0]->size(), getNestedPlace(place), arguments_raw.data(), arena); - if constexpr (merge) + if constexpr (MergeResult) nested_func->insertMergeResultInto(getNestedPlace(place), to, arena); else nested_func->insertResultInto(getNestedPlace(place), to, arena); diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index 8c0f0b95025..7056fcff42f 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -241,6 +242,23 @@ static DataTypePtr create(const ASTPtr & arguments) return std::make_shared(function, argument_types, params_row, version); } +void setVersionToAggregateFunctions(DataTypePtr & type, bool if_empty, std::optional revision) +{ + auto callback = [revision, if_empty](DataTypePtr & column_type) + { + const auto * aggregate_function_type = typeid_cast(column_type.get()); + if (aggregate_function_type && aggregate_function_type->isVersioned()) + { + if (revision) + aggregate_function_type->updateVersionFromRevision(*revision, if_empty); + else + aggregate_function_type->setVersion(0, if_empty); + } + }; + + callOnNestedSimpleTypes(type, callback); +} + void registerDataTypeAggregateFunction(DataTypeFactory & factory) { diff --git a/src/DataTypes/DataTypeAggregateFunction.h b/src/DataTypes/DataTypeAggregateFunction.h index 39fbfb62917..4a92e6c5703 100644 --- a/src/DataTypes/DataTypeAggregateFunction.h +++ b/src/DataTypes/DataTypeAggregateFunction.h @@ -70,8 +70,6 @@ public: bool isVersioned() const { return function->isVersioned(); } - size_t getVersionFromRevision(size_t revision) const { return function->getVersionFromRevision(revision); } - /// Version is not empty only if it was parsed from AST or implicitly cast to 0 or version according /// to server revision. 
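/// Editor's note (illustrative, not part of the patch): this refactor replaces four nearly
/// identical callOnNestedSimpleTypes() lambdas (NativeReader, NativeWriter,
/// InterpreterCreateQuery, IMergeTreeDataPart) with the single
/// setVersionToAggregateFunctions() helper declared at the end of this header. Without a
/// revision it pins version 0; with one it derives the version from the revision:
///
///     setVersionToAggregateFunctions(column_type, /*if_empty=*/ true);        // ATTACH path: pin version 0
///     setVersionToAggregateFunctions(column.type, true, server_revision);     // native read: from revision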
/// It is ok to have an empty version value here - then for serialization a default (latest) @@ -84,6 +82,13 @@ public: version = version_; } + + void updateVersionFromRevision(size_t revision, bool if_empty) const + { + setVersion(function->getVersionFromRevision(revision), if_empty); + } }; +void setVersionToAggregateFunctions(DataTypePtr & type, bool if_empty, std::optional revision = std::nullopt); + } diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index a334f1c14ca..98688bf03b7 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -13,7 +13,6 @@ #include #include #include -#include namespace DB @@ -146,17 +145,7 @@ Block NativeReader::read() readBinary(type_name, istr); column.type = data_type_factory.get(type_name); - auto callback = [&](DataTypePtr & type) - { - const auto * aggregate_function_data_type = typeid_cast(type.get()); - if (aggregate_function_data_type && aggregate_function_data_type->isVersioned()) - { - auto version = aggregate_function_data_type->getVersionFromRevision(server_revision); - aggregate_function_data_type->setVersion(version, /*if_empty=*/ true); - } - }; - - callOnNestedSimpleTypes(column.type, callback); + setVersionToAggregateFunctions(column.type, true, server_revision); SerializationPtr serialization; if (server_revision >= DBMS_MIN_REVISION_WITH_CUSTOM_SERIALIZATION) diff --git a/src/Formats/NativeWriter.cpp b/src/Formats/NativeWriter.cpp index a33f0581c3c..0cae2a2e789 100644 --- a/src/Formats/NativeWriter.cpp +++ b/src/Formats/NativeWriter.cpp @@ -14,7 +14,6 @@ #include #include #include -#include namespace DB { @@ -116,24 +115,7 @@ void NativeWriter::write(const Block & block) writeStringBinary(column.name, ostr); bool include_version = client_revision >= DBMS_MIN_REVISION_WITH_AGGREGATE_FUNCTIONS_VERSIONING; - auto callback = [&](DataTypePtr & type) - { - const auto * aggregate_function_data_type = typeid_cast(type.get()); - if (aggregate_function_data_type && aggregate_function_data_type->isVersioned()) - { - if (include_version) - { - auto version = aggregate_function_data_type->getVersionFromRevision(client_revision); - aggregate_function_data_type->setVersion(version, /* if_empty */true); - } - else - { - aggregate_function_data_type->setVersion(0, /* if_empty */false); - } - } - }; - - callOnNestedSimpleTypes(column.type, callback); + setVersionToAggregateFunctions(column.type, include_version, include_version ? 
std::optional(client_revision) : std::nullopt); /// Type String type_name = column.type->getName(); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 6d2c5a84b56..74ff055efd4 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -12,17 +12,14 @@ #include #include -#include #include #include #include -#include #include #include #include -#include #include #include #include @@ -37,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -56,11 +52,9 @@ #include #include #include -#include #include #include -#include #include #include #include @@ -485,13 +479,8 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( { column_type = DataTypeFactory::instance().get(col_decl.type); - auto callback = [&](DataTypePtr & type) - { - const auto * aggregate_function_type = typeid_cast(type.get()); - if (attach && aggregate_function_type && aggregate_function_type->isVersioned()) - aggregate_function_type->setVersion(0, /* if_empty */true); - }; - callOnNestedSimpleTypes(column_type, callback); + if (attach) + setVersionToAggregateFunctions(column_type, true); if (col_decl.null_modifier) { diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index d89761e44a3..5d2e755c1ab 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -30,7 +30,6 @@ #include #include #include -#include #include #include @@ -1192,15 +1191,7 @@ void IMergeTreeDataPart::loadColumns(bool require) loaded_columns.readText(*in); for (auto & column : loaded_columns) - { - auto callback = [](DataTypePtr & type) - { - const auto * aggregate_function_data_type = typeid_cast(type.get()); - if (aggregate_function_data_type && aggregate_function_data_type->isVersioned()) - aggregate_function_data_type->setVersion(0, /* if_empty */true); - }; - callOnNestedSimpleTypes(column.type, callback); - } + setVersionToAggregateFunctions(column.type, true); } SerializationInfo::Settings settings = From fb637818eabe384b3d17771c85f65731d9d630d4 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 14 Oct 2022 18:47:21 +0000 Subject: [PATCH 091/252] Make test better --- ...02267_file_globs_schema_inference.reference | 1 + .../02267_file_globs_schema_inference.sh | 18 ++++++++++++++++++ .../02267_file_globs_schema_inference.sql | 11 ----------- 3 files changed, 19 insertions(+), 11 deletions(-) create mode 100755 tests/queries/0_stateless/02267_file_globs_schema_inference.sh delete mode 100644 tests/queries/0_stateless/02267_file_globs_schema_inference.sql diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.reference b/tests/queries/0_stateless/02267_file_globs_schema_inference.reference index 98da2074df6..ad94d5181ef 100644 --- a/tests/queries/0_stateless/02267_file_globs_schema_inference.reference +++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.reference @@ -1,2 +1,3 @@ 1 \N +OK diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.sh b/tests/queries/0_stateless/02267_file_globs_schema_inference.sh new file mode 100755 index 00000000000..701e18a0259 --- /dev/null +++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
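# Editor's note (illustrative, not part of the patch): the test moves from .sql to .sh so it
# can write files under a per-test-unique name (safe for parallel runs, hence the dropped
# no-parallel tag) and grep the client's stderr instead of relying on a serverError
# annotation. The key assertion, in sketch form: globbing files with conflicting schemas
# must fail once the schema cache is bypassed:
#   $CLICKHOUSE_CLIENT -q "select * from file('..._data*.jsonl') settings schema_inference_use_cache_for_file=0" \
#       2>&1 | grep -F -q "INCORRECT_DATA" && echo "OK" || echo "FAIL"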
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data2.jsonl') select NULL as x SETTINGS engine_file_truncate_on_insert = 1"; +$CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data3.jsonl') select * from numbers(0) SETTINGS engine_file_truncate_on_insert = 1"; +$CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data4.jsonl') select 1 as x SETTINGS engine_file_truncate_on_insert = 1"; + +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data*.jsonl') order by x"; + +$CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data4.jsonl', 'TSV') select 1 as x"; +$CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.jsonl', 'TSV') select [1,2,3] as x SETTINGS engine_file_truncate_on_insert = 1"; + +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data*.jsonl') settings schema_inference_use_cache_for_file=0" 2>&1 | grep -F -q "INCORRECT_DATA" && echo "OK" || echo "FAIL"; + diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql b/tests/queries/0_stateless/02267_file_globs_schema_inference.sql deleted file mode 100644 index b2a2997beab..00000000000 --- a/tests/queries/0_stateless/02267_file_globs_schema_inference.sql +++ /dev/null @@ -1,11 +0,0 @@ --- Tags: no-fasttest, no-parallel - -insert into function file('02267_data2.jsonl') select NULL as x SETTINGS engine_file_truncate_on_insert = 1; -insert into function file('02267_data3.jsonl') select * from numbers(0) SETTINGS engine_file_truncate_on_insert = 1; -insert into function file('02267_data4.jsonl') select 1 as x SETTINGS engine_file_truncate_on_insert = 1; -select * from file('02267_data*.jsonl') order by x; - -insert into function file('02267_data4.jsonl', 'TSV') select 1 as x; -insert into function file('02267_data1.jsonl', 'TSV') select [1,2,3] as x SETTINGS engine_file_truncate_on_insert = 1; - -select * from file('02267_data*.jsonl') settings schema_inference_use_cache_for_file=0; --{serverError INCORRECT_DATA} From 18a03d0dc7efc5467be08026e4bdc4f800371337 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 14 Oct 2022 21:41:28 +0200 Subject: [PATCH 092/252] Fix fasttest --- .../02455_duplicate_column_names_in_schema_inference.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.sql b/tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.sql index 3eb3549ee48..626a4d7034e 100644 --- a/tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.sql +++ b/tests/queries/0_stateless/02455_duplicate_column_names_in_schema_inference.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest + desc format(JSONEachRow, '{"x" : 1, "x" : 2}'); -- {serverError INCORRECT_DATA} desc format(JSONEachRow, '{"x" : 1, "y" : 2}\n{"x" : 2, "x" : 3}'); -- {serverError INCORRECT_DATA} desc format(CSVWithNames, 'a,b,a\n1,2,3'); -- {serverError INCORRECT_DATA} From 024d0706c58cdfb65a14289d38e11b0a10fdd800 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 14 Oct 2022 21:52:56 +0200 Subject: [PATCH 093/252] Fix test --- tests/queries/0_stateless/02456_datetime_schema_inference.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02456_datetime_schema_inference.sql 
b/tests/queries/0_stateless/02456_datetime_schema_inference.sql index f00dba1a9ea..34749dbd412 100644 --- a/tests/queries/0_stateless/02456_datetime_schema_inference.sql +++ b/tests/queries/0_stateless/02456_datetime_schema_inference.sql @@ -6,10 +6,10 @@ select * from format('TSV', '2022-04-22T03:45:06.381Z'); select * from format('TSV', '01/12/1925'); set date_time_input_format = 'best_effort'; select * from format('TSV', '2022-04-22T03:45:06.381'); -select * from format('TSV', '2022-04-22T03:45:06.381Z'); +select toTimeZone(c1, 'UTC') from format('TSV', '2022-04-22T03:45:06.381Z'); select * from format('TSV', '01/12/1925'); set date_time_input_format = 'best_effort_us'; select * from format('TSV', '2022-04-22T03:45:06.381'); -select * from format('TSV', '2022-04-22T03:45:06.381Z'); +select toTimeZone(c1, 'UTC') from format('TSV', '2022-04-22T03:45:06.381Z'); select * from format('TSV', '01/12/1925'); From 9e9e967f1f03c86e33e60763f41e90b9a5462b4f Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Sun, 16 Oct 2022 16:57:15 +0800 Subject: [PATCH 094/252] choose correct aggregation method for lc128 and lc512 --- src/Interpreters/Aggregator.cpp | 8 +++++++- ...w_cardinality_uint128_aggregator.reference | 20 +++++++++++++++++++ ...459_low_cardinality_uint128_aggregator.sql | 9 +++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02459_low_cardinality_uint128_aggregator.reference create mode 100644 tests/queries/0_stateless/02459_low_cardinality_uint128_aggregator.sql diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index b5d15b0927b..40070087c6e 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -811,6 +811,10 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod() return AggregatedDataVariants::Type::low_cardinality_key32; if (size_of_field == 8) return AggregatedDataVariants::Type::low_cardinality_key64; + if (size_of_field == 16) + return AggregatedDataVariants::Type::low_cardinality_keys128; + if (size_of_field == 32) + return AggregatedDataVariants::Type::low_cardinality_keys256; } if (size_of_field == 1) @@ -3019,7 +3023,7 @@ Block Aggregator::mergeBlocks(BlocksList & blocks, bool final) auto bucket_num = blocks.front().info.bucket_num; bool is_overflows = blocks.front().info.is_overflows; - LOG_TRACE(log, "Merging partially aggregated blocks (bucket = {}).", bucket_num); + LOG_DEBUG(log, "Merging partially aggregated blocks (bucket = {}) with method {}.", bucket_num, method_chosen); Stopwatch watch; /** If possible, change 'method' to some_hash64. Otherwise, leave as is. @@ -3045,6 +3049,8 @@ Block Aggregator::mergeBlocks(BlocksList & blocks, bool final) #undef APPLY_FOR_VARIANTS_THAT_MAY_USE_BETTER_HASH_FUNCTION + LOG_DEBUG(log, "Merging partially aggregated blocks (bucket = {}) with method {}.", bucket_num, method_chosen); + /// Temporary data for aggregation. 
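/// Editor's note (illustrative, not part of the patch): the fix above extends
/// chooseAggregationMethod() so LowCardinality keys with 16- and 32-byte dictionary values
/// (LowCardinality(UInt128), LowCardinality(UInt256)) get the dictionary-aware variants,
/// mirroring the existing 1/2/4/8-byte branches instead of falling through to a non-LC
/// method:
///
///     if (size_of_field == 16) return AggregatedDataVariants::Type::low_cardinality_keys128;
///     if (size_of_field == 32) return AggregatedDataVariants::Type::low_cardinality_keys256;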
AggregatedDataVariants result; diff --git a/tests/queries/0_stateless/02459_low_cardinality_uint128_aggregator.reference b/tests/queries/0_stateless/02459_low_cardinality_uint128_aggregator.reference new file mode 100644 index 00000000000..2a3af430e48 --- /dev/null +++ b/tests/queries/0_stateless/02459_low_cardinality_uint128_aggregator.reference @@ -0,0 +1,20 @@ +0 4950 +1 14950 +2 24950 +3 34950 +4 44950 +5 54950 +6 64950 +7 74950 +8 84950 +9 94950 +0 4950 +1 14950 +2 24950 +3 34950 +4 44950 +5 54950 +6 64950 +7 74950 +8 84950 +9 94950 diff --git a/tests/queries/0_stateless/02459_low_cardinality_uint128_aggregator.sql b/tests/queries/0_stateless/02459_low_cardinality_uint128_aggregator.sql new file mode 100644 index 00000000000..893e5514ba5 --- /dev/null +++ b/tests/queries/0_stateless/02459_low_cardinality_uint128_aggregator.sql @@ -0,0 +1,9 @@ +SET allow_suspicious_low_cardinality_types = 1; +-- LC UInt128 +CREATE TABLE group_by_pk_lc_uint128 (`k` LowCardinality(UInt128), `v` UInt32) ENGINE = MergeTree ORDER BY k PARTITION BY v%50; +INSERT INTO group_by_pk_lc_uint128 SELECT number / 100, number FROM numbers(1000); +SELECT k, sum(v) AS s FROM group_by_pk_lc_uint128 GROUP BY k ORDER BY k ASC LIMIT 1024 SETTINGS optimize_aggregation_in_order = 1; +-- LC UInt256 +CREATE TABLE group_by_pk_lc_uint256 (`k` LowCardinality(UInt256), `v` UInt32) ENGINE = MergeTree ORDER BY k PARTITION BY v%50; +INSERT INTO group_by_pk_lc_uint256 SELECT number / 100, number FROM numbers(1000); +SELECT k, sum(v) AS s FROM group_by_pk_lc_uint256 GROUP BY k ORDER BY k ASC LIMIT 1024 SETTINGS optimize_aggregation_in_order = 1; From 5526e05aac627076767d47e85bbbf9acbf175a44 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Sun, 16 Oct 2022 17:19:40 +0800 Subject: [PATCH 095/252] remove junk log --- src/Interpreters/Aggregator.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 40070087c6e..7f1cf11a561 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -3023,7 +3023,7 @@ Block Aggregator::mergeBlocks(BlocksList & blocks, bool final) auto bucket_num = blocks.front().info.bucket_num; bool is_overflows = blocks.front().info.is_overflows; - LOG_DEBUG(log, "Merging partially aggregated blocks (bucket = {}) with method {}.", bucket_num, method_chosen); + LOG_TRACE(log, "Merging partially aggregated blocks (bucket = {}).", bucket_num); Stopwatch watch; /** If possible, change 'method' to some_hash64. Otherwise, leave as is. @@ -3049,8 +3049,6 @@ Block Aggregator::mergeBlocks(BlocksList & blocks, bool final) #undef APPLY_FOR_VARIANTS_THAT_MAY_USE_BETTER_HASH_FUNCTION - LOG_DEBUG(log, "Merging partially aggregated blocks (bucket = {}) with method {}.", bucket_num, method_chosen); - /// Temporary data for aggregation. 
AggregatedDataVariants result; From c6b2ee47dfb38954b97b4d541c57fd2b0aa5ebb9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Oct 2022 02:10:36 +0200 Subject: [PATCH 096/252] Fix data race in query finish/cancel --- src/Processors/Executors/ExecutingGraph.cpp | 16 ++--- src/Processors/Executors/ExecutingGraph.h | 7 +- src/Processors/Executors/PipelineExecutor.cpp | 4 +- src/Processors/Executors/PipelineExecutor.h | 3 +- .../PushingAsyncPipelineExecutor.cpp | 2 +- .../Executors/PushingPipelineExecutor.cpp | 2 +- .../gtest_exception_on_incorrect_pipeline.cpp | 12 ++-- src/QueryPipeline/BlockIO.h | 3 +- src/QueryPipeline/Pipe.cpp | 46 ++++++------- src/QueryPipeline/Pipe.h | 13 ++-- src/QueryPipeline/QueryPipeline.cpp | 69 ++++++++++--------- src/QueryPipeline/QueryPipeline.h | 14 ++-- src/QueryPipeline/QueryPipelineBuilder.cpp | 12 ++-- 13 files changed, 104 insertions(+), 99 deletions(-) diff --git a/src/Processors/Executors/ExecutingGraph.cpp b/src/Processors/Executors/ExecutingGraph.cpp index 651ede10cfd..9d69abc5e87 100644 --- a/src/Processors/Executors/ExecutingGraph.cpp +++ b/src/Processors/Executors/ExecutingGraph.cpp @@ -10,17 +10,17 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -ExecutingGraph::ExecutingGraph(Processors & processors_, bool profile_processors_) - : processors(processors_) +ExecutingGraph::ExecutingGraph(std::shared_ptr processors_, bool profile_processors_) + : processors(std::move(processors_)) , profile_processors(profile_processors_) { - uint64_t num_processors = processors.size(); + uint64_t num_processors = processors->size(); nodes.reserve(num_processors); /// Create nodes. for (uint64_t node = 0; node < num_processors; ++node) { - IProcessor * proc = processors[node].get(); + IProcessor * proc = processors->at(node).get(); processors_map[proc] = node; nodes.emplace_back(std::make_unique(proc, node)); } @@ -109,10 +109,10 @@ bool ExecutingGraph::expandPipeline(std::stack & stack, uint64_t pid) { std::lock_guard guard(processors_mutex); - processors.insert(processors.end(), new_processors.begin(), new_processors.end()); + processors->insert(processors->end(), new_processors.begin(), new_processors.end()); } - uint64_t num_processors = processors.size(); + uint64_t num_processors = processors->size(); std::vector back_edges_sizes(num_processors, 0); std::vector direct_edge_sizes(num_processors, 0); @@ -126,7 +126,7 @@ bool ExecutingGraph::expandPipeline(std::stack & stack, uint64_t pid) while (nodes.size() < num_processors) { - auto * processor = processors[nodes.size()].get(); + auto * processor = processors->at(nodes.size()).get(); if (processors_map.contains(processor)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Processor {} was already added to pipeline", processor->getName()); @@ -386,7 +386,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue void ExecutingGraph::cancel() { std::lock_guard guard(processors_mutex); - for (auto & processor : processors) + for (auto & processor : *processors) processor->cancel(); } diff --git a/src/Processors/Executors/ExecutingGraph.h b/src/Processors/Executors/ExecutingGraph.h index 587a2561ae0..7ccdf9f9898 100644 --- a/src/Processors/Executors/ExecutingGraph.h +++ b/src/Processors/Executors/ExecutingGraph.h @@ -6,6 +6,7 @@ #include #include + namespace DB { @@ -123,9 +124,9 @@ public: using ProcessorsMap = std::unordered_map; ProcessorsMap processors_map; - explicit ExecutingGraph(Processors & processors_, bool profile_processors_); + explicit 
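/// Editor's note (illustrative, not part of the patch): this constructor switches from
/// Processors& to std::shared_ptr<Processors>. The graph may append to the vector
/// mid-query (expandPipeline) while finish/cancel can release the pipeline from another
/// thread; shared ownership keeps the vector alive for whichever side touches it last.
/// Call sites change accordingly, as in the updated gtest:
///
///     auto processors = std::make_shared<Processors>();
///     processors->emplace_back(std::move(source));
///     PipelineExecutor executor(processors, /*elem=*/ nullptr);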
ExecutingGraph(std::shared_ptr processors_, bool profile_processors_); - const Processors & getProcessors() const { return processors; } + const Processors & getProcessors() const { return *processors; } /// Traverse graph the first time to update all the childless nodes. void initializeExecution(Queue & queue); @@ -149,7 +150,7 @@ private: /// All new nodes and nodes with updated ports are pushed into stack. bool expandPipeline(std::stack & stack, uint64_t pid); - Processors & processors; + std::shared_ptr processors; std::mutex processors_mutex; UpgradableMutex nodes_mutex; diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index ae20d97604b..564f42be5db 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -24,7 +24,7 @@ namespace ErrorCodes } -PipelineExecutor::PipelineExecutor(Processors & processors, QueryStatus * elem) +PipelineExecutor::PipelineExecutor(std::shared_ptr & processors, QueryStatus * elem) : process_list_element(elem) { if (process_list_element) @@ -41,7 +41,7 @@ PipelineExecutor::PipelineExecutor(Processors & processors, QueryStatus * elem) /// If exception was thrown while pipeline initialization, it means that query pipeline was not build correctly. /// It is logical error, and we need more information about pipeline. WriteBufferFromOwnString buf; - printPipeline(processors, buf); + printPipeline(*processors, buf); buf.finalize(); exception.addMessage("Query pipeline:\n" + buf.str()); diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index cea64d309fa..c7a74d6ea75 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -10,6 +10,7 @@ #include #include + namespace DB { @@ -30,7 +31,7 @@ public: /// During pipeline execution new processors can appear. They will be added to existing set. /// /// Explicit graph representation is built in constructor. Throws if graph is not correct. - explicit PipelineExecutor(Processors & processors, QueryStatus * elem); + explicit PipelineExecutor(std::shared_ptr & processors, QueryStatus * elem); ~PipelineExecutor(); /// Execute pipeline in multiple threads. Must be called once. 
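/// Editor's note (illustrative, not part of the patch): PipelineExecutor mirrors the
/// ExecutingGraph change and takes std::shared_ptr<Processors>&. The mutating accesses
/// shown in this patch stay behind processors_mutex, so cancel() racing with
/// expandPipeline() is safe, as in ExecutingGraph::cancel() above:
///
///     std::lock_guard guard(processors_mutex);
///     for (auto & processor : *processors)    // shared vector, guarded against concurrent expand
///         processor->cancel();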
diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index 7a55d26f16c..ee8e94b6f28 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -129,7 +129,7 @@ PushingAsyncPipelineExecutor::PushingAsyncPipelineExecutor(QueryPipeline & pipel pushing_source = std::make_shared(pipeline.input->getHeader()); connect(pushing_source->getPort(), *pipeline.input); - pipeline.processors.emplace_back(pushing_source); + pipeline.processors->emplace_back(pushing_source); } PushingAsyncPipelineExecutor::~PushingAsyncPipelineExecutor() diff --git a/src/Processors/Executors/PushingPipelineExecutor.cpp b/src/Processors/Executors/PushingPipelineExecutor.cpp index bf43cd327fe..d9a14704cd0 100644 --- a/src/Processors/Executors/PushingPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingPipelineExecutor.cpp @@ -58,7 +58,7 @@ PushingPipelineExecutor::PushingPipelineExecutor(QueryPipeline & pipeline_) : pi pushing_source = std::make_shared(pipeline.input->getHeader(), input_wait_flag); connect(pushing_source->getPort(), *pipeline.input); - pipeline.processors.emplace_back(pushing_source); + pipeline.processors->emplace_back(pushing_source); } PushingPipelineExecutor::~PushingPipelineExecutor() diff --git a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp index b137eaf0f47..53483b7a7f5 100644 --- a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp +++ b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp @@ -23,9 +23,9 @@ TEST(Processors, PortsConnected) connect(source->getPort(), sink->getPort()); - Processors processors; - processors.emplace_back(std::move(source)); - processors.emplace_back(std::move(sink)); + auto processors = std::make_shared(); + processors->emplace_back(std::move(source)); + processors->emplace_back(std::move(sink)); QueryStatus * element = nullptr; PipelineExecutor executor(processors, element); @@ -46,9 +46,9 @@ TEST(Processors, PortsNotConnected) /// connect(source->getPort(), sink->getPort()); - Processors processors; - processors.emplace_back(std::move(source)); - processors.emplace_back(std::move(sink)); + auto processors = std::make_shared(); + processors->emplace_back(std::move(source)); + processors->emplace_back(std::move(sink)); #ifndef ABORT_ON_LOGICAL_ERROR try diff --git a/src/QueryPipeline/BlockIO.h b/src/QueryPipeline/BlockIO.h index 1f2a8f6f033..b69f86ac684 100644 --- a/src/QueryPipeline/BlockIO.h +++ b/src/QueryPipeline/BlockIO.h @@ -34,9 +34,8 @@ struct BlockIO void onFinish() { if (finish_callback) - { finish_callback(pipeline); - } + pipeline.reset(); } diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index 291739079a2..b1ad06d2000 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -155,7 +155,7 @@ Pipe::Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, Output totals_port = totals; extremes_port = extremes; output_ports.push_back(output); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); max_parallel_streams = 1; } @@ -168,18 +168,18 @@ Pipe::Pipe(ProcessorPtr source) output_ports.push_back(&source->getOutputs().front()); header = output_ports.front()->getHeader(); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); max_parallel_streams = 1; } 
-Pipe::Pipe(Processors processors_) : processors(std::move(processors_)) +Pipe::Pipe(std::shared_ptr processors_) : processors(std::move(processors_)) { /// Create hash table with processors. std::unordered_set set; - for (const auto & processor : processors) + for (const auto & processor : *processors) set.emplace(processor.get()); - for (auto & processor : processors) + for (auto & processor : *processors) { for (const auto & port : processor->getInputs()) { @@ -225,7 +225,7 @@ Pipe::Pipe(Processors processors_) : processors(std::move(processors_)) max_parallel_streams = output_ports.size(); if (collected_processors) - for (const auto & processor : processors) + for (const auto & processor : *processors) collected_processors->emplace_back(processor); } @@ -311,7 +311,7 @@ Pipe Pipe::unitePipes(Pipes pipes, Processors * collected_processors, bool allow if (!allow_empty_header || pipe.header) assertCompatibleHeader(pipe.header, res.header, "Pipe::unitePipes"); - res.processors.insert(res.processors.end(), pipe.processors.begin(), pipe.processors.end()); + res.processors->insert(res.processors->end(), pipe.processors->begin(), pipe.processors->end()); res.output_ports.insert(res.output_ports.end(), pipe.output_ports.begin(), pipe.output_ports.end()); res.max_parallel_streams += pipe.max_parallel_streams; @@ -323,15 +323,15 @@ Pipe Pipe::unitePipes(Pipes pipes, Processors * collected_processors, bool allow extremes.emplace_back(pipe.extremes_port); } - size_t num_processors = res.processors.size(); + size_t num_processors = res.processors->size(); - res.totals_port = uniteTotals(totals, res.header, res.processors); - res.extremes_port = uniteExtremes(extremes, res.header, res.processors); + res.totals_port = uniteTotals(totals, res.header, *res.processors); + res.extremes_port = uniteExtremes(extremes, res.header, *res.processors); if (res.collected_processors) { - for (; num_processors < res.processors.size(); ++num_processors) - res.collected_processors->emplace_back(res.processors[num_processors]); + for (; num_processors < res.processors->size(); ++num_processors) + res.collected_processors->emplace_back(res.processors->at(num_processors)); } return res; @@ -351,7 +351,7 @@ void Pipe::addSource(ProcessorPtr source) collected_processors->emplace_back(source); output_ports.push_back(&source->getOutputs().front()); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } @@ -373,7 +373,7 @@ void Pipe::addTotalsSource(ProcessorPtr source) collected_processors->emplace_back(source); totals_port = &source->getOutputs().front(); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); } void Pipe::addExtremesSource(ProcessorPtr source) @@ -393,7 +393,7 @@ void Pipe::addExtremesSource(ProcessorPtr source) collected_processors->emplace_back(source); extremes_port = &source->getOutputs().front(); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); } static void dropPort(OutputPort *& port, Processors & processors, Processors * collected_processors) @@ -413,12 +413,12 @@ static void dropPort(OutputPort *& port, Processors & processors, Processors * c void Pipe::dropTotals() { - dropPort(totals_port, processors, collected_processors); + dropPort(totals_port, *processors, collected_processors); } void Pipe::dropExtremes() { - dropPort(extremes_port, processors, collected_processors); + dropPort(extremes_port, 
*processors, collected_processors); } void Pipe::addTransform(ProcessorPtr transform) @@ -504,7 +504,7 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort if (collected_processors) collected_processors->emplace_back(transform); - processors.emplace_back(std::move(transform)); + processors->emplace_back(std::move(transform)); max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } @@ -595,7 +595,7 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * if (collected_processors) collected_processors->emplace_back(transform); - processors.emplace_back(std::move(transform)); + processors->emplace_back(std::move(transform)); max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } @@ -647,7 +647,7 @@ void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter) if (collected_processors) collected_processors->emplace_back(transform); - processors.emplace_back(std::move(transform)); + processors->emplace_back(std::move(transform)); } }; @@ -698,7 +698,7 @@ void Pipe::addChains(std::vector chains) if (collected_processors) collected_processors->emplace_back(transform); - processors.emplace_back(std::move(transform)); + processors->emplace_back(std::move(transform)); } } @@ -757,7 +757,7 @@ void Pipe::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter) transform = std::make_shared(stream->getHeader()); connect(*stream, transform->getInputs().front()); - processors.emplace_back(std::move(transform)); + processors->emplace_back(std::move(transform)); }; for (auto & port : output_ports) @@ -858,7 +858,7 @@ void Pipe::transform(const Transformer & transformer, bool check_ports) collected_processors->emplace_back(processor); } - processors.insert(processors.end(), new_processors.begin(), new_processors.end()); + processors->insert(processors->end(), new_processors.begin(), new_processors.end()); max_parallel_streams = std::max(max_parallel_streams, output_ports.size()); } diff --git a/src/QueryPipeline/Pipe.h b/src/QueryPipeline/Pipe.h index 79d19a18193..399e9b0a740 100644 --- a/src/QueryPipeline/Pipe.h +++ b/src/QueryPipeline/Pipe.h @@ -5,6 +5,7 @@ #include #include + namespace DB { @@ -33,7 +34,7 @@ public: /// Create from source with specified totals end extremes (may be nullptr). Ports should be owned by source. explicit Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, OutputPort * extremes); /// Create from processors. Use all not-connected output ports as output_ports. Check invariants. - explicit Pipe(Processors processors_); + explicit Pipe(std::shared_ptr processors_); Pipe(const Pipe & other) = delete; Pipe(Pipe && other) = default; @@ -41,7 +42,7 @@ public: Pipe & operator=(Pipe && other) = default; const Block & getHeader() const { return header; } - bool empty() const { return processors.empty(); } + bool empty() const { return processors->empty(); } size_t numOutputPorts() const { return output_ports.size(); } size_t maxParallelStreams() const { return max_parallel_streams; } OutputPort * getOutputPort(size_t pos) const { return output_ports[pos]; } @@ -96,15 +97,15 @@ public: /// Unite several pipes together. They should have same header. static Pipe unitePipes(Pipes pipes); - /// Get processors from Pipe. Use it with cautious, it is easy to loss totals and extremes ports. - static Processors detachProcessors(Pipe pipe) { return std::move(pipe.processors); } + /// Get processors from Pipe. 
Use it with caution, it is easy to lose totals and extremes ports. + static Processors detachProcessors(Pipe pipe) { return *std::move(pipe.processors); } /// Get processors from Pipe without destroying pipe (used for EXPLAIN to keep QueryPlan). - const Processors & getProcessors() const { return processors; } + const Processors & getProcessors() const { return *processors; } private: /// Header is common for all output below. Block header; - Processors processors; + std::shared_ptr processors; /// Output ports. Totals and extremes are allowed to be empty. OutputPortRawPtrs output_ports; diff --git a/src/QueryPipeline/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp index 31b18c7f7f0..63bf1025d2f 100644 --- a/src/QueryPipeline/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -21,6 +21,7 @@ #include #include + namespace DB { @@ -210,16 +211,16 @@ static void initRowsBeforeLimit(IOutputFormat * output_format) QueryPipeline::QueryPipeline( QueryPlanResourceHolder resources_, - Processors processors_) + std::shared_ptr processors_) : resources(std::move(resources_)) , processors(std::move(processors_)) { - checkCompleted(processors); + checkCompleted(*processors); } QueryPipeline::QueryPipeline( QueryPlanResourceHolder resources_, - Processors processors_, + std::shared_ptr processors_, InputPort * input_) : resources(std::move(resources_)) , processors(std::move(processors_)) @@ -231,7 +232,7 @@ QueryPipeline::QueryPipeline( "Cannot create pushing QueryPipeline because its input port is connected or null"); bool found_input = false; - for (const auto & processor : processors) + for (const auto & processor : *processors) { for (const auto & in : processor->getInputs()) { @@ -255,7 +256,7 @@ QueryPipeline::QueryPipeline(std::shared_ptr source) : QueryPipeline(Pi QueryPipeline::QueryPipeline( QueryPlanResourceHolder resources_, - Processors processors_, + std::shared_ptr processors_, OutputPort * output_, OutputPort * totals_, OutputPort * extremes_) @@ -265,7 +266,7 @@ QueryPipeline::QueryPipeline( , totals(totals_) , extremes(extremes_) { - checkPulling(processors, output, totals, extremes); + checkPulling(*processors, output, totals, extremes); } QueryPipeline::QueryPipeline(Pipe pipe) @@ -278,12 +279,12 @@ QueryPipeline::QueryPipeline(Pipe pipe) extremes = pipe.getExtremesPort(); processors = std::move(pipe.processors); - checkPulling(processors, output, totals, extremes); + checkPulling(*processors, output, totals, extremes); } else { processors = std::move(pipe.processors); - checkCompleted(processors); + checkCompleted(*processors); } } @@ -292,13 +293,13 @@ QueryPipeline::QueryPipeline(Chain chain) , input(&chain.getInputPort()) , num_threads(chain.getNumThreads()) { - processors.reserve(chain.getProcessors().size() + 1); + processors->reserve(chain.getProcessors().size() + 1); for (auto processor : chain.getProcessors()) - processors.emplace_back(std::move(processor)); + processors->emplace_back(std::move(processor)); auto sink = std::make_shared(chain.getOutputPort().getHeader()); connect(chain.getOutputPort(), sink->getPort()); - processors.emplace_back(std::move(sink)); + processors->emplace_back(std::move(sink)); input = &chain.getInputPort(); } @@ -313,14 +314,14 @@ QueryPipeline::QueryPipeline(std::shared_ptr format) { auto source = std::make_shared(format_totals.getHeader()); totals = &source->getPort(); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); } if (!extremes) { auto source = 
std::make_shared(format_extremes.getHeader()); extremes = &source->getPort(); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); } connect(*totals, format_totals); @@ -332,7 +333,7 @@ QueryPipeline::QueryPipeline(std::shared_ptr format) output_format = format.get(); - processors.emplace_back(std::move(format)); + processors->emplace_back(std::move(format)); } static void drop(OutputPort *& port, Processors & processors) @@ -354,11 +355,11 @@ void QueryPipeline::complete(std::shared_ptr sink) if (!pulling()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline must be pulling to be completed with sink"); - drop(totals, processors); - drop(extremes, processors); + drop(totals, *processors); + drop(extremes, *processors); connect(*output, sink->getPort()); - processors.emplace_back(std::move(sink)); + processors->emplace_back(std::move(sink)); output = nullptr; } @@ -369,17 +370,17 @@ void QueryPipeline::complete(Chain chain) resources = chain.detachResources(); - drop(totals, processors); - drop(extremes, processors); + drop(totals, *processors); + drop(extremes, *processors); - processors.reserve(processors.size() + chain.getProcessors().size() + 1); + processors->reserve(processors->size() + chain.getProcessors().size() + 1); for (auto processor : chain.getProcessors()) - processors.emplace_back(std::move(processor)); + processors->emplace_back(std::move(processor)); auto sink = std::make_shared(chain.getOutputPort().getHeader()); connect(*output, chain.getInputPort()); connect(chain.getOutputPort(), sink->getPort()); - processors.emplace_back(std::move(sink)); + processors->emplace_back(std::move(sink)); output = nullptr; } @@ -400,7 +401,7 @@ void QueryPipeline::complete(Pipe pipe) input = nullptr; auto pipe_processors = Pipe::detachProcessors(std::move(pipe)); - processors.insert(processors.end(), pipe_processors.begin(), pipe_processors.end()); + processors->insert(processors->end(), pipe_processors.begin(), pipe_processors.end()); } static void addMaterializing(OutputPort *& output, Processors & processors) @@ -421,9 +422,9 @@ void QueryPipeline::complete(std::shared_ptr format) if (format->expectMaterializedColumns()) { - addMaterializing(output, processors); - addMaterializing(totals, processors); - addMaterializing(extremes, processors); + addMaterializing(output, *processors); + addMaterializing(totals, *processors); + addMaterializing(extremes, *processors); } auto & format_main = format->getPort(IOutputFormat::PortKind::Main); @@ -434,14 +435,14 @@ void QueryPipeline::complete(std::shared_ptr format) { auto source = std::make_shared(format_totals.getHeader()); totals = &source->getPort(); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); } if (!extremes) { auto source = std::make_shared(format_extremes.getHeader()); extremes = &source->getPort(); - processors.emplace_back(std::move(source)); + processors->emplace_back(std::move(source)); } connect(*output, format_main); @@ -455,7 +456,7 @@ void QueryPipeline::complete(std::shared_ptr format) initRowsBeforeLimit(format.get()); output_format = format.get(); - processors.emplace_back(std::move(format)); + processors->emplace_back(std::move(format)); } Block QueryPipeline::getHeader() const @@ -504,7 +505,7 @@ void QueryPipeline::setLimitsAndQuota(const StreamLocalLimits & limits, std::sha transform->setQuota(quota_); connect(*output, transform->getInputPort()); output = &transform->getOutputPort(); - processors.emplace_back(std::move(transform)); + 
processors->emplace_back(std::move(transform)); } @@ -529,7 +530,7 @@ void QueryPipeline::addCompletedPipeline(QueryPipeline other) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add not completed pipeline"); resources = std::move(other.resources); - processors.insert(processors.end(), other.processors.begin(), other.processors.end()); + processors->insert(processors->end(), other.processors->begin(), other.processors->end()); } void QueryPipeline::reset() @@ -560,9 +561,9 @@ void QueryPipeline::convertStructureTo(const ColumnsWithTypeAndName & columns) ActionsDAG::MatchColumnsMode::Position); auto actions = std::make_shared(std::move(converting)); - addExpression(output, actions, processors); - addExpression(totals, actions, processors); - addExpression(extremes, actions, processors); + addExpression(output, actions, *processors); + addExpression(totals, actions, *processors); + addExpression(extremes, actions, *processors); } std::unique_ptr QueryPipeline::getReadProgressCallback() const diff --git a/src/QueryPipeline/QueryPipeline.h b/src/QueryPipeline/QueryPipeline.h index 1b88ede3349..04d4a45e683 100644 --- a/src/QueryPipeline/QueryPipeline.h +++ b/src/QueryPipeline/QueryPipeline.h @@ -4,6 +4,7 @@ #include #include + namespace DB { @@ -34,6 +35,7 @@ class ReadProgressCallback; struct ColumnWithTypeAndName; using ColumnsWithTypeAndName = std::vector; + class QueryPipeline { public: @@ -58,23 +60,23 @@ public: /// completed QueryPipeline( QueryPlanResourceHolder resources_, - Processors processors_); + std::shared_ptr processors_); /// pushing QueryPipeline( QueryPlanResourceHolder resources_, - Processors processors_, + std::shared_ptr processors_, InputPort * input_); /// pulling QueryPipeline( QueryPlanResourceHolder resources_, - Processors processors_, + std::shared_ptr processors_, OutputPort * output_, OutputPort * totals_ = nullptr, OutputPort * extremes_ = nullptr); - bool initialized() const { return !processors.empty(); } + bool initialized() const { return !processors->empty(); } /// When initialized, exactly one of the following is true. /// Use PullingPipelineExecutor or PullingAsyncPipelineExecutor. bool pulling() const { return output != nullptr; } @@ -119,7 +121,7 @@ public: /// Add processors and resources from other pipeline. Other pipeline should be completed. void addCompletedPipeline(QueryPipeline other); - const Processors & getProcessors() const { return processors; } + const Processors & getProcessors() const { return *processors; } /// For pulling pipeline, convert structure to expected. /// Trash, need to remove later. 
@@ -134,7 +136,7 @@ private: std::shared_ptr quota; bool update_profile_events = true; - Processors processors; + std::shared_ptr processors; InputPort * input = nullptr; diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 440f123e876..d56113a07e6 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -327,9 +327,9 @@ QueryPipelineBuilderPtr QueryPipelineBuilder::mergePipelines( collected_processors->emplace_back(transform); left->pipe.output_ports.front() = &transform->getOutputs().front(); - left->pipe.processors.emplace_back(transform); + left->pipe.processors->emplace_back(transform); - left->pipe.processors.insert(left->pipe.processors.end(), right->pipe.processors.begin(), right->pipe.processors.end()); + left->pipe.processors->insert(left->pipe.processors->end(), right->pipe.processors->begin(), right->pipe.processors->end()); left->pipe.header = left->pipe.output_ports.front()->getHeader(); left->pipe.max_parallel_streams = std::max(left->pipe.max_parallel_streams, right->pipe.max_parallel_streams); return left; @@ -383,7 +383,7 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe /// Collect the NEW processors for the right pipeline. QueryPipelineProcessorsCollector collector(*right); /// Remember the last step of the right pipeline. - ExpressionStep* step = typeid_cast(right->pipe.processors.back()->getQueryPlanStep()); + ExpressionStep* step = typeid_cast(right->pipe.processors->back()->getQueryPlanStep()); if (!step) { throw Exception(ErrorCodes::LOGICAL_ERROR, "The top step of the right pipeline should be ExpressionStep"); @@ -467,7 +467,7 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe if (collected_processors) collected_processors->emplace_back(joining); - left->pipe.processors.emplace_back(std::move(joining)); + left->pipe.processors->emplace_back(std::move(joining)); } if (left->hasTotals()) @@ -482,14 +482,14 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe if (collected_processors) collected_processors->emplace_back(joining); - left->pipe.processors.emplace_back(std::move(joining)); + left->pipe.processors->emplace_back(std::move(joining)); } /// Move the collected processors to the last step in the right pipeline. 
Processors processors = collector.detachProcessors(); step->appendExtraProcessors(processors); - left->pipe.processors.insert(left->pipe.processors.end(), right->pipe.processors.begin(), right->pipe.processors.end()); + left->pipe.processors->insert(left->pipe.processors->end(), right->pipe.processors->begin(), right->pipe.processors->end()); left->resources = std::move(right->resources); left->pipe.header = left->pipe.output_ports.front()->getHeader(); left->pipe.max_parallel_streams = std::max(left->pipe.max_parallel_streams, right->pipe.max_parallel_streams); From 1113fe4a8d1c60028ca12d8ce1efc3748f7db5d5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Oct 2022 02:27:10 +0200 Subject: [PATCH 097/252] Fix data race in query finish/cancel --- src/QueryPipeline/Pipe.cpp | 6 ++++++ src/QueryPipeline/Pipe.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index b1ad06d2000..62a928d814c 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -102,7 +102,12 @@ static OutputPort * uniteTotals(const OutputPortRawPtrs & ports, const Block & h return totals_port; } +Pipe::Pipe() : processors(std::make_shared()) +{ +} + Pipe::Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, OutputPort * extremes) + : processors(std::make_shared()) { if (!source->getInputs().empty()) throw Exception( @@ -160,6 +165,7 @@ Pipe::Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, Output } Pipe::Pipe(ProcessorPtr source) + : processors(std::make_shared()) { checkSource(*source); diff --git a/src/QueryPipeline/Pipe.h b/src/QueryPipeline/Pipe.h index 399e9b0a740..7e30d9c990e 100644 --- a/src/QueryPipeline/Pipe.h +++ b/src/QueryPipeline/Pipe.h @@ -28,7 +28,7 @@ class Pipe public: /// Default constructor creates empty pipe. Generally, you cannot do anything with it except to check it is empty(). /// You cannot get empty pipe in any other way. All transforms check that result pipe is not empty. - Pipe() = default; + Pipe(); /// Create from source. Source must have no input ports and single output. explicit Pipe(ProcessorPtr source); /// Create from source with specified totals end extremes (may be nullptr). Ports should be owned by source. 
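
The common thread of patches 096-097 (and the follow-ups below) is an ownership change: `ExecutingGraph`, `Pipe` and `QueryPipeline` previously shared one `Processors` vector by reference, so a concurrent `cancel()` (e.g. from `KILL QUERY`) could iterate a vector that query finish was already destroying. Moving the vector behind a `std::shared_ptr` lets whichever side runs last keep it alive. A minimal standalone sketch of the pattern, with hypothetical stand-in types (`MiniExecutor` and `Processor` here are not ClickHouse classes):

```cpp
#include <atomic>
#include <memory>
#include <mutex>
#include <vector>

// Stand-in for IProcessor: only the part relevant to cancellation.
struct Processor
{
    std::atomic<bool> is_cancelled{false};
    void cancel() { is_cancelled = true; }
};

using Processors = std::vector<std::shared_ptr<Processor>>;

class MiniExecutor
{
public:
    // Take shared ownership instead of holding `Processors &`: the executor
    // no longer depends on the pipeline object outliving it.
    explicit MiniExecutor(std::shared_ptr<Processors> processors_)
        : processors(std::move(processors_)) {}

    void cancel()
    {
        std::lock_guard guard(processors_mutex);
        // Safe even if the pipeline that created the vector was already
        // reset on another thread: our shared_ptr keeps it alive.
        for (auto & processor : *processors)
            processor->cancel();
    }

private:
    std::shared_ptr<Processors> processors;
    std::mutex processors_mutex;
};
```

With the previous reference member, `pipeline.reset()` on the finishing thread could destroy the vector while a cancelling thread walked it; shared ownership turns that use-after-free into an ordinary iteration over still-live (if already finished) processors.
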
From 17d486233679ad57998c358883a8da3590308067 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Oct 2022 02:29:54 +0200 Subject: [PATCH 098/252] Fix data race in query finish/cancel --- src/QueryPipeline/QueryPipeline.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/QueryPipeline/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp index 63bf1025d2f..4d3607b89fd 100644 --- a/src/QueryPipeline/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -290,6 +290,7 @@ QueryPipeline::QueryPipeline(Pipe pipe) QueryPipeline::QueryPipeline(Chain chain) : resources(chain.detachResources()) + , processors(std::make_shared()) , input(&chain.getInputPort()) , num_threads(chain.getNumThreads()) { From 89390ebc0106342cdafc582a604a4a1015fc525c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Oct 2022 02:34:33 +0200 Subject: [PATCH 099/252] Add a test --- .../02461_cancel_finish_race.reference | 0 .../0_stateless/02461_cancel_finish_race.sh | 59 +++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 tests/queries/0_stateless/02461_cancel_finish_race.reference create mode 100755 tests/queries/0_stateless/02461_cancel_finish_race.sh diff --git a/tests/queries/0_stateless/02461_cancel_finish_race.reference b/tests/queries/0_stateless/02461_cancel_finish_race.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02461_cancel_finish_race.sh b/tests/queries/0_stateless/02461_cancel_finish_race.sh new file mode 100755 index 00000000000..3b19456a9d9 --- /dev/null +++ b/tests/queries/0_stateless/02461_cancel_finish_race.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +function thread_query() +{ + while true; do + $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.numbers_mt WHERE rand() = 0 FORMAT Null"; + done +} + +function thread_cancel() +{ + while true; do + $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE 1 SYNC FORMAT Null"; + done +} + +# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout +export -f thread_query; +export -f thread_cancel; + +TIMEOUT=30 + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +timeout $TIMEOUT bash -c thread_query 2> /dev/null & +timeout $TIMEOUT bash -c thread_cancel 2> /dev/null & + +wait From 78977ac0b24805819debc3133fc624d73a38352c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Oct 2022 02:50:42 +0200 Subject: [PATCH 100/252] Better test --- tests/queries/0_stateless/02461_cancel_finish_race.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02461_cancel_finish_race.sh b/tests/queries/0_stateless/02461_cancel_finish_race.sh index 3b19456a9d9..be40843023e 100755 --- a/tests/queries/0_stateless/02461_cancel_finish_race.sh +++ b/tests/queries/0_stateless/02461_cancel_finish_race.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function thread_query() { while true; do - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.numbers_mt WHERE rand() = 0 FORMAT Null"; + $CLICKHOUSE_CLIENT --query "SELECT count() FROM numbers_mt(10000) WHERE rand() = 0 FORMAT Null"; done } From c05429574de03ed46e4951866c0879e3a30104bc Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Mon, 17 Oct 2022 08:59:39 +0800 Subject: [PATCH 101/252] add exception --- src/Interpreters/Aggregator.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 7f1cf11a561..e9a72ce0156 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -815,6 +815,7 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod() return AggregatedDataVariants::Type::low_cardinality_keys128; if (size_of_field == 32) return AggregatedDataVariants::Type::low_cardinality_keys256; + throw Exception("Logical error: low cardinality numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32.", ErrorCodes::LOGICAL_ERROR); } if (size_of_field == 1) From f88ed8195b1e433d1d406fbf9c3af6d7264c748c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Oct 2022 04:21:08 +0200 Subject: [PATCH 102/252] Fix trash --- src/Common/OvercommitTracker.cpp | 10 ++++--- src/Formats/FormatFactory.cpp | 2 +- 
.../ClusterProxy/executeQuery.cpp | 4 +-- src/Interpreters/Context.cpp | 14 ++++----- src/Interpreters/Context.h | 7 +++-- src/Interpreters/ProcessList.cpp | 30 +++++++++---------- src/Interpreters/ProcessList.h | 17 +++++------ src/Interpreters/executeQuery.cpp | 12 ++++---- src/Processors/Executors/PipelineExecutor.cpp | 4 +-- src/Processors/Executors/PipelineExecutor.h | 5 ++-- .../Formats/Impl/MySQLOutputFormat.cpp | 2 +- .../QueryPlan/BuildQueryPipelineSettings.h | 4 ++- src/Processors/Transforms/CountingTransform.h | 5 ++-- .../Transforms/buildPushingToViewsChain.cpp | 10 ++++--- .../gtest_exception_on_incorrect_pipeline.cpp | 4 +-- src/QueryPipeline/BlockIO.cpp | 3 +- src/QueryPipeline/QueryPipeline.cpp | 2 +- src/QueryPipeline/QueryPipeline.h | 5 ++-- src/QueryPipeline/QueryPipelineBuilder.cpp | 2 +- src/QueryPipeline/QueryPipelineBuilder.h | 4 +-- src/QueryPipeline/ReadProgressCallback.cpp | 3 +- src/QueryPipeline/ReadProgressCallback.h | 7 +++-- 22 files changed, 83 insertions(+), 73 deletions(-) diff --git a/src/Common/OvercommitTracker.cpp b/src/Common/OvercommitTracker.cpp index c7730667f55..bb477d6019d 100644 --- a/src/Common/OvercommitTracker.cpp +++ b/src/Common/OvercommitTracker.cpp @@ -5,6 +5,7 @@ #include #include + namespace ProfileEvents { extern const Event MemoryOvercommitWaitTimeMicroseconds; @@ -170,7 +171,8 @@ void UserOvercommitTracker::pickQueryToExcludeImpl() GlobalOvercommitTracker::GlobalOvercommitTracker(DB::ProcessList * process_list_) : OvercommitTracker(process_list_) -{} +{ +} void GlobalOvercommitTracker::pickQueryToExcludeImpl() { @@ -180,16 +182,16 @@ void GlobalOvercommitTracker::pickQueryToExcludeImpl() // This is guaranteed by locking global_mutex in OvercommitTracker::needToStopQuery. for (auto const & query : process_list->processes) { - if (query.isKilled()) + if (query->isKilled()) continue; Int64 user_soft_limit = 0; - if (auto const * user_process_list = query.getUserProcessList()) + if (auto const * user_process_list = query->getUserProcessList()) user_soft_limit = user_process_list->user_memory_tracker.getSoftLimit(); if (user_soft_limit == 0) continue; - auto * memory_tracker = query.getMemoryTracker(); + auto * memory_tracker = query->getMemoryTracker(); if (!memory_tracker) continue; auto ratio = memory_tracker->getOvercommitRatio(user_soft_limit); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index bfe651dd1af..a882fcf5009 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -303,7 +303,7 @@ InputFormatPtr FormatFactory::getInputFormat( static void addExistingProgressToOutputFormat(OutputFormatPtr format, ContextPtr context) { - auto * element_id = context->getProcessListElement(); + auto element_id = context->getProcessListElement(); if (element_id) { /// While preparing the query there might have been progress (for example in subscalar subqueries) so add it here diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index d974721627e..ddc1add6de4 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -126,7 +126,7 @@ void executeQuery( new_context->getClientInfo().distributed_depth += 1; ThrottlerPtr user_level_throttler; - if (auto * process_list_element = context->getProcessListElement()) + if (auto process_list_element = context->getProcessListElement()) user_level_throttler = process_list_element->getUserNetworkThrottler(); /// Network bandwidth limit, if 
needed. @@ -228,7 +228,7 @@ void executeQueryWithParallelReplicas( const Settings & settings = context->getSettingsRef(); ThrottlerPtr user_level_throttler; - if (auto * process_list_element = context->getProcessListElement()) + if (auto process_list_element = context->getProcessListElement()) user_level_throttler = process_list_element->getUserNetworkThrottler(); /// Network bandwidth limit, if needed. diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 1de56e950c6..b7242231ebe 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1463,10 +1463,8 @@ void Context::setCurrentQueryId(const String & query_id) void Context::killCurrentQuery() { - if (process_list_elem) - { - process_list_elem->cancelQuery(true); - } + if (auto elem = process_list_elem.lock()) + elem->cancelQuery(true); } String Context::getDefaultFormat() const @@ -1707,15 +1705,15 @@ ProgressCallback Context::getProgressCallback() const } -void Context::setProcessListElement(ProcessList::Element * elem) +void Context::setProcessListElement(QueryStatusPtr elem) { /// Set to a session or query. In the session, only one query is processed at a time. Therefore, the lock is not needed. - process_list_elem = elem; + process_list_elem = std::move(elem); } -ProcessList::Element * Context::getProcessListElement() const +QueryStatusPtr Context::getProcessListElement() const { - return process_list_elem; + return process_list_elem.lock(); } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 233f4011ce3..eeb9e8da148 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -68,6 +68,7 @@ class MMappedFileCache; class UncompressedCache; class ProcessList; class QueryStatus; +using QueryStatusPtr = std::shared_ptr; class Macros; struct Progress; struct FileProgress; @@ -230,7 +231,7 @@ private: using FileProgressCallback = std::function; FileProgressCallback file_progress_callback; /// Callback for tracking progress of file loading. - QueryStatus * process_list_elem = nullptr; /// For tracking total resource usage for query. + std::weak_ptr process_list_elem; /// For tracking total resource usage for query. StorageID insertion_table = StorageID::createEmpty(); /// Saved insertion table in query context bool is_distributed = false; /// Whether the current context it used for distributed query @@ -750,9 +751,9 @@ public: /** Set in executeQuery and InterpreterSelectQuery. Then it is used in QueryPipeline, * to update and monitor information about the total number of resources spent for the query. */ - void setProcessListElement(QueryStatus * elem); + void setProcessListElement(QueryStatusPtr elem); /// Can return nullptr if the query was not inserted into the ProcessList. - QueryStatus * getProcessListElement() const; + QueryStatusPtr getProcessListElement() const; /// List all queries. 
ProcessList & getProcessList(); diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index d5194a02513..b8a5ef3b537 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -243,15 +243,15 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as } auto process_it = processes.emplace(processes.end(), - query_context, query_, client_info, priorities.insert(settings.priority), std::move(thread_group), query_kind); + std::make_shared(query_context, query_, client_info, priorities.insert(settings.priority), std::move(thread_group), query_kind)); increaseQueryKindAmount(query_kind); res = std::make_shared(*this, process_it); - process_it->setUserProcessList(&user_process_list); + (*process_it)->setUserProcessList(&user_process_list); - user_process_list.queries.emplace(client_info.current_query_id, &res->get()); + user_process_list.queries.emplace(client_info.current_query_id, res->getQueryStatus()); /// Track memory usage for all simultaneously running queries from single user. user_process_list.user_memory_tracker.setOrRaiseHardLimit(settings.max_memory_usage_for_user); @@ -280,11 +280,11 @@ ProcessListEntry::~ProcessListEntry() { auto lock = parent.safeLock(); - String user = it->getClientInfo().current_user; - String query_id = it->getClientInfo().current_query_id; - IAST::QueryKind query_kind = it->query_kind; + String user = (*it)->getClientInfo().current_user; + String query_id = (*it)->getClientInfo().current_query_id; + IAST::QueryKind query_kind = (*it)->query_kind; - const QueryStatus * process_list_element_ptr = &*it; + const QueryStatusPtr process_list_element_ptr = *it; auto user_process_list_it = parent.user_to_queries.find(user); if (user_process_list_it == parent.user_to_queries.end()) @@ -307,7 +307,7 @@ ProcessListEntry::~ProcessListEntry() } /// Wait for the query if it is in the cancellation right now. - parent.cancelled_cv.wait(lock.lock, [&]() { return it->is_cancelling == false; }); + parent.cancelled_cv.wait(lock.lock, [&]() { return process_list_element_ptr->is_cancelling == false; }); /// This removes the memory_tracker of one request. parent.processes.erase(it); @@ -430,7 +430,7 @@ ThrottlerPtr QueryStatus::getUserNetworkThrottler() } -QueryStatus * ProcessList::tryGetProcessListElement(const String & current_query_id, const String & current_user) +QueryStatusPtr ProcessList::tryGetProcessListElement(const String & current_query_id, const String & current_user) { auto user_it = user_to_queries.find(current_user); if (user_it != user_to_queries.end()) @@ -442,13 +442,13 @@ QueryStatus * ProcessList::tryGetProcessListElement(const String & current_query return query_it->second; } - return nullptr; + return {}; } CancellationCode ProcessList::sendCancelToQuery(const String & current_query_id, const String & current_user, bool kill) { - QueryStatus * elem; + QueryStatusPtr elem; /// Cancelling the query should be done without the lock. 
/// @@ -484,7 +484,7 @@ CancellationCode ProcessList::sendCancelToQuery(const String & current_query_id, void ProcessList::killAllQueries() { - std::vector cancelled_processes; + std::vector cancelled_processes; SCOPE_EXIT({ auto lock = safeLock(); @@ -498,8 +498,8 @@ void ProcessList::killAllQueries() cancelled_processes.reserve(processes.size()); for (auto & process : processes) { - cancelled_processes.push_back(&process); - process.is_cancelling = true; + cancelled_processes.push_back(process); + process->is_cancelling = true; } } @@ -558,7 +558,7 @@ ProcessList::Info ProcessList::getInfo(bool get_thread_list, bool get_profile_ev per_query_infos.reserve(processes.size()); for (const auto & process : processes) - per_query_infos.emplace_back(process.getInfo(get_thread_list, get_profile_events, get_settings)); + per_query_infos.emplace_back(process->getInfo(get_thread_list, get_profile_events, get_settings)); return per_query_infos; } diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 6943c7cfcd8..077ded9d24c 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -221,6 +221,8 @@ public: [[nodiscard]] bool checkTimeLimitSoft(); }; +using QueryStatusPtr = std::shared_ptr; + /// Information of process list for user. struct ProcessListForUserInfo @@ -241,7 +243,7 @@ struct ProcessListForUser ProcessListForUser(ContextPtr global_context, ProcessList * global_process_list); /// query_id -> ProcessListElement(s). There can be multiple queries with the same query_id as long as all queries except one are cancelled. - using QueryToElement = std::unordered_map; + using QueryToElement = std::unordered_map; QueryToElement queries; ProfileEvents::Counters user_performance_counters{VariableContext::User, &ProfileEvents::global_counters}; @@ -278,7 +280,7 @@ class ProcessList; class ProcessListEntry { private: - using Container = std::list; + using Container = std::list; ProcessList & parent; Container::iterator it; @@ -289,11 +291,8 @@ public: ~ProcessListEntry(); - QueryStatus * operator->() { return &*it; } - const QueryStatus * operator->() const { return &*it; } - - QueryStatus & get() { return *it; } - const QueryStatus & get() const { return *it; } + QueryStatusPtr getQueryStatus() { return *it; } + const QueryStatusPtr getQueryStatus() const { return *it; } }; @@ -319,7 +318,7 @@ protected: class ProcessList : public ProcessListBase { public: - using Element = QueryStatus; + using Element = QueryStatusPtr; using Entry = ProcessListEntry; using QueryAmount = UInt64; @@ -358,7 +357,7 @@ protected: ThrottlerPtr total_network_throttler; /// Call under lock. Finds process with specified current_user and current_query_id. - QueryStatus * tryGetProcessListElement(const String & current_query_id, const String & current_user); + QueryStatusPtr tryGetProcessListElement(const String & current_query_id, const String & current_user); /// limit for insert. 0 means no limit. Otherwise, when limit exceeded, an exception is thrown. size_t max_insert_queries_amount = 0; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 86686b3eb13..1caeea95881 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -537,7 +537,7 @@ static std::tuple executeQueryImpl( { /// processlist also has query masked now, to avoid secrets leaks though SHOW PROCESSLIST by other users. 
process_list_entry = context->getProcessList().insert(query_for_logging, ast.get(), context); - context->setProcessListElement(&process_list_entry->get()); + context->setProcessListElement(process_list_entry->getQueryStatus()); } /// Load external tables if they were provided @@ -695,9 +695,9 @@ static std::tuple executeQueryImpl( if (process_list_entry) { /// Query was killed before execution - if ((*process_list_entry)->isKilled()) - throw Exception("Query '" + (*process_list_entry)->getInfo().client_info.current_query_id + "' is killed in pending state", - ErrorCodes::QUERY_WAS_CANCELLED); + if (process_list_entry->getQueryStatus()->isKilled()) + throw Exception(ErrorCodes::QUERY_WAS_CANCELLED, + "Query '{}' is killed in pending state", process_list_entry->getQueryStatus()->getInfo().client_info.current_query_id); } /// Hold element of process list till end of query execution. @@ -841,7 +841,7 @@ static std::tuple executeQueryImpl( pulling_pipeline = pipeline.pulling(), query_span](QueryPipeline & query_pipeline) mutable { - QueryStatus * process_list_elem = context->getProcessListElement(); + QueryStatusPtr process_list_elem = context->getProcessListElement(); if (process_list_elem) { @@ -1007,7 +1007,7 @@ static std::tuple executeQueryImpl( elem.exception_code = getCurrentExceptionCode(); elem.exception = getCurrentExceptionMessage(false); - QueryStatus * process_list_elem = context->getProcessListElement(); + QueryStatusPtr process_list_elem = context->getProcessListElement(); const Settings & current_settings = context->getSettingsRef(); /// Update performance counters before logging to query_log diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 564f42be5db..42140b40ecf 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -24,8 +24,8 @@ namespace ErrorCodes } -PipelineExecutor::PipelineExecutor(std::shared_ptr & processors, QueryStatus * elem) - : process_list_element(elem) +PipelineExecutor::PipelineExecutor(std::shared_ptr & processors, QueryStatusPtr elem) + : process_list_element(std::move(elem)) { if (process_list_element) { diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index c7a74d6ea75..7b5d3213dea 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -15,6 +15,7 @@ namespace DB { class QueryStatus; +using QueryStatusPtr = std::shared_ptr; class ExecutingGraph; using ExecutingGraphPtr = std::unique_ptr; @@ -31,7 +32,7 @@ public: /// During pipeline execution new processors can appear. They will be added to existing set. /// /// Explicit graph representation is built in constructor. Throws if graph is not correct. - explicit PipelineExecutor(std::shared_ptr & processors, QueryStatus * elem); + explicit PipelineExecutor(std::shared_ptr & processors, QueryStatusPtr elem); ~PipelineExecutor(); /// Execute pipeline in multiple threads. Must be called once. @@ -80,7 +81,7 @@ private: Poco::Logger * log = &Poco::Logger::get("PipelineExecutor"); /// Now it's used to check if query was killed. 
- QueryStatus * const process_list_element = nullptr; + QueryStatusPtr process_list_element; ReadProgressCallbackPtr read_progress_callback; diff --git a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp index 344c5c179db..b4aafbd3d9e 100644 --- a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp @@ -74,7 +74,7 @@ void MySQLOutputFormat::finalizeImpl() { size_t affected_rows = 0; std::string human_readable_info; - if (QueryStatus * process_list_elem = getContext()->getProcessListElement()) + if (QueryStatusPtr process_list_elem = getContext()->getProcessListElement()) { CurrentThread::finalizePerformanceCounters(); QueryStatusInfo info = process_list_elem->getInfo(); diff --git a/src/Processors/QueryPlan/BuildQueryPipelineSettings.h b/src/Processors/QueryPlan/BuildQueryPipelineSettings.h index fadbd061fbd..3b5e4e06953 100644 --- a/src/Processors/QueryPlan/BuildQueryPipelineSettings.h +++ b/src/Processors/QueryPlan/BuildQueryPipelineSettings.h @@ -5,16 +5,18 @@ #include + namespace DB { struct Settings; class QueryStatus; +using QueryStatusPtr = std::shared_ptr; struct BuildQueryPipelineSettings { ExpressionActionsSettings actions_settings; - QueryStatus * process_list_element = nullptr; + QueryStatusPtr process_list_element; ProgressCallback progress_callback = nullptr; const ExpressionActionsSettings & getActionsSettings() const { return actions_settings; } diff --git a/src/Processors/Transforms/CountingTransform.h b/src/Processors/Transforms/CountingTransform.h index bd2ec58a27f..05d8e2aeac8 100644 --- a/src/Processors/Transforms/CountingTransform.h +++ b/src/Processors/Transforms/CountingTransform.h @@ -9,6 +9,7 @@ namespace DB { class QueryStatus; +using QueryStatusPtr = std::shared_ptr; class ThreadStatus; /// Proxy class which counts number of written block, rows, bytes @@ -29,7 +30,7 @@ public: progress_callback = callback; } - void setProcessListElement(QueryStatus * elem) + void setProcessListElement(QueryStatusPtr elem) { process_elem = elem; } @@ -50,7 +51,7 @@ public: protected: Progress progress; ProgressCallback progress_callback; - QueryStatus * process_elem = nullptr; + QueryStatusPtr process_elem; ThreadStatus * thread_status = nullptr; /// Quota is used to limit amount of written bytes. 
diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 174aaf67ec5..830f400faf2 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -620,9 +620,10 @@ void PushingToLiveViewSink::consume(Chunk chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageLiveView::writeIntoLiveView(live_view, getHeader().cloneWithColumns(chunk.detachColumns()), context); - auto * process = context->getProcessListElement(); - if (process) + + if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); + ProfileEvents::increment(ProfileEvents::SelectedRows, local_progress.read_rows); ProfileEvents::increment(ProfileEvents::SelectedBytes, local_progress.read_bytes); } @@ -643,9 +644,10 @@ void PushingToWindowViewSink::consume(Chunk chunk) Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( window_view, getHeader().cloneWithColumns(chunk.detachColumns()), context); - auto * process = context->getProcessListElement(); - if (process) + + if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); + ProfileEvents::increment(ProfileEvents::SelectedRows, local_progress.read_rows); ProfileEvents::increment(ProfileEvents::SelectedBytes, local_progress.read_bytes); } diff --git a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp index 53483b7a7f5..40718bd968a 100644 --- a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp +++ b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp @@ -27,7 +27,7 @@ TEST(Processors, PortsConnected) processors->emplace_back(std::move(source)); processors->emplace_back(std::move(sink)); - QueryStatus * element = nullptr; + QueryStatusPtr element; PipelineExecutor executor(processors, element); executor.execute(1); } @@ -53,7 +53,7 @@ TEST(Processors, PortsNotConnected) #ifndef ABORT_ON_LOGICAL_ERROR try { - QueryStatus * element = nullptr; + QueryStatusPtr element; PipelineExecutor executor(processors, element); executor.execute(1); ASSERT_TRUE(false) << "Should have thrown."; diff --git a/src/QueryPipeline/BlockIO.cpp b/src/QueryPipeline/BlockIO.cpp index 35463ca6be9..9e42e06c722 100644 --- a/src/QueryPipeline/BlockIO.cpp +++ b/src/QueryPipeline/BlockIO.cpp @@ -53,9 +53,8 @@ void BlockIO::setAllDataSent() const /// - internal /// - SHOW PROCESSLIST if (process_list_entry) - (*process_list_entry)->setAllDataSent(); + process_list_entry->getQueryStatus()->setAllDataSent(); } } - diff --git a/src/QueryPipeline/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp index 4d3607b89fd..61120aef921 100644 --- a/src/QueryPipeline/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -477,7 +477,7 @@ void QueryPipeline::setProgressCallback(const ProgressCallback & callback) progress_callback = callback; } -void QueryPipeline::setProcessListElement(QueryStatus * elem) +void QueryPipeline::setProcessListElement(QueryStatusPtr elem) { process_list_element = elem; diff --git a/src/QueryPipeline/QueryPipeline.h b/src/QueryPipeline/QueryPipeline.h index 04d4a45e683..63f444e6ec1 100644 --- a/src/QueryPipeline/QueryPipeline.h +++ b/src/QueryPipeline/QueryPipeline.h @@ -16,6 +16,7 @@ using ProcessorPtr = std::shared_ptr; using Processors = std::vector; class QueryStatus; +using QueryStatusPtr = 
std::shared_ptr; struct Progress; using ProgressCallback = std::function; @@ -99,7 +100,7 @@ public: size_t getNumThreads() const { return num_threads; } void setNumThreads(size_t num_threads_) { num_threads = num_threads_; } - void setProcessListElement(QueryStatus * elem); + void setProcessListElement(QueryStatusPtr elem); void setProgressCallback(const ProgressCallback & callback); void setLimitsAndQuota(const StreamLocalLimits & limits, std::shared_ptr quota_); bool tryGetResultRowsAndBytes(UInt64 & result_rows, UInt64 & result_bytes) const; @@ -144,7 +145,7 @@ private: OutputPort * totals = nullptr; OutputPort * extremes = nullptr; - QueryStatus * process_list_element = nullptr; + QueryStatusPtr process_list_element; IOutputFormat * output_format = nullptr; diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index d56113a07e6..812bd155b42 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -537,7 +537,7 @@ void QueryPipelineBuilder::addPipelineBefore(QueryPipelineBuilder pipeline) addTransform(std::move(processor)); } -void QueryPipelineBuilder::setProcessListElement(QueryStatus * elem) +void QueryPipelineBuilder::setProcessListElement(QueryStatusPtr elem) { process_list_element = elem; } diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index 13b4d681b7d..5a0694100eb 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -148,7 +148,7 @@ public: const Block & getHeader() const { return pipe.getHeader(); } - void setProcessListElement(QueryStatus * elem); + void setProcessListElement(QueryStatusPtr elem); void setProgressCallback(ProgressCallback callback); /// Recommend number of threads for pipeline execution. @@ -189,7 +189,7 @@ private: /// Sometimes, more streams are created then the number of threads for more optimal execution. 
size_t max_threads = 0; - QueryStatus * process_list_element = nullptr; + QueryStatusPtr process_list_element; ProgressCallback progress_callback = nullptr; void checkInitialized(); diff --git a/src/QueryPipeline/ReadProgressCallback.cpp b/src/QueryPipeline/ReadProgressCallback.cpp index bbdabb8e8d8..6692b0f96bd 100644 --- a/src/QueryPipeline/ReadProgressCallback.cpp +++ b/src/QueryPipeline/ReadProgressCallback.cpp @@ -2,6 +2,7 @@ #include #include + namespace ProfileEvents { extern const Event SelectedRows; @@ -17,7 +18,7 @@ namespace ErrorCodes extern const int TOO_MANY_BYTES; } -void ReadProgressCallback::setProcessListElement(QueryStatus * elem) +void ReadProgressCallback::setProcessListElement(QueryStatusPtr elem) { process_list_elem = elem; if (!elem) diff --git a/src/QueryPipeline/ReadProgressCallback.h b/src/QueryPipeline/ReadProgressCallback.h index f64123ef39d..c8f0d4cf537 100644 --- a/src/QueryPipeline/ReadProgressCallback.h +++ b/src/QueryPipeline/ReadProgressCallback.h @@ -4,20 +4,23 @@ #include #include + namespace DB { class QueryStatus; +using QueryStatusPtr = std::shared_ptr; class EnabledQuota; struct StorageLimits; using StorageLimitsList = std::list; + class ReadProgressCallback { public: void setQuota(const std::shared_ptr & quota_) { quota = quota_; } - void setProcessListElement(QueryStatus * elem); + void setProcessListElement(QueryStatusPtr elem); void setProgressCallback(const ProgressCallback & callback) { progress_callback = callback; } void addTotalRowsApprox(size_t value) { total_rows_approx += value; } @@ -30,7 +33,7 @@ public: private: std::shared_ptr quota; ProgressCallback progress_callback; - QueryStatus * process_list_elem = nullptr; + QueryStatusPtr process_list_elem; /// The approximate total number of rows to read. For progress bar. std::atomic_size_t total_rows_approx = 0; From e93b8d1618da04678fc3c54afb2646516be81d8d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Oct 2022 04:44:58 +0200 Subject: [PATCH 103/252] Fix trash --- src/QueryPipeline/QueryPipeline.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/QueryPipeline/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp index 61120aef921..e0da4c4f0eb 100644 --- a/src/QueryPipeline/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -30,7 +30,11 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -QueryPipeline::QueryPipeline() = default; +QueryPipeline::QueryPipeline() + : processors(std::make_shared()) +{ +} + QueryPipeline::QueryPipeline(QueryPipeline &&) noexcept = default; QueryPipeline & QueryPipeline::operator=(QueryPipeline &&) noexcept = default; QueryPipeline::~QueryPipeline() = default; @@ -306,6 +310,7 @@ QueryPipeline::QueryPipeline(Chain chain) } QueryPipeline::QueryPipeline(std::shared_ptr format) + : processors(std::make_shared()) { auto & format_main = format->getPort(IOutputFormat::PortKind::Main); auto & format_totals = format->getPort(IOutputFormat::PortKind::Totals); From d1f0c80ffe26e02866837b8423682be762f18445 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Oct 2022 04:57:15 +0200 Subject: [PATCH 104/252] Revert "Merge pull request #40217 from zvonand/zvonand-minmax" This reverts commit df934d876277f73922b79521809a9ac1e54041a7, reversing changes made to d3a7945b1bdcb0578861bce810b210802b06cea9. 
--- .../functions/date-time-functions.md | 19 ++--- docs/ru/sql-reference/data-types/date.md | 2 +- .../functions/date-time-functions.md | 22 +++--- docs/zh/sql-reference/data-types/date.md | 2 +- src/Functions/CustomWeekTransforms.h | 10 +-- src/Functions/DateTimeTransforms.h | 71 ++++++----------- src/Functions/FunctionsConversion.h | 78 +++++-------------- .../0_stateless/00900_long_parquet.reference | 8 +- .../0_stateless/00941_to_custom_week.sql | 1 + .../01440_to_date_monotonicity.reference | 2 +- .../01921_datatype_date32.reference | 48 ++++++------ .../02403_date_time_narrowing.reference | 20 ----- .../0_stateless/02403_date_time_narrowing.sql | 74 ------------------ 13 files changed, 94 insertions(+), 263 deletions(-) delete mode 100644 tests/queries/0_stateless/02403_date_time_narrowing.reference delete mode 100644 tests/queries/0_stateless/02403_date_time_narrowing.sql diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 76f66db924f..fae05fead45 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -271,11 +271,7 @@ Result: The return type of `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings#enable-extended-results-for-datetime-functions) which is `0` by default. Behavior for -* `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results. In case argument is out of normal range: - * If the argument is smaller than 1970, the result will be calculated from the argument `1970-01-01 (00:00:00)` instead. - * If the return type is `DateTime` and the argument is larger than `2106-02-07 08:28:15`, the result will be calculated from the argument `2106-02-07 08:28:15` instead. - * If the return type is `Date` and the argument is larger than `2149-06-06`, the result will be calculated from the argument `2149-06-06` instead. - * If `toLastDayOfMonth` is called with an argument greater then `2149-05-31`, the result will be calculated from the argument `2149-05-31` instead. +* `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results. 
* `enable_extended_results_for_datetime_functions = 1`:
* Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime` if their argument is a `Date` or `DateTime`, and they return `Date32` or `DateTime64` if their argument is a `Date32` or `DateTime64`.
* Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime` if their argument is a `Date` or `DateTime`, and they return `DateTime64` if their argument is a `Date32` or `DateTime64`.
@@ -302,25 +298,22 @@ Returns the date.
 Rounds down a date or date with time to the first day of the month.
 Returns the date.
 
-## toLastDayOfMonth
-
-Rounds up a date or date with time to the last day of the month.
-Returns the date.
+:::note
+The behavior of parsing incorrect dates is implementation specific. ClickHouse may return zero date, throw an exception or do “natural” overflow.
+:::
 
 If `toLastDayOfMonth` is called with an argument of type `Date` greater then 2149-05-31, the result will be calculated from the argument 2149-05-31 instead.
 
 ## toMonday
 
 Rounds down a date or date with time to the nearest Monday.
-As a special case, date arguments `1970-01-01`, `1970-01-02`, `1970-01-03` and `1970-01-04` return date `1970-01-01`.
 Returns the date.
 
 ## toStartOfWeek(t\[,mode\])
 
 Rounds down a date or date with time to the nearest Sunday or Monday by mode.
 Returns the date.
-As a special case, date arguments `1970-01-01`, `1970-01-02`, `1970-01-03` and `1970-01-04` (and `1970-01-05` if `mode` is `1`) return date `1970-01-01`.
-The `mode` argument works exactly like the mode argument to toWeek(). For the single-argument syntax, a mode value of 0 is used.
+The mode argument works exactly like the mode argument to toWeek(). For the single-argument syntax, a mode value of 0 is used.
 
 ## toStartOfDay
 
@@ -1075,7 +1068,7 @@ Example:
 SELECT timeSlots(toDateTime('2012-01-01 12:20:00'), toUInt32(600));
 SELECT timeSlots(toDateTime('1980-12-12 21:01:02', 'UTC'), toUInt32(600), 299);
 SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64(600.1, 1), toDecimal64(299, 0));
-```
+``` 
 ``` text
 ┌─timeSlots(toDateTime('2012-01-01 12:20:00'), toUInt32(600))─┐
 │ ['2012-01-01 12:00:00','2012-01-01 12:30:00']               │
diff --git a/docs/ru/sql-reference/data-types/date.md b/docs/ru/sql-reference/data-types/date.md
index 7254b82f461..185fe28d567 100644
--- a/docs/ru/sql-reference/data-types/date.md
+++ b/docs/ru/sql-reference/data-types/date.md
@@ -6,7 +6,7 @@ sidebar_label: Date
 
 # Date {#data-type-date}
 
-Дата. Хранится в двух байтах в виде (беззнакового) числа дней, прошедших от 1970-01-01. Позволяет хранить значения от чуть больше, чем начала unix-эпохи до верхнего порога, определяющегося константой на этапе компиляции (сейчас - до 2149 года, последний полностью поддерживаемый год - 2148).
+Дата. Хранится в двух байтах в виде (беззнакового) числа дней, прошедших от 1970-01-01. Позволяет хранить значения от чуть больше, чем начала unix-эпохи до верхнего порога, определяющегося константой на этапе компиляции (сейчас - до 2106 года, последний полностью поддерживаемый год - 2105).
 
 Диапазон значений: \[1970-01-01, 2149-06-06\].
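An illustrative aside before the next file in this patch (queries are the editor's assumption, not patch content): the two-byte bound the doc above describes is why `Date` tops out at 2149-06-06.

```sql
-- Date stores (unsigned) days since 1970-01-01 in two bytes, so the last
-- representable value is 1970-01-01 + 65535 days = 2149-06-06.
SELECT toDate('2149-06-06');  -- upper bound of the Date range
SELECT toDate(65535);         -- the same day, given as a raw day number
```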
diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md
index a7d2ce49fae..7942796f6ba 100644
--- a/docs/ru/sql-reference/functions/date-time-functions.md
+++ b/docs/ru/sql-reference/functions/date-time-functions.md
@@ -272,15 +272,9 @@ SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp;
 Поведение для
 * `enable_extended_results_for_datetime_functions = 0`: Функции `toStartOf*`, `toLastDayOfMonth`, `toMonday` возвращают `Date` или `DateTime`. Функции `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` возвращают `DateTime`. Хотя эти функции могут принимать значения типа `Date32` или `DateTime64` в качестве аргумента, при обработке аргумента вне нормального диапазона значений (`1970` - `2148` для `Date` и `1970-01-01 00:00:00`-`2106-02-07 08:28:15` для `DateTime`) будет получен некорректный результат.
-В случае если значение аргумента вне нормального диапазона:
- * `1970-01-01 (00:00:00)` будет возвращён для моментов времени до 1970 года,
- * `2106-02-07 08:28:15` будет взят в качестве аргумента, если полученный аргумент превосходит данное значение и возвращаемый тип - `DateTime`,
- * `2149-06-06` будет взят в качестве аргумента, если полученный аргумент превосходит данное значение и возвращаемый тип - `Date`,
- * `2149-05-31` будет результатом функции `toLastDayOfMonth` при обработке аргумента больше `2149-05-31`.
 * `enable_extended_results_for_datetime_functions = 1`:
 * Функции `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` возвращают `Date` или `DateTime` если их аргумент `Date` или `DateTime` и они возвращают `Date32` или `DateTime64` если их аргумент `Date32` или `DateTime64`.
 * Функции `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` возвращают `DateTime` если их аргумент `Date` или `DateTime` и они возвращают `DateTime64` если их аргумент `Date32` или `DateTime64`.
-
 :::
 
 ## toStartOfYear {#tostartofyear}
@@ -316,6 +310,7 @@ SELECT toStartOfISOYear(toDate('2017-01-01')) AS ISOYear20170101;
 Округляет дату или дату-с-временем вниз до первого дня месяца.
 Возвращается дата.
 
+<<<<<<< HEAD
 ## toLastDayOfMonth
 
 Округляет дату или дату-с-временем до последнего числа месяца.
@@ -323,18 +318,22 @@ SELECT toStartOfISOYear(toDate('2017-01-01')) AS ISOYear20170101;
 
 Если `toLastDayOfMonth` вызывается с аргументом типа `Date` большим чем 2149-05-31, то результат будет вычислен от аргумента 2149-05-31.
 
+=======
+:::note "Attention"
+    Возвращаемое значение для некорректных дат зависит от реализации. ClickHouse может вернуть нулевую дату, выбросить исключение, или выполнить «естественное» перетекание дат между месяцами.
+:::
+
+>>>>>>> parent of df934d8762 (Merge pull request #40217 from zvonand/zvonand-minmax)
 ## toMonday {#tomonday}
 
 Округляет дату или дату-с-временем вниз до ближайшего понедельника.
-Частный случай: для дат `1970-01-01`, `1970-01-02`, `1970-01-03` и `1970-01-04` результатом будет `1970-01-01`.
 Возвращается дата.
 
 ## toStartOfWeek(t[,mode]) {#tostartofweek}
 
 Округляет дату или дату со временем до ближайшего воскресенья или понедельника в соответствии с mode.
 Возвращается дата.
-Частный случай: для дат `1970-01-01`, `1970-01-02`, `1970-01-03` и `1970-01-04` (и `1970-01-05`, если `mode` равен `1`) результатом будет `1970-01-01`.
-Аргумент `mode` работает точно так же, как аргумент mode [toWeek()](#toweek). Если аргумент mode опущен, то используется режим 0.
+Аргумент mode работает точно так же, как аргумент mode [toWeek()](#toweek). Если аргумент mode опущен, то используется режим 0.
 
 ## toStartOfDay {#tostartofday}
 
@@ -975,8 +974,13 @@ SELECT now('Europe/Moscow');
 ## timeSlots(StartTime, Duration,\[, Size\]) {#timeslotsstarttime-duration-size}
 Для интервала, начинающегося в `StartTime` и длящегося `Duration` секунд, возвращает массив моментов времени, кратных `Size`. Параметр `Size` указывать необязательно, по умолчанию он равен 1800 секундам (30 минутам) - необязательный параметр.
+<<<<<<< HEAD
 Данная функция может использоваться, например, для анализа количества просмотров страницы за соответствующую сессию.
 Аргумент `StartTime` может иметь тип `DateTime` или `DateTime64`. В случае, если используется `DateTime`, аргументы `Duration` и `Size` должны иметь тип `UInt32`; Для DateTime64 они должны быть типа `Decimal64`.
+=======
+Данная функция может использоваться, например, для анализа количества просмотров страницы за соответствующую сессию.
+Аргумент `StartTime` может иметь тип `DateTime` или `DateTime64`. В случае, если используется `DateTime`, аргументы `Duration` и `Size` должны иметь тип `UInt32`; Для DateTime64 они должны быть типа `Decimal64`.
+>>>>>>> parent of df934d8762 (Merge pull request #40217 from zvonand/zvonand-minmax)
 Возвращает массив DateTime/DateTime64 (тип будет совпадать с типом параметра ’StartTime’). Для DateTime64 масштаб(scale) возвращаемой величины может отличаться от масштаба фргумента ’StartTime’ --- результат будет иметь наибольший масштаб среди всех данных аргументов.
Пример использования:

diff --git a/docs/zh/sql-reference/data-types/date.md b/docs/zh/sql-reference/data-types/date.md
index 9b1acdbe939..a8874151e75 100644
--- a/docs/zh/sql-reference/data-types/date.md
+++ b/docs/zh/sql-reference/data-types/date.md
@@ -3,7 +3,7 @@ slug: /zh/sql-reference/data-types/date
 ---
 # 日期 {#date}
 
-日期类型，用两个字节存储，表示从 1970-01-01 (无符号) 到当前的日期值。允许存储从 Unix 纪元开始到编译阶段定义的上限阈值常量（目前上限是2149年，但最终完全支持的年份为2148）。最小值输出为1970-01-01。
+日期类型，用两个字节存储，表示从 1970-01-01 (无符号) 到当前的日期值。允许存储从 Unix 纪元开始到编译阶段定义的上限阈值常量（目前上限是2106年，但最终完全支持的年份为2105）。最小值输出为1970-01-01。
 
 值的范围: \[1970-01-01, 2149-06-06\]。
 
diff --git a/src/Functions/CustomWeekTransforms.h b/src/Functions/CustomWeekTransforms.h
index b690463d456..781c18bc338 100644
--- a/src/Functions/CustomWeekTransforms.h
+++ b/src/Functions/CustomWeekTransforms.h
@@ -62,10 +62,7 @@ struct ToStartOfWeekImpl
 static inline UInt16 execute(Int64 t, UInt8 week_mode, const DateLUTImpl & time_zone)
 {
- if (t < 0)
- return 0;
-
- return time_zone.toFirstDayNumOfWeek(DayNum(std::min(Int32(time_zone.toDayNum(t)), Int32(DATE_LUT_MAX_DAY_NUM))), week_mode);
+ return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t), week_mode);
 }
 static inline UInt16 execute(UInt32 t, UInt8 week_mode, const DateLUTImpl & time_zone)
 {
@@ -73,10 +70,7 @@ struct ToStartOfWeekImpl
 }
 static inline UInt16 execute(Int32 d, UInt8 week_mode, const DateLUTImpl & time_zone)
 {
- if (d < 0)
- return 0;
-
- return time_zone.toFirstDayNumOfWeek(DayNum(std::min(d, Int32(DATE_LUT_MAX_DAY_NUM))), week_mode);
+ return time_zone.toFirstDayNumOfWeek(ExtendedDayNum(d), week_mode);
 }
 static inline UInt16 execute(UInt16 d, UInt8 week_mode, const DateLUTImpl & time_zone)
 {
diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h
index 217f158cc8e..981698f6eec 100644
--- a/src/Functions/DateTimeTransforms.h
+++ b/src/Functions/DateTimeTransforms.h
@@ -55,15 +55,15 @@ struct ToDateImpl
 static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone)
 {
- return t < 0 ? 0 : std::min(Int32(time_zone.toDayNum(t)), Int32(DATE_LUT_MAX_DAY_NUM));
+ return UInt16(time_zone.toDayNum(t));
 }
 static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone)
 {
- return time_zone.toDayNum(t);
+ return UInt16(time_zone.toDayNum(t));
 }
- static inline UInt16 execute(Int32 t, const DateLUTImpl &)
+ static inline UInt16 execute(Int32, const DateLUTImpl &)
 {
- return t < 0 ? 0 : std::min(t, Int32(DATE_LUT_MAX_DAY_NUM));
+ throwDateIsNotSupported(name);
 }
 static inline UInt16 execute(UInt16 d, const DateLUTImpl &)
 {
@@ -104,10 +104,7 @@ struct ToStartOfDayImpl
 static inline UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone)
 {
- if (t.whole < 0 || (t.whole >= 0 && t.fractional < 0))
- return 0;
-
- return time_zone.toDate(std::min(t.whole, Int64(0xffffffff)));
+ return time_zone.toDate(static_cast(t.whole));
 }
 static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone)
 {
@@ -115,19 +112,11 @@ struct ToStartOfDayImpl
 }
 static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone)
 {
- if (d < 0)
- return 0;
-
- auto date_time = time_zone.fromDayNum(ExtendedDayNum(d));
- if (date_time <= 0xffffffff)
- return date_time;
- else
- return time_zone.toDate(0xffffffff);
+ return time_zone.toDate(ExtendedDayNum(d));
 }
 static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
 {
- auto date_time = time_zone.fromDayNum(ExtendedDayNum(d));
- return date_time < 0xffffffff ?
date_time : time_zone.toDate(0xffffffff); + return time_zone.toDate(DayNum(d)); } static inline DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) { @@ -147,16 +136,17 @@ struct ToMondayImpl static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { - return t < 0 ? 0 : time_zone.toFirstDayNumOfWeek(ExtendedDayNum( - std::min(Int32(time_zone.toDayNum(t)), Int32(DATE_LUT_MAX_DAY_NUM)))); + //return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t)); + return time_zone.toFirstDayNumOfWeek(t); } static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { + //return time_zone.toFirstDayNumOfWeek(time_zone.toDayNum(t)); return time_zone.toFirstDayNumOfWeek(t); } static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) { - return d < 0 ? 0 : time_zone.toFirstDayNumOfWeek(ExtendedDayNum(std::min(d, Int32(DATE_LUT_MAX_DAY_NUM)))); + return time_zone.toFirstDayNumOfWeek(ExtendedDayNum(d)); } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { @@ -179,15 +169,15 @@ struct ToStartOfMonthImpl static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { - return t < 0 ? 0 : time_zone.toFirstDayNumOfMonth(ExtendedDayNum(std::min(Int32(time_zone.toDayNum(t)), Int32(DATE_LUT_MAX_DAY_NUM)))); + return time_zone.toFirstDayNumOfMonth(time_zone.toDayNum(t)); } static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { - return time_zone.toFirstDayNumOfMonth(ExtendedDayNum(time_zone.toDayNum(t))); + return time_zone.toFirstDayNumOfMonth(time_zone.toDayNum(t)); } static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) { - return d < 0 ? 0 : time_zone.toFirstDayNumOfMonth(ExtendedDayNum(std::min(d, Int32(DATE_LUT_MAX_DAY_NUM)))); + return time_zone.toFirstDayNumOfMonth(ExtendedDayNum(d)); } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { @@ -211,11 +201,7 @@ struct ToLastDayOfMonthImpl static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { - if (t < 0) - return 0; - - /// 0xFFF9 is Int value for 2149-05-31 -- the last day where we can actually find LastDayOfMonth. This will also be the return value. - return time_zone.toLastDayNumOfMonth(ExtendedDayNum(std::min(Int32(time_zone.toDayNum(t)), Int32(0xFFF9)))); + return time_zone.toLastDayNumOfMonth(time_zone.toDayNum(t)); } static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { @@ -223,16 +209,11 @@ struct ToLastDayOfMonthImpl } static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) { - if (d < 0) - return 0; - - /// 0xFFF9 is Int value for 2149-05-31 -- the last day where we can actually find LastDayOfMonth. This will also be the return value. - return time_zone.toLastDayNumOfMonth(ExtendedDayNum(std::min(d, Int32(0xFFF9)))); + return time_zone.toLastDayNumOfMonth(ExtendedDayNum(d)); } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { - /// 0xFFF9 is Int value for 2149-05-31 -- the last day where we can actually find LastDayOfMonth. This will also be the return value. - return time_zone.toLastDayNumOfMonth(DayNum(std::min(d, UInt16(0xFFF9)))); + return time_zone.toLastDayNumOfMonth(DayNum(d)); } static inline Int64 executeExtendedResult(Int64 t, const DateLUTImpl & time_zone) { @@ -251,7 +232,7 @@ struct ToStartOfQuarterImpl static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { - return t < 0 ? 
0 : time_zone.toFirstDayNumOfQuarter(ExtendedDayNum(std::min(Int64(time_zone.toDayNum(t)), Int64(DATE_LUT_MAX_DAY_NUM)))); + return time_zone.toFirstDayNumOfQuarter(time_zone.toDayNum(t)); } static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { @@ -259,7 +240,7 @@ struct ToStartOfQuarterImpl } static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) { - return d < 0 ? 0 : time_zone.toFirstDayNumOfQuarter(ExtendedDayNum(std::min(d, Int32(DATE_LUT_MAX_DAY_NUM)))); + return time_zone.toFirstDayNumOfQuarter(ExtendedDayNum(d)); } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { @@ -282,7 +263,7 @@ struct ToStartOfYearImpl static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) { - return t < 0 ? 0 : time_zone.toFirstDayNumOfYear(ExtendedDayNum(std::min(Int32(time_zone.toDayNum(t)), Int32(DATE_LUT_MAX_DAY_NUM)))); + return time_zone.toFirstDayNumOfYear(time_zone.toDayNum(t)); } static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { @@ -290,7 +271,7 @@ struct ToStartOfYearImpl } static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) { - return d < 0 ? 0 : time_zone.toFirstDayNumOfYear(ExtendedDayNum(std::min(d, Int32(DATE_LUT_MAX_DAY_NUM)))); + return time_zone.toFirstDayNumOfYear(ExtendedDayNum(d)); } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { @@ -340,10 +321,7 @@ struct ToStartOfMinuteImpl static inline UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) { - if (t.whole < 0 || (t.whole >= 0 && t.fractional < 0)) - return 0; - - return time_zone.toStartOfMinute(std::min(t.whole, Int64(0xffffffff))); + return time_zone.toStartOfMinute(t.whole); } static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) { @@ -677,10 +655,7 @@ struct ToStartOfHourImpl static inline UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone) { - if (t.whole < 0 || (t.whole >= 0 && t.fractional < 0)) - return 0; - - return time_zone.toStartOfHour(std::min(t.whole, Int64(0xffffffff))); + return time_zone.toStartOfHour(t.whole); } static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 8cbe3b0e532..f3c9f46097f 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -302,11 +302,6 @@ struct ConvertImpl } }; -/** Conversion of Date32 to Date: check bounds. - */ -template struct ConvertImpl - : DateTimeTransformImpl {}; - /** Conversion of DateTime to Date: throw off time component. */ template struct ConvertImpl @@ -325,17 +320,12 @@ struct ToDateTimeImpl static UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { - auto date_time = time_zone.fromDayNum(ExtendedDayNum(d)); - return date_time <= 0xffffffff ? UInt32(date_time) : UInt32(0xffffffff); + return time_zone.fromDayNum(DayNum(d)); } - static UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + static Int64 execute(Int32 d, const DateLUTImpl & time_zone) { - if (d < 0) - return 0; - - auto date_time = time_zone.fromDayNum(ExtendedDayNum(d)); - return date_time <= 0xffffffff ? date_time : 0xffffffff; + return time_zone.fromDayNum(ExtendedDayNum(d)); } static UInt32 execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) @@ -343,21 +333,10 @@ struct ToDateTimeImpl return dt; } - static UInt32 execute(Int64 d, const DateLUTImpl & time_zone) + // TODO: return UInt32 ??? 
+ static Int64 execute(Int64 dt64, const DateLUTImpl & /*time_zone*/) { - if (d < 0) - return 0; - - auto date_time = time_zone.toDate(d); - return date_time <= 0xffffffff ? date_time : 0xffffffff; - } - - static UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & /*time_zone*/) - { - if (t.whole < 0 || (t.whole >= 0 && t.fractional < 0)) - return 0; - - return std::min(t.whole, Int64(0xFFFFFFFF)); + return dt64; } }; @@ -377,12 +356,9 @@ struct ToDateTransform32Or64 static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) { // since converting to Date, no need in values outside of default LUT range. - if (from < 0) - return 0; - return (from < DATE_LUT_MAX_DAY_NUM) ? from - : std::min(Int32(time_zone.toDayNum(from)), Int32(DATE_LUT_MAX_DAY_NUM)); + : time_zone.toDayNum(std::min(time_t(from), time_t(0xFFFFFFFF))); } }; @@ -397,14 +373,9 @@ struct ToDateTransform32Or64Signed /// The function should be monotonic (better for query optimizations), so we saturate instead of overflow. if (from < 0) return 0; - - auto day_num = time_zone.toDayNum(ExtendedDayNum(static_cast(from))); - return day_num < DATE_LUT_MAX_DAY_NUM ? day_num : DATE_LUT_MAX_DAY_NUM; - return (from < DATE_LUT_MAX_DAY_NUM) - ? from - : std::min(Int32(time_zone.toDayNum(static_cast(from))), Int32(0xFFFFFFFF)); - + ? static_cast(from) + : time_zone.toDayNum(std::min(time_t(from), time_t(0xFFFFFFFF))); } }; @@ -435,7 +406,7 @@ struct ToDate32Transform32Or64 { return (from < DATE_LUT_MAX_EXTEND_DAY_NUM) ? from - : std::min(Int32(time_zone.toDayNum(from)), Int32(DATE_LUT_MAX_EXTEND_DAY_NUM)); + : time_zone.toDayNum(std::min(time_t(from), time_t(0xFFFFFFFF))); } }; @@ -451,7 +422,7 @@ struct ToDate32Transform32Or64Signed return daynum_min_offset; return (from < DATE_LUT_MAX_EXTEND_DAY_NUM) ? 
static_cast(from) - : time_zone.toDayNum(std::min(Int64(from), Int64(0xFFFFFFFF))); + : time_zone.toDayNum(std::min(time_t(Int64(from)), time_t(0xFFFFFFFF))); } }; @@ -477,49 +448,35 @@ struct ToDate32Transform8Or16Signed */ template struct ConvertImpl : DateTimeTransformImpl> {}; - template struct ConvertImpl : DateTimeTransformImpl> {}; - template struct ConvertImpl : DateTimeTransformImpl> {}; - template struct ConvertImpl : DateTimeTransformImpl> {}; - template struct ConvertImpl : DateTimeTransformImpl> {}; - template struct ConvertImpl : DateTimeTransformImpl> {}; - template struct ConvertImpl : DateTimeTransformImpl> {}; - template struct ConvertImpl : DateTimeTransformImpl> {}; template struct ConvertImpl : DateTimeTransformImpl> {}; - template struct ConvertImpl : DateTimeTransformImpl> {}; - template struct ConvertImpl : DateTimeTransformImpl> {}; - template struct ConvertImpl : DateTimeTransformImpl> {}; - template struct ConvertImpl : DateTimeTransformImpl> {}; - template struct ConvertImpl : DateTimeTransformImpl> {}; - template struct ConvertImpl : DateTimeTransformImpl> {}; - template struct ConvertImpl : DateTimeTransformImpl> {}; @@ -531,7 +488,7 @@ struct ToDateTimeTransform64 static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) { - return std::min(Int64(from), Int64(0xFFFFFFFF)); + return std::min(time_t(from), time_t(0xFFFFFFFF)); } }; @@ -553,12 +510,11 @@ struct ToDateTimeTransform64Signed { static constexpr auto name = "toDateTime"; - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & /* time_zone */) + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) { if (from < 0) return 0; - - return std::min(Int64(from), Int64(0xFFFFFFFF)); + return std::min(time_t(from), time_t(0xFFFFFFFF)); } }; @@ -678,6 +634,8 @@ struct FromDateTime64Transform } }; +/** Conversion of DateTime64 to Date or DateTime: discards fractional part. + */ template struct ConvertImpl : DateTimeTransformImpl> {}; template struct ConvertImpl @@ -701,7 +659,7 @@ struct ToDateTime64Transform DateTime64::NativeType execute(Int32 d, const DateLUTImpl & time_zone) const { - const auto dt = time_zone.fromDayNum(ExtendedDayNum(d)); + const auto dt = ToDateTimeImpl::execute(d, time_zone); return DecimalUtils::decimalFromComponentsWithMultiplier(dt, 0, scale_multiplier); } @@ -1855,7 +1813,7 @@ private: { /// Account for optional timezone argument. 
if (arguments.size() != 2 && arguments.size() != 3) - throw Exception{"Function " + getName() + " expects 2 or 3 arguments for DateTime64.", + throw Exception{"Function " + getName() + " expects 2 or 3 arguments for DataTypeDateTime64.", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION}; } else if (arguments.size() != 2) diff --git a/tests/queries/0_stateless/00900_long_parquet.reference b/tests/queries/0_stateless/00900_long_parquet.reference index 4dfc726145e..bbdad7243bd 100644 --- a/tests/queries/0_stateless/00900_long_parquet.reference +++ b/tests/queries/0_stateless/00900_long_parquet.reference @@ -44,12 +44,12 @@ converted: diff: dest: 79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06 00:00:00 2004-05-06 07:08:09.012000000 -80 81 82 83 84 85 86 87 88 89 str02 fstr2\0\0\0\0\0\0\0\0\0\0 2149-06-06 2006-08-09 10:11:12 2006-08-09 10:11:12.345000000 +80 81 82 83 84 85 86 87 88 89 str02 fstr2\0\0\0\0\0\0\0\0\0\0 2005-03-04 2006-08-09 10:11:12 2006-08-09 10:11:12.345000000 min: --128 0 0 0 0 0 0 0 -1 -1 string-1\0\0\0\0\0\0\0 fixedstring-1\0\0 2003-04-05 2149-06-06 2003-02-03 04:05:06.789000000 --108 108 8 92 -8 108 -40 -116 -1 -1 string-0\0\0\0\0\0\0\0 fixedstring\0\0\0\0 2001-02-03 2149-06-06 2002-02-03 04:05:06.789000000 +-128 0 0 0 0 0 0 0 -1 -1 string-1\0\0\0\0\0\0\0 fixedstring-1\0\0 2003-04-05 2003-02-03 2003-02-03 04:05:06.789000000 +-108 108 8 92 -8 108 -40 -116 -1 -1 string-0\0\0\0\0\0\0\0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 2002-02-03 04:05:06.789000000 79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06 2004-05-06 07:08:09.012000000 -127 -1 -1 -1 -1 -1 -1 -1 -1 -1 string-2\0\0\0\0\0\0\0 fixedstring-2\0\0 2004-06-07 2149-06-06 2004-02-03 04:05:06.789000000 +127 -1 -1 -1 -1 -1 -1 -1 -1 -1 string-2\0\0\0\0\0\0\0 fixedstring-2\0\0 2004-06-07 2004-02-03 2004-02-03 04:05:06.789000000 max: -128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1 -1 string-1 fixedstring-1\0\0 2003-04-05 00:00:00 2003-02-03 04:05:06 2003-02-03 04:05:06.789000000 -108 108 -1016 1116 -1032 1132 -1064 1164 -1 -1 string-0 fixedstring\0\0\0\0 2001-02-03 00:00:00 2002-02-03 04:05:06 2002-02-03 04:05:06.789000000 diff --git a/tests/queries/0_stateless/00941_to_custom_week.sql b/tests/queries/0_stateless/00941_to_custom_week.sql index 04ff08d4117..4dd5d209306 100644 --- a/tests/queries/0_stateless/00941_to_custom_week.sql +++ b/tests/queries/0_stateless/00941_to_custom_week.sql @@ -49,3 +49,4 @@ SELECT toStartOfWeek(x, 3) AS w3, toStartOfWeek(x_t, 3) AS wt3 FROM numbers(10); + diff --git a/tests/queries/0_stateless/01440_to_date_monotonicity.reference b/tests/queries/0_stateless/01440_to_date_monotonicity.reference index dd8545b721d..2dbec540fbb 100644 --- a/tests/queries/0_stateless/01440_to_date_monotonicity.reference +++ b/tests/queries/0_stateless/01440_to_date_monotonicity.reference @@ -1,4 +1,4 @@ 0 -1970-01-01 2120-07-26 1970-04-11 1970-01-01 2149-06-06 +1970-01-01 2106-02-07 1970-04-11 1970-01-01 2149-06-06 1970-01-01 02:00:00 2106-02-07 09:28:15 1970-01-01 02:16:40 2000-01-01 13:12:12 diff --git a/tests/queries/0_stateless/01921_datatype_date32.reference b/tests/queries/0_stateless/01921_datatype_date32.reference index dcfc193e119..14079b906cb 100644 --- a/tests/queries/0_stateless/01921_datatype_date32.reference +++ b/tests/queries/0_stateless/01921_datatype_date32.reference @@ -43,16 +43,16 @@ -------toMinute--------- -------toSecond--------- -------toStartOfDay--------- -1970-01-01 02:00:00 
-1970-01-01 02:00:00 -2106-02-07 00:00:00 -2106-02-07 00:00:00 +2036-02-07 07:31:20 +2036-02-07 07:31:20 +2027-10-01 11:03:28 +2027-10-17 11:03:28 2021-06-22 00:00:00 -------toMonday--------- -1970-01-01 -1970-01-01 -2149-06-02 -2149-06-02 +2079-06-07 +2079-06-07 +2120-07-06 +2120-07-20 2021-06-21 -------toISOWeek--------- 1 @@ -79,28 +79,28 @@ 229953 202125 -------toStartOfWeek--------- -1970-01-01 -1970-01-01 -2149-06-01 -2149-06-01 +2079-06-06 +2079-06-06 +2120-07-05 +2120-07-26 2021-06-20 -------toStartOfMonth--------- -1970-01-01 -1970-01-01 -2149-06-01 -2149-06-01 +2079-06-07 +2079-06-07 +2120-06-26 +2120-06-26 2021-06-01 -------toStartOfQuarter--------- -1970-01-01 -1970-01-01 -2149-04-01 -2149-04-01 +2079-06-07 +2079-06-07 +2120-04-26 +2120-04-26 2021-04-01 -------toStartOfYear--------- -1970-01-01 -1970-01-01 -2149-01-01 -2149-01-01 +2079-06-07 +2079-06-07 +2119-07-28 +2119-07-28 2021-01-01 -------toStartOfSecond--------- -------toStartOfMinute--------- diff --git a/tests/queries/0_stateless/02403_date_time_narrowing.reference b/tests/queries/0_stateless/02403_date_time_narrowing.reference deleted file mode 100644 index 7d6e91c61b8..00000000000 --- a/tests/queries/0_stateless/02403_date_time_narrowing.reference +++ /dev/null @@ -1,20 +0,0 @@ -1970-01-01 2149-06-06 1970-01-01 2149-06-06 1900-01-01 1970-01-02 1970-01-01 00:00:00 2106-02-07 06:28:15 -1970-01-01 2149-06-06 -1970-01-01 2149-06-06 -1970-01-01 00:00:00 2106-02-07 06:28:15 -1970-01-01 00:00:00 2106-02-07 06:28:15 -2106-02-07 06:28:15 -toStartOfDay -2106-02-07 00:00:00 1970-01-01 00:00:00 2106-02-07 00:00:00 1970-01-01 00:00:00 2106-02-07 00:00:00 -toStartOfWeek -1970-01-01 1970-01-01 1970-01-01 1970-01-01 1970-01-01 2149-06-01 1970-01-01 2149-06-02 -toMonday -1970-01-01 1970-01-01 2149-06-02 1970-01-01 2149-06-02 -toStartOfMonth -1970-01-01 2149-06-01 1970-01-01 2149-06-01 -toLastDayOfMonth -2149-05-31 1970-01-01 2149-05-31 1970-01-01 2149-05-31 -toStartOfQuarter -1970-01-01 2149-04-01 1970-01-01 2149-04-01 -toStartOfYear -1970-01-01 2149-01-01 1970-01-01 2149-01-01 diff --git a/tests/queries/0_stateless/02403_date_time_narrowing.sql b/tests/queries/0_stateless/02403_date_time_narrowing.sql deleted file mode 100644 index 07cbba6f31c..00000000000 --- a/tests/queries/0_stateless/02403_date_time_narrowing.sql +++ /dev/null @@ -1,74 +0,0 @@ --- check conversion of numbers to date/time -- -SELECT toDate(toInt32(toDate32('1930-01-01', 'UTC')), 'UTC'), - toDate(toInt32(toDate32('2151-01-01', 'UTC')), 'UTC'), - toDate(toInt64(toDateTime64('1930-01-01 12:12:12.123', 3, 'UTC')), 'UTC'), - toDate(toInt64(toDateTime64('2151-01-01 12:12:12.123', 3, 'UTC')), 'UTC'), - toDate32(toInt32(toDate32('1900-01-01', 'UTC')) - 1, 'UTC'), - toDate32(toInt32(toDate32('2299-12-31', 'UTC')) + 1, 'UTC'), - toDateTime(toInt64(toDateTime64('1930-01-01 12:12:12.123', 3, 'UTC')), 'UTC'), - toDateTime(toInt64(toDateTime64('2151-01-01 12:12:12.123', 3, 'UTC')), 'UTC'); - --- check conversion of extended range type to normal range type -- -SELECT toDate(toDate32('1930-01-01', 'UTC'), 'UTC'), - toDate(toDate32('2151-01-01', 'UTC'), 'UTC'); - -SELECT toDate(toDateTime64('1930-01-01 12:12:12.12', 3, 'UTC'), 'UTC'), - toDate(toDateTime64('2151-01-01 12:12:12.12', 3, 'UTC'), 'UTC'); - -SELECT toDateTime(toDateTime64('1930-01-01 12:12:12.12', 3, 'UTC'), 'UTC'), - toDateTime(toDateTime64('2151-01-01 12:12:12.12', 3, 'UTC'), 'UTC'); - -SELECT toDateTime(toDate32('1930-01-01', 'UTC'), 'UTC'), - toDateTime(toDate32('2151-01-01', 'UTC'), 'UTC'); - -SELECT 
toDateTime(toDate('2141-01-01', 'UTC'), 'UTC'); - --- test DateTimeTransforms -- -SELECT 'toStartOfDay'; -SELECT toStartOfDay(toDate('2141-01-01', 'UTC'), 'UTC'), - toStartOfDay(toDate32('1930-01-01', 'UTC'), 'UTC'), - toStartOfDay(toDate32('2141-01-01', 'UTC'), 'UTC'), - toStartOfDay(toDateTime64('1930-01-01 12:12:12.123', 3, 'UTC'), 'UTC'), - toStartOfDay(toDateTime64('2141-01-01 12:12:12.123', 3, 'UTC'), 'UTC'); - -SELECT 'toStartOfWeek'; -SELECT toStartOfWeek(toDate('1970-01-01', 'UTC')), - toStartOfWeek(toDate32('1970-01-01', 'UTC')), - toStartOfWeek(toDateTime('1970-01-01 10:10:10', 'UTC'), 0, 'UTC'), - toStartOfWeek(toDateTime64('1970-01-01 10:10:10.123', 3, 'UTC'), 1, 'UTC'), - toStartOfWeek(toDate32('1930-01-01', 'UTC')), - toStartOfWeek(toDate32('2151-01-01', 'UTC')), - toStartOfWeek(toDateTime64('1930-01-01 12:12:12.123', 3, 'UTC'), 2, 'UTC'), - toStartOfWeek(toDateTime64('2151-01-01 12:12:12.123', 3, 'UTC'), 3, 'UTC'); - -SELECT 'toMonday'; -SELECT toMonday(toDate('1970-01-02', 'UTC')), - toMonday(toDate32('1930-01-01', 'UTC')), - toMonday(toDate32('2151-01-01', 'UTC')), - toMonday(toDateTime64('1930-01-01 12:12:12.123', 3, 'UTC'), 'UTC'), - toMonday(toDateTime64('2151-01-01 12:12:12.123', 3, 'UTC'), 'UTC'); - -SELECT 'toStartOfMonth'; -SELECT toStartOfMonth(toDate32('1930-01-01', 'UTC')), - toStartOfMonth(toDate32('2151-01-01', 'UTC')), - toStartOfMonth(toDateTime64('1930-01-01 12:12:12.123', 3, 'UTC'), 'UTC'), - toStartOfMonth(toDateTime64('2151-01-01 12:12:12.123', 3, 'UTC'), 'UTC'); - -SELECT 'toLastDayOfMonth'; -SELECT toLastDayOfMonth(toDate('2149-06-03', 'UTC')), - toLastDayOfMonth(toDate32('1930-01-01', 'UTC')), - toLastDayOfMonth(toDate32('2151-01-01', 'UTC')), - toLastDayOfMonth(toDateTime64('1930-01-01 12:12:12.123', 3, 'UTC'), 'UTC'), - toLastDayOfMonth(toDateTime64('2151-01-01 12:12:12.123', 3, 'UTC'), 'UTC'); - -SELECT 'toStartOfQuarter'; -SELECT toStartOfQuarter(toDate32('1930-01-01', 'UTC')), - toStartOfQuarter(toDate32('2151-01-01', 'UTC')), - toStartOfQuarter(toDateTime64('1930-01-01 12:12:12.123', 3, 'UTC'), 'UTC'), - toStartOfQuarter(toDateTime64('2151-01-01 12:12:12.123', 3, 'UTC'), 'UTC'); - -SELECT 'toStartOfYear'; -SELECT toStartOfYear(toDate32('1930-01-01', 'UTC')), - toStartOfYear(toDate32('2151-01-01', 'UTC')), - toStartOfYear(toDateTime64('1930-01-01 12:12:12.123', 3, 'UTC'), 'UTC'), - toStartOfYear(toDateTime64('2151-01-01 12:12:12.123', 3, 'UTC'), 'UTC'); From ef51bb3ee7a0dbeb36e257941d1f1a9d3768ddc3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Oct 2022 05:01:13 +0200 Subject: [PATCH 105/252] Add a test --- tests/queries/0_stateless/02462_int_to_date.reference | 4 ++++ tests/queries/0_stateless/02462_int_to_date.sql | 4 ++++ 2 files changed, 8 insertions(+) create mode 100644 tests/queries/0_stateless/02462_int_to_date.reference create mode 100644 tests/queries/0_stateless/02462_int_to_date.sql diff --git a/tests/queries/0_stateless/02462_int_to_date.reference b/tests/queries/0_stateless/02462_int_to_date.reference new file mode 100644 index 00000000000..f31441cf3b8 --- /dev/null +++ b/tests/queries/0_stateless/02462_int_to_date.reference @@ -0,0 +1,4 @@ +20221011 2022-10-11 1665519765 +20221011 2022-10-11 1665519765 +20221011 2022-10-11 1665519765 Int32 +20221011 2022-10-11 1665519765 UInt32 diff --git a/tests/queries/0_stateless/02462_int_to_date.sql b/tests/queries/0_stateless/02462_int_to_date.sql new file mode 100644 index 00000000000..cd470ca12f6 --- /dev/null +++ b/tests/queries/0_stateless/02462_int_to_date.sql @@ -0,0 +1,4 
@@
+select toYYYYMMDD(toDate(recordTimestamp, 'Europe/Amsterdam')), toDate(recordTimestamp, 'Europe/Amsterdam'), toInt64(1665519765) as recordTimestamp;
+select toYYYYMMDD(toDate(recordTimestamp, 'Europe/Amsterdam')), toDate(recordTimestamp, 'Europe/Amsterdam'), toUInt64(1665519765) as recordTimestamp;
+select toYYYYMMDD(toDate(recordTimestamp, 'Europe/Amsterdam')), toDate(recordTimestamp, 'Europe/Amsterdam'), toInt32(1665519765) as recordTimestamp, toTypeName(recordTimestamp);
+select toYYYYMMDD(toDate(recordTimestamp, 'Europe/Amsterdam')), toDate(recordTimestamp, 'Europe/Amsterdam'), toUInt32(1665519765) as recordTimestamp, toTypeName(recordTimestamp);

From 2023f38764dea3df043f912db7bcbb19c8221dd1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 17 Oct 2022 05:50:24 +0200
Subject: [PATCH 106/252] Fixups

---
 docs/ru/sql-reference/functions/date-time-functions.md     | 5 -----
 docs/zh/getting-started/example-datasets/uk-price-paid.mdx | 2 +-
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md
index 7942796f6ba..368ddce1cfe 100644
--- a/docs/ru/sql-reference/functions/date-time-functions.md
+++ b/docs/ru/sql-reference/functions/date-time-functions.md
@@ -310,20 +310,15 @@ SELECT toStartOfISOYear(toDate('2017-01-01')) AS ISOYear20170101;
 Округляет дату или дату-с-временем вниз до первого дня месяца.
 Возвращается дата.
 
-<<<<<<< HEAD
 ## toLastDayOfMonth
 
 Округляет дату или дату-с-временем до последнего числа месяца.
 Возвращается дата.
 
-Если `toLastDayOfMonth` вызывается с аргументом типа `Date` большим чем 2149-05-31, то результат будет вычислен от аргумента 2149-05-31.
-
-=======
 :::note "Attention"
     Возвращаемое значение для некорректных дат зависит от реализации. ClickHouse может вернуть нулевую дату, выбросить исключение, или выполнить «естественное» перетекание дат между месяцами.
 :::
 
->>>>>>> parent of df934d8762 (Merge pull request #40217 from zvonand/zvonand-minmax)
 ## toMonday {#tomonday}
 
 Округляет дату или дату-с-временем вниз до ближайшего понедельника.
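Before the second hunk of this fixup, a short aside on the 02462_int_to_date test added in the previous commit (illustrative queries, not from the patch): per the restored `ToDateTransform32Or64` logic above, `toDate()` treats integers below the maximum day number as day numbers and larger ones as Unix timestamps.

```sql
-- Assuming a server timezone of UTC:
SELECT toDate(17815);       -- below the day-number ceiling: days since epoch -> 2018-10-11
SELECT toDate(1665519765);  -- larger values: interpreted as a Unix timestamp -> 2022-10-11
```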
diff --git a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx
index 3a14a3ce55d..9c0562c73d9 100644
--- a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx
+++ b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx
@@ -342,7 +342,7 @@ LIMIT 100
 
 ## 使用 Projection 加速查询 {#speedup-with-projections}
 
-[Projections](../../sql-reference/statements/alter/projection.md) 允许我们通过存储任意格式的预先聚合的数据来提高查询速度。在此示例中，我们创建了一个按年份、地区和城镇分组的房产的平均价格、总价格和数量的 Projection。在执行时，如果 ClickHouse 认为 Projection 可以提高查询的性能，它将使用 Projection（何时使用由 ClickHouse 决定）。
+Projections 允许我们通过存储任意格式的预先聚合的数据来提高查询速度。在此示例中，我们创建了一个按年份、地区和城镇分组的房产的平均价格、总价格和数量的 Projection。在执行时，如果 ClickHouse 认为 Projection 可以提高查询的性能，它将使用 Projection（何时使用由 ClickHouse 决定）。
 
 ### 构建投影{#build-projection}

From f3c4d0eff2692faeea054969dbea4186730d877a Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Mon, 17 Oct 2022 07:29:22 +0000
Subject: [PATCH 107/252] Fix keeperMap test

---
 src/Storages/StorageKeeperMap.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp
index 8dce6a7eb10..f0bf4e431ae 100644
--- a/src/Storages/StorageKeeperMap.cpp
+++ b/src/Storages/StorageKeeperMap.cpp
@@ -690,7 +690,7 @@ Chunk StorageKeeperMap::getBySerializedKeys(const std::span k
 full_key_paths.emplace_back(fullPathForKey(key));
 }
 
- auto values = client->get(full_key_paths);
+ auto values = client->tryGet(full_key_paths);
 
 for (size_t i = 0; i < keys.size(); ++i)
 {

From 878dee40b1fe1f9bc301366edd5e336719725a9f Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Mon, 17 Oct 2022 07:36:49 +0000
Subject: [PATCH 108/252] Use ZooKeeper in a single test

---
 .../test.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/integration/test_replicated_merge_tree_with_auxiliary_zookeepers/test.py b/tests/integration/test_replicated_merge_tree_with_auxiliary_zookeepers/test.py
index c46e6840153..cf76d47157a 100644
--- a/tests/integration/test_replicated_merge_tree_with_auxiliary_zookeepers/test.py
+++ b/tests/integration/test_replicated_merge_tree_with_auxiliary_zookeepers/test.py
@@ -11,11 +11,13 @@ node1 = cluster.add_instance(
 "node1",
 main_configs=["configs/zookeeper_config.xml", "configs/remote_servers.xml"],
 with_zookeeper=True,
+ use_keeper=False,
 )
 node2 = cluster.add_instance(
 "node2",
 main_configs=["configs/zookeeper_config.xml", "configs/remote_servers.xml"],
 with_zookeeper=True,
+ use_keeper=False,
 )

From 051e3a0db57f6b4275ecc12076963d604f6ac547 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Mon, 17 Oct 2022 12:27:05 +0200
Subject: [PATCH 109/252] Fix if

---
 src/Storages/StorageReplicatedMergeTree.cpp | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 6eec6178a1d..7a2ff56a782 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -7769,13 +7769,18 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID(
 /// Very complex case. It means that lock already doesn't exist when we tried to remove it.
 /// So we don't know are we owner of this part or not. Maybe we just mutated it, renamed on disk and failed to lock in ZK.
 /// But during mutation we can have hardlinks to another part. So it's not Ok to remove blobs of this part if it was mutated.
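        /// Aside added for exposition (an editor's reading of the hunk below, not author
        /// text): previously a plain ZNONODE without a parent node hit the unconditional
        /// throw; after this fix, ZNONODE with a parent refuses blob removal, ZNONODE
        /// without a parent falls through and continues, and any other error still throws.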
- if (ec == Coordination::Error::ZNONODE && has_parent) + if (ec == Coordination::Error::ZNONODE) { - LOG_INFO(logger, "Lock on path {} for part {} doesn't exist, refuse to remove blobs", zookeeper_part_replica_node, part_name); - return {false, {}}; + if (has_parent) + { + LOG_INFO(logger, "Lock on path {} for part {} doesn't exist, refuse to remove blobs", zookeeper_part_replica_node, part_name); + return {false, {}}; + } + } + else + { + throw zkutil::KeeperException(ec, zookeeper_part_replica_node); } - - throw zkutil::KeeperException(ec, zookeeper_part_replica_node); } /// Check, maybe we were the last replica and can remove part forever From f7b2addae9dad0a2d39012f85bb23af9f546dafc Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 16 Oct 2022 23:11:29 +0200 Subject: [PATCH 110/252] Bump llvm/clang to 15.0.2 Otherwise right now codebrowser image is broken [1], due to changes in cmake: #10 1.607 CMake Error at /usr/lib/llvm-15/lib/cmake/llvm/LLVMExports.cmake:1693 (message): #10 1.607 The imported target "merge-fdata" references the file #10 1.607 #10 1.607 "/usr/lib/llvm-15/bin/merge-fdata" [1]: https://s3.amazonaws.com/clickhouse-test-reports/42349/36502270eb9f195dcd2dc8b350db9367c619f0f8/push_to_dockerhub_amd64.html Signed-off-by: Azat Khuzhin --- docker/test/util/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 57880bfc1d6..58a329e9d53 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -5,6 +5,7 @@ FROM ubuntu:20.04 ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list +# 15.0.2 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=15 RUN apt-get update \ From a0ceef48dad983e5bd5541173a451c7849a1e5f9 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 17 Oct 2022 10:22:26 +0200 Subject: [PATCH 111/252] Add a workaround for "The imported target "merge-fdata" references the file" error Signed-off-by: Azat Khuzhin --- docker/test/util/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 58a329e9d53..57544bdc090 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -59,6 +59,9 @@ RUN apt-get update \ RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld # for external_symbolizer_path RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer +# FIXME: workaround for "The imported target "merge-fdata" references the file" error +# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d +RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake ARG CCACHE_VERSION=4.6.1 RUN mkdir /tmp/ccache \ From 0c62e09a6ee97a39ede5dcdd8114db5ffab1d87e Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 14 Oct 2022 12:27:48 +0200 Subject: [PATCH 112/252] check-style: fix ErrorCodes check Signed-off-by: Azat Khuzhin --- utils/check-style/check-style | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 5805fcd1d43..a0556d971e8 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -59,10 +59,7 @@ declare -A EXTERN_TYPES EXTERN_TYPES[ErrorCodes]=int EXTERN_TYPES[ProfileEvents]=Event EXTERN_TYPES[CurrentMetrics]=Metric -declare -A 
EXTERN_ALLOWED_CHARS -EXTERN_ALLOWED_CHARS[ErrorCodes]='_A-Za-z' -EXTERN_ALLOWED_CHARS[ProfileEvents]='_A-Za-z' -EXTERN_ALLOWED_CHARS[CurrentMetrics]='_A-Za-z' + EXTERN_TYPES_EXCLUDES=( ProfileEvents::global_counters ProfileEvents::Event @@ -89,18 +86,28 @@ EXTERN_TYPES_EXCLUDES=( CurrentMetrics::Value ErrorCodes::ErrorCode + ErrorCodes::getName + ErrorCodes::increment + ErrorCodes::end + ErrorCodes::values + ErrorCodes::values[i] + ErrorCodes::getErrorCodeByName ) for extern_type in ${!EXTERN_TYPES[@]}; do type_of_extern=${EXTERN_TYPES[$extern_type]} - allowed_chars=${EXTERN_ALLOWED_CHARS[$extern_type]} + allowed_chars='[_A-Za-z]+' # Unused # NOTE: to fix automatically, replace echo with: # sed -i "/extern const $type_of_extern $val/d" $file find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | { - grep -vP $EXCLUDE_DIRS | xargs grep -l -P "extern const $type_of_extern [$allowed_chars]+" + # NOTE: the check is pretty dumb and distinguish only by the type_of_extern, + # and this matches with zkutil::CreateMode + grep -v 'src/Common/ZooKeeper/Types.h' + } | { + grep -vP $EXCLUDE_DIRS | xargs grep -l -P "extern const $type_of_extern $allowed_chars" } | while read file; do - grep -P "extern const $type_of_extern [$allowed_chars]+;" $file | sed -r -e "s/^.*?extern const $type_of_extern ([$allowed_chars]+);.*?$/\1/" | while read val; do + grep -P "extern const $type_of_extern $allowed_chars;" $file | sed -r -e "s/^.*?extern const $type_of_extern ($allowed_chars);.*?$/\1/" | while read val; do if ! grep -q "$extern_type::$val" $file; then # Excludes for SOFTWARE_EVENT/HARDWARE_EVENT/CACHE_EVENT in ThreadProfileEvents.cpp if [[ ! $extern_type::$val =~ ProfileEvents::Perf.* ]]; then @@ -112,11 +119,13 @@ for extern_type in ${!EXTERN_TYPES[@]}; do # Undefined # NOTE: to fix automatically, replace echo with: - # ( grep -q -F 'namespace $extern_type' $file && sed -i -r "0,/(\s*)extern const $type_of_extern [$allowed_chars]+/s//\1extern const $type_of_extern $val;\n&/" $file || awk '{ print; if (ns == 1) { ns = 2 }; if (ns == 2) { ns = 0; print "namespace $extern_type\n{\n extern const $type_of_extern '$val';\n}" } }; /namespace DB/ { ns = 1; };' < $file > ${file}.tmp && mv ${file}.tmp $file ) + # ( grep -q -F 'namespace $extern_type' $file && \ + # sed -i -r "0,/(\s*)extern const $type_of_extern [$allowed_chars]+/s//\1extern const $type_of_extern $val;\n&/" $file || \ + # awk '{ print; if (ns == 1) { ns = 2 }; if (ns == 2) { ns = 0; print "namespace $extern_type\n{\n extern const $type_of_extern '$val';\n}" } }; /namespace DB/ { ns = 1; };' < $file > ${file}.tmp && mv ${file}.tmp $file ) find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | { - grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::[$allowed_chars]+" + grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::$allowed_chars" } | while read file; do - grep -P "$extern_type::[$allowed_chars]+" $file | grep -P -v '^\s*//' | sed -r -e "s/^.*?$extern_type::([$allowed_chars]+).*?$/\1/" | while read val; do + grep -P "$extern_type::$allowed_chars" $file | grep -P -v '^\s*//' | sed -r -e "s/^.*?$extern_type::($allowed_chars).*?$/\1/" | while read val; do if ! grep -q "extern const $type_of_extern $val" $file; then if ! 
in_array "$extern_type::$val" "${EXTERN_TYPES_EXCLUDES[@]}"; then echo "$extern_type::$val is used in file $file but not defined" @@ -127,9 +136,9 @@ for extern_type in ${!EXTERN_TYPES[@]}; do # Duplicates find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | { - grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::[$allowed_chars]+" + grep -vP $EXCLUDE_DIRS | xargs grep -l -P "$extern_type::$allowed_chars" } | while read file; do - grep -P "extern const $type_of_extern [$allowed_chars]+;" $file | sort | uniq -c | grep -v -P ' +1 ' && echo "Duplicate $extern_type in file $file" + grep -P "extern const $type_of_extern $allowed_chars;" $file | sort | uniq -c | grep -v -P ' +1 ' && echo "Duplicate $extern_type in file $file" done done From 1b65ea6c6b1fb4b6a724fc977d38150d4c27a9c9 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Mon, 17 Oct 2022 12:56:20 +0200 Subject: [PATCH 113/252] other cluster name --- tests/queries/0_stateless/02458_empty_hdfs_url.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02458_empty_hdfs_url.sql b/tests/queries/0_stateless/02458_empty_hdfs_url.sql index 7e91043255d..ccc554fc628 100644 --- a/tests/queries/0_stateless/02458_empty_hdfs_url.sql +++ b/tests/queries/0_stateless/02458_empty_hdfs_url.sql @@ -1,5 +1,5 @@ -- Tags: no-fasttest, no-cpu-aarch64 -SELECT * FROM hdfsCluster('test_cluster', '', 'TSV'); -- { serverError BAD_ARGUMENTS } -SELECT * FROM hdfsCluster('test_cluster', ' ', 'TSV'); -- { serverError BAD_ARGUMENTS } -SELECT * FROM hdfsCluster('test_cluster', '/', 'TSV'); -- { serverError BAD_ARGUMENTS } -SELECT * FROM hdfsCluster('test_cluster', 'http/', 'TSV'); -- { serverError BAD_ARGUMENTS } \ No newline at end of file +SELECT * FROM hdfsCluster('test_shard_localhost', '', 'TSV'); -- { serverError BAD_ARGUMENTS } +SELECT * FROM hdfsCluster('test_shard_localhost', ' ', 'TSV'); -- { serverError BAD_ARGUMENTS } +SELECT * FROM hdfsCluster('test_shard_localhost', '/', 'TSV'); -- { serverError BAD_ARGUMENTS } +SELECT * FROM hdfsCluster('test_shard_localhost', 'http/', 'TSV'); -- { serverError BAD_ARGUMENTS } \ No newline at end of file From 238f86d892b2a9e42c5bc75a1144e362a2a3163e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 17 Oct 2022 11:05:26 +0000 Subject: [PATCH 114/252] Small refactoring --- src/Coordination/KeeperDispatcher.cpp | 2 +- src/Coordination/KeeperSnapshotManagerS3.cpp | 35 ++++++++++---------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 0851a337e02..6e9116d4b75 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -196,7 +196,7 @@ void KeeperDispatcher::snapshotThread() try { - [[maybe_unused]] auto snapshot_path = task.create_snapshot(std::move(task.snapshot)); + auto snapshot_path = task.create_snapshot(std::move(task.snapshot)); if (snapshot_path.empty()) continue; diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 56d5cceb33c..2e19d496407 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -135,9 +135,6 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa if (s3_client == nullptr) return; - LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_path); - ReadBufferFromFile snapshot_file(snapshot_path); 
- S3Settings::ReadWriteSettings read_write_settings; read_write_settings.upload_part_size_multiply_parts_count_threshold = 10000; @@ -152,9 +149,6 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa }; }; - auto snapshot_name = fs::path(snapshot_path).filename().string(); - auto lock_file = fmt::format(".{}_LOCK", snapshot_name); - const auto file_exists = [&](const auto & key) { Aws::S3::Model::HeadObjectRequest request; @@ -172,6 +166,13 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa return false; }; + + LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_path); + ReadBufferFromFile snapshot_file(snapshot_path); + + auto snapshot_name = fs::path(snapshot_path).filename().string(); + auto lock_file = fmt::format(".{}_LOCK", snapshot_name); + if (file_exists(snapshot_name)) { LOG_ERROR(log, "Snapshot {} already exists", snapshot_name); @@ -193,20 +194,18 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa lock_writer.finalize(); // We read back the written UUID, if it's the same we can upload the file - std::string read_uuid; + ReadBufferFromS3 lock_reader { - ReadBufferFromS3 lock_reader - { - s3_client->client, - s3_client->uri.bucket, - lock_file, - "", - 1, - {} - }; + s3_client->client, + s3_client->uri.bucket, + lock_file, + "", + 1, + {} + }; - readStringUntilEOF(read_uuid, lock_reader); - } + std::string read_uuid; + readStringUntilEOF(read_uuid, lock_reader); if (read_uuid != toString(uuid)) { From 9af817bb438c8ae0cd47655cb81a73bf9d3bd2bf Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Mon, 17 Oct 2022 20:25:31 +0800 Subject: [PATCH 115/252] Fix read from buffer with read in order (#42236) --- src/Storages/StorageBuffer.cpp | 10 ++++++++++ .../0_stateless/02459_read_in_order_bufer.reference | 5 +++++ .../0_stateless/02459_read_in_order_bufer.sql | 13 +++++++++++++ 3 files changed, 28 insertions(+) create mode 100644 tests/queries/0_stateless/02459_read_in_order_bufer.reference create mode 100644 tests/queries/0_stateless/02459_read_in_order_bufer.sql diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 3fc00a79bbe..f6b397950ed 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include #include @@ -334,6 +336,14 @@ void StorageBuffer::read( pipes_from_buffers.emplace_back(std::make_shared(column_names, buf, storage_snapshot)); pipe_from_buffers = Pipe::unitePipes(std::move(pipes_from_buffers)); + if (query_info.getInputOrderInfo()) + { + /// Each buffer has one block, and it not guaranteed that rows in each block are sorted by order keys + pipe_from_buffers.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, query_info.getInputOrderInfo()->sort_description_for_merging, 0); + }); + } } if (pipe_from_buffers.empty()) diff --git a/tests/queries/0_stateless/02459_read_in_order_bufer.reference b/tests/queries/0_stateless/02459_read_in_order_bufer.reference new file mode 100644 index 00000000000..b040bdf6167 --- /dev/null +++ b/tests/queries/0_stateless/02459_read_in_order_bufer.reference @@ -0,0 +1,5 @@ +9 +8 +7 +6 +5 diff --git a/tests/queries/0_stateless/02459_read_in_order_bufer.sql b/tests/queries/0_stateless/02459_read_in_order_bufer.sql new file mode 100644 index 00000000000..5a6e0a3dbc6 --- /dev/null +++ b/tests/queries/0_stateless/02459_read_in_order_bufer.sql @@ -0,0 +1,13 @@ +CREATE TABLE 
+CREATE TABLE mytable (`a` UInt8) ENGINE = Buffer(currentDatabase(), 'mytable_stored', 4, 600, 3600, 10, 100, 10000, 10000000);
+INSERT INTO mytable VALUES (0);
+INSERT INTO mytable VALUES (1);
+INSERT INTO mytable VALUES (2);
+INSERT INTO mytable VALUES (3);
+INSERT INTO mytable VALUES (4);
+INSERT INTO mytable VALUES (5);
+INSERT INTO mytable VALUES (6);
+INSERT INTO mytable VALUES (7);
+INSERT INTO mytable VALUES (8);
+INSERT INTO mytable VALUES (9);
+SELECT a FROM mytable ORDER BY a DESC LIMIT 5;

From b8eb02e2394e7101aa87c71717b906905d7343c9 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Mon, 17 Oct 2022 12:30:10 +0000
Subject: [PATCH 116/252] Add a comment.

---
 src/Interpreters/ClusterProxy/executeQuery.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index 39a7970683a..8863683fba6 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -97,6 +97,12 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c
         new_settings.limit.changed = false;
     }

+    /// The additional_table_filters setting may be applied to a Distributed table.
+    /// If the query is executed on the remote shard only up to WithMergeableState, it is impossible to filter on the initiator.
+    /// We need to propagate the setting, but change the table name from the distributed table to its source table.
+    ///
+    /// We don't try to analyze the setting again here: if query_info->additional_filter_ast is not empty, some filter was already applied.
+    /// It's simply easier to add this filter for the source table.
    if (query_info && query_info->additional_filter_ast)
    {
        Tuple tuple;

From 0de06f59b15ee825a8215af87480510e79028ab4 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Mon, 17 Oct 2022 13:23:40 +0000
Subject: [PATCH 117/252] Add test for index.
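
The new test checks that additional_table_filters is applied before reading,
so the minmax index on length(y) can prune whole granules. A representative
case, taken verbatim from the added test (only 2 of the 4 rows are read):

    set max_rows_to_read = 2;
    select * from table_1 order by x settings additional_table_filters={'table_1' : 'length(y) >= 3'};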
--- .../02346_additional_filters_index.reference | 30 +++++++++++++++++++ .../02346_additional_filters_index.sql | 24 +++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 tests/queries/0_stateless/02346_additional_filters_index.reference create mode 100644 tests/queries/0_stateless/02346_additional_filters_index.sql diff --git a/tests/queries/0_stateless/02346_additional_filters_index.reference b/tests/queries/0_stateless/02346_additional_filters_index.reference new file mode 100644 index 00000000000..d4b9509cb3c --- /dev/null +++ b/tests/queries/0_stateless/02346_additional_filters_index.reference @@ -0,0 +1,30 @@ +-- { echoOn } +set max_rows_to_read = 2; +select * from table_1 order by x settings additional_table_filters={'table_1' : 'x > 3'}; +4 dddd +select * from table_1 order by x settings additional_table_filters={'table_1' : 'x < 3'}; +1 a +2 bb +select * from table_1 order by x settings additional_table_filters={'table_1' : 'length(y) >= 3'}; +3 ccc +4 dddd +select * from table_1 order by x settings additional_table_filters={'table_1' : 'length(y) < 3'}; +1 a +2 bb +set max_rows_to_read = 4; +select * from distr_table order by x settings additional_table_filters={'distr_table' : 'x > 3'}; +4 dddd +4 dddd +select * from distr_table order by x settings additional_table_filters={'distr_table' : 'x < 3'}; +1 a +1 a +2 bb +2 bb +select * from distr_table order by x settings additional_table_filters={'distr_table' : 'length(y) > 3'}; +4 dddd +4 dddd +select * from distr_table order by x settings additional_table_filters={'distr_table' : 'length(y) < 3'}; +1 a +1 a +2 bb +2 bb diff --git a/tests/queries/0_stateless/02346_additional_filters_index.sql b/tests/queries/0_stateless/02346_additional_filters_index.sql new file mode 100644 index 00000000000..0d40cc1f898 --- /dev/null +++ b/tests/queries/0_stateless/02346_additional_filters_index.sql @@ -0,0 +1,24 @@ +-- Tags: distributed + +create table table_1 (x UInt32, y String, INDEX a (length(y)) TYPE minmax GRANULARITY 1) engine = MergeTree order by x settings index_granularity = 2; +insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); + +CREATE TABLE distr_table (x UInt32, y String) ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), 'table_1'); + +-- { echoOn } +set max_rows_to_read = 2; + +select * from table_1 order by x settings additional_table_filters={'table_1' : 'x > 3'}; +select * from table_1 order by x settings additional_table_filters={'table_1' : 'x < 3'}; + +select * from table_1 order by x settings additional_table_filters={'table_1' : 'length(y) >= 3'}; +select * from table_1 order by x settings additional_table_filters={'table_1' : 'length(y) < 3'}; + +set max_rows_to_read = 4; + +select * from distr_table order by x settings additional_table_filters={'distr_table' : 'x > 3'}; +select * from distr_table order by x settings additional_table_filters={'distr_table' : 'x < 3'}; + +select * from distr_table order by x settings additional_table_filters={'distr_table' : 'length(y) > 3'}; +select * from distr_table order by x settings additional_table_filters={'distr_table' : 'length(y) < 3'}; + From db6524bb3b6ef469bc9fd3f9e8e5c4b33c77c99a Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 17 Oct 2022 15:44:17 +0200 Subject: [PATCH 118/252] Smaller thrashold for multipart upload part size increase --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 1f784497b34..07618ee731d 100644 --- 
a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -84,7 +84,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, connections_with_failover_max_tries, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, "The maximum number of attempts to connect to replicas.", 0) \ M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \ M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \ - M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 1000, "Each time this number of parts was uploaded to S3 s3_min_upload_part_size multiplied by s3_upload_part_size_multiply_factor.", 0) \ + M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3 s3_min_upload_part_size multiplied by s3_upload_part_size_multiply_factor.", 0) \ M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \ M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \ M(UInt64, s3_max_unexpected_write_error_retries, 4, "The maximum number of retries in case of unexpected errors during S3 write.", 0) \ From 8157fd0595ea13928067b2f0adc271fe9344519c Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 17 Oct 2022 14:07:34 +0000 Subject: [PATCH 119/252] fix build --- src/Coordination/KeeperSnapshotManagerS3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Coordination/KeeperSnapshotManagerS3.h b/src/Coordination/KeeperSnapshotManagerS3.h index 6d5e8fe1a4a..5b62d114aae 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.h +++ b/src/Coordination/KeeperSnapshotManagerS3.h @@ -57,7 +57,7 @@ public: KeeperSnapshotManagerS3() = default; void updateS3Configuration(const Poco::Util::AbstractConfiguration &) {} - void uploadSnapshot(const std::string &) {} + void uploadSnapshot(const std::string &, [[maybe_unused]] bool async_upload = true) {} void startup(const Poco::Util::AbstractConfiguration &) {} From cb55308aa73e1de7d977da1cb28b0c16f0ca2db8 Mon Sep 17 00:00:00 2001 From: Jacob Herrington Date: Mon, 17 Oct 2022 11:03:01 -0500 Subject: [PATCH 120/252] Fix link to blog It seems like the `/en` path may not be necessary anymore; it leads me to an empty page on the blog. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9f4a39a2c97..003b78a3cbb 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ ClickHouse® is an open-source column-oriented database management system that a * [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information. * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. * [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-rxm3rdrk-lIUmhLC3V8WTaL0TGxsOmg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. -* [Blog](https://clickhouse.com/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events. +* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events. 
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation. * [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev. * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any. From fffecbb9adc8d3de095352a0e2a4822343ede7e5 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 17 Oct 2022 18:08:52 +0200 Subject: [PATCH 121/252] better error message for unsupported delimiters in custom formats --- src/Formats/EscapingRuleUtils.cpp | 16 ++++++++++++++++ src/Formats/EscapingRuleUtils.h | 4 +++- .../Impl/CustomSeparatedRowInputFormat.cpp | 13 +++++++++++++ .../Formats/Impl/CustomSeparatedRowInputFormat.h | 1 + .../Formats/Impl/TemplateRowInputFormat.cpp | 15 +++++++++++---- .../Formats/RowInputFormatWithNamesAndTypes.h | 2 +- .../00938_template_input_format.reference | 2 ++ .../0_stateless/00938_template_input_format.sh | 10 ++++++++++ .../01014_format_custom_separated.reference | 1 + .../0_stateless/01014_format_custom_separated.sh | 5 +++++ 10 files changed, 63 insertions(+), 6 deletions(-) diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index f1a97c84fec..f22c1501df0 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -846,4 +847,19 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo return result; } + +void checkSupportedDelimiterAfterField(FormatSettings::EscapingRule escaping_rule, const String & delimiter, const DataTypePtr & type) +{ + if (escaping_rule != FormatSettings::EscapingRule::Escaped) + return; + + bool is_supported_delimiter_after_string = !delimiter.empty() && (delimiter.front() == '\t' || delimiter.front() == '\n'); + if (is_supported_delimiter_after_string) + return; + + /// Nullptr means that field is skipped and it's equivalent to String + if (!type || isString(removeNullable(removeLowCardinality(type)))) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "'Escaped' serialization requires delimiter after String field to start with '\\t' or '\\n'"); +} + } diff --git a/src/Formats/EscapingRuleUtils.h b/src/Formats/EscapingRuleUtils.h index 901679b6a05..c8b710002a5 100644 --- a/src/Formats/EscapingRuleUtils.h +++ b/src/Formats/EscapingRuleUtils.h @@ -77,6 +77,8 @@ void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, c void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, const std::unordered_set * numbers_parsed_from_json_strings = nullptr); void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings); -String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings,FormatSettings::EscapingRule escaping_rule); +String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule); + +void checkSupportedDelimiterAfterField(FormatSettings::EscapingRule escaping_rule, const String & delimiter, const DataTypePtr & type); } diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp index 1c99a5484a2..16df132b9d8 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp @@ -67,6 
+67,19 @@ CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat(
     }
 }

+void CustomSeparatedRowInputFormat::readPrefix()
+{
+    RowInputFormatWithNamesAndTypes::readPrefix();
+
+    /// Provide a better error message for unsupported delimiters.
+    for (const auto & column_index : column_mapping->column_indexes_for_input_fields)
+    {
+        if (column_index)
+            checkSupportedDelimiterAfterField(format_settings.custom.escaping_rule, format_settings.custom.field_delimiter, data_types[*column_index]);
+        else
+            checkSupportedDelimiterAfterField(format_settings.custom.escaping_rule, format_settings.custom.field_delimiter, nullptr);
+    }
+}

 bool CustomSeparatedRowInputFormat::allowSyncAfterError() const
 {
diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
index c7e332b983f..e7e96ab87b1 100644
--- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
+++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
@@ -30,6 +30,7 @@ private:
     bool allowSyncAfterError() const override;
     void syncAfterError() override;
+    void readPrefix() override;

     std::unique_ptr buf;
     bool ignore_spaces;
diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
index 785658c0fa2..76fd0d2a907 100644
--- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
@@ -53,18 +53,25 @@ TemplateRowInputFormat::TemplateRowInputFormat(const Block & header_, std::uniqu
     std::vector column_in_format(header_.columns(), false);
     for (size_t i = 0; i < row_format.columnsCount(); ++i)
     {
-        if (row_format.format_idx_to_column_idx[i])
+        const auto & column_index = row_format.format_idx_to_column_idx[i];
+        if (column_index)
         {
-            if (header_.columns() <= *row_format.format_idx_to_column_idx[i])
-                row_format.throwInvalidFormat("Column index " + std::to_string(*row_format.format_idx_to_column_idx[i]) +
+            if (header_.columns() <= *column_index)
+                row_format.throwInvalidFormat("Column index " + std::to_string(*column_index) +
                 " must be less than the number of columns (" + std::to_string(header_.columns()) + ")", i);
             if (row_format.escaping_rules[i] == EscapingRule::None)
                 row_format.throwInvalidFormat("Column is not skipped, but deserialization type is None", i);

-            size_t col_idx = *row_format.format_idx_to_column_idx[i];
+            size_t col_idx = *column_index;
             if (column_in_format[col_idx])
                 row_format.throwInvalidFormat("Duplicate column", i);
             column_in_format[col_idx] = true;
+
+            checkSupportedDelimiterAfterField(row_format.escaping_rules[i], row_format.delimiters[i + 1], data_types[*column_index]);
+        }
+        else
+        {
+            checkSupportedDelimiterAfterField(row_format.escaping_rules[i], row_format.delimiters[i + 1], nullptr);
        }
    }

diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
index d2dd28eb15a..9d0734f4567 100644
--- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
+++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
@@ -41,6 +41,7 @@ protected:
     void resetParser() override;
     bool isGarbageAfterField(size_t index, ReadBuffer::Position pos) override;
     void setReadBuffer(ReadBuffer & in_) override;
+    void readPrefix() override;

     const FormatSettings format_settings;
     DataTypes data_types;
@@ -48,7 +49,6 @@ protected:

 private:
     bool readRow(MutableColumns & columns, RowReadExtension & ext) override;
-    void readPrefix() override;

    bool
parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override; void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override; diff --git a/tests/queries/0_stateless/00938_template_input_format.reference b/tests/queries/0_stateless/00938_template_input_format.reference index e1f77d9a581..ec8cd7a21f0 100644 --- a/tests/queries/0_stateless/00938_template_input_format.reference +++ b/tests/queries/0_stateless/00938_template_input_format.reference @@ -31,3 +31,5 @@ cv bn m","qwe,rty",456,"2016-01-02" "zx\cv\bn m","qwe,rty","as""df'gh","",789,"2016-01-04" "","zx cv bn m","qwe,rty","as""df'gh",9876543210,"2016-01-03" +1 +1 diff --git a/tests/queries/0_stateless/00938_template_input_format.sh b/tests/queries/0_stateless/00938_template_input_format.sh index e99f59614da..be75edcdb61 100755 --- a/tests/queries/0_stateless/00938_template_input_format.sh +++ b/tests/queries/0_stateless/00938_template_input_format.sh @@ -83,3 +83,13 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE template1"; $CLICKHOUSE_CLIENT --query="DROP TABLE template2"; rm "$CURDIR"/00938_template_input_format_resultset.tmp "$CURDIR"/00938_template_input_format_row.tmp +echo -ne '\${a:Escaped},\${b:Escaped}\n' > "$CURDIR"/00938_template_input_format_row.tmp +echo -ne "a,b\nc,d\n" | $CLICKHOUSE_LOCAL --structure "a String, b String" --input-format Template \ + --format_template_row "$CURDIR"/00938_template_input_format_row.tmp --format_template_rows_between_delimiter '' \ + -q 'select * from table' 2>&1| grep -Fac "'Escaped' serialization requires delimiter" +echo -ne '\${a:Escaped},\${:Escaped}\n' > "$CURDIR"/00938_template_input_format_row.tmp +echo -ne "a,b\nc,d\n" | $CLICKHOUSE_LOCAL --structure "a String" --input-format Template \ + --format_template_row "$CURDIR"/00938_template_input_format_row.tmp --format_template_rows_between_delimiter '' \ + -q 'select * from table' 2>&1| grep -Fac "'Escaped' serialization requires delimiter" +rm "$CURDIR"/00938_template_input_format_row.tmp + diff --git a/tests/queries/0_stateless/01014_format_custom_separated.reference b/tests/queries/0_stateless/01014_format_custom_separated.reference index d46a6fdf5b1..626d6ed66b8 100644 --- a/tests/queries/0_stateless/01014_format_custom_separated.reference +++ b/tests/queries/0_stateless/01014_format_custom_separated.reference @@ -8,3 +8,4 @@ 1,"2019-09-25","world" 2,"2019-09-26","custom" 3,"2019-09-27","separated" +1 diff --git a/tests/queries/0_stateless/01014_format_custom_separated.sh b/tests/queries/0_stateless/01014_format_custom_separated.sh index 4e88419d125..655607c8c9b 100755 --- a/tests/queries/0_stateless/01014_format_custom_separated.sh +++ b/tests/queries/0_stateless/01014_format_custom_separated.sh @@ -34,3 +34,8 @@ FORMAT CustomSeparated" $CLICKHOUSE_CLIENT --query="SELECT * FROM custom_separated ORDER BY n FORMAT CSV" $CLICKHOUSE_CLIENT --query="DROP TABLE custom_separated" + +echo -ne "a,b\nc,d\n" | $CLICKHOUSE_LOCAL --structure "a String, b String" \ + --input-format CustomSeparated --format_custom_escaping_rule=Escaped \ + --format_custom_field_delimiter=',' --format_custom_row_after_delimiter=$'\n' -q 'select * from table' \ + 2>&1| grep -Fac "'Escaped' serialization requires delimiter" From 597544a15b977506df68cf4a84eac4f37565398d Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 17 Oct 2022 16:12:00 +0000 Subject: [PATCH 122/252] Fix assert cast in join on falsy condition --- src/Interpreters/HashJoin.cpp | 5 +++++ .../0_stateless/02461_join_lc_issue_42380.reference | 2 ++ 
.../0_stateless/02461_join_lc_issue_42380.sql | 12 ++++++++++++ 3 files changed, 19 insertions(+) create mode 100644 tests/queries/0_stateless/02461_join_lc_issue_42380.reference create mode 100644 tests/queries/0_stateless/02461_join_lc_issue_42380.sql diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 7780b335128..26b9b843567 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -232,6 +232,11 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s data->type = Type::CROSS; sample_block_with_columns_to_add = right_sample_block; } + else if (table_join->getClauses().empty()) + { + data->type = Type::EMPTY; + sample_block_with_columns_to_add = right_sample_block; + } else if (table_join->oneDisjunct()) { const auto & key_names_right = table_join->getOnlyClause().key_names_right; diff --git a/tests/queries/0_stateless/02461_join_lc_issue_42380.reference b/tests/queries/0_stateless/02461_join_lc_issue_42380.reference new file mode 100644 index 00000000000..b0d5371e4f7 --- /dev/null +++ b/tests/queries/0_stateless/02461_join_lc_issue_42380.reference @@ -0,0 +1,2 @@ +1 0 +\N 1 diff --git a/tests/queries/0_stateless/02461_join_lc_issue_42380.sql b/tests/queries/0_stateless/02461_join_lc_issue_42380.sql new file mode 100644 index 00000000000..f0ecbf64e58 --- /dev/null +++ b/tests/queries/0_stateless/02461_join_lc_issue_42380.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS t1__fuzz_13; +DROP TABLE IF EXISTS t2__fuzz_47; + +SET allow_suspicious_low_cardinality_types = 1; + +CREATE TABLE t1__fuzz_13 (id Nullable(Int16)) ENGINE = MergeTree() ORDER BY id SETTINGS allow_nullable_key = 1; +CREATE TABLE t2__fuzz_47 (id LowCardinality(Int16)) ENGINE = MergeTree() ORDER BY id; + +INSERT INTO t1__fuzz_13 VALUES (1); +INSERT INTO t2__fuzz_47 VALUES (1); + +SELECT * FROM t1__fuzz_13 FULL OUTER JOIN t2__fuzz_47 ON 1 = 2; From e014293fe8e7675c61ff958f2240ecc2f3c03c1b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Oct 2022 19:29:14 +0200 Subject: [PATCH 123/252] Fix tidy --- src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index b7242231ebe..721d701c9a2 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1708,7 +1708,7 @@ ProgressCallback Context::getProgressCallback() const void Context::setProcessListElement(QueryStatusPtr elem) { /// Set to a session or query. In the session, only one query is processed at a time. Therefore, the lock is not needed. - process_list_elem = std::move(elem); + process_list_elem = elem; } QueryStatusPtr Context::getProcessListElement() const From d8501c7f6a5f9cf537df076bf9cf584fb7d0e609 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 17 Oct 2022 18:10:22 +0000 Subject: [PATCH 124/252] fix formatting of ALTER FREEZE --- src/Parsers/ASTAlterQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 2d8193871b0..959fc55c945 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -378,7 +378,7 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & } else if (type == ASTAlterCommand::FREEZE_ALL) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "FREEZE"; + settings.ostr << (settings.hilite ? hilite_keyword : "") << "FREEZE" << (settings.hilite ? 
hilite_none : ""); if (!with_name.empty()) { @@ -399,7 +399,7 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & } else if (type == ASTAlterCommand::UNFREEZE_ALL) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "UNFREEZE"; + settings.ostr << (settings.hilite ? hilite_keyword : "") << "UNFREEZE" << (settings.hilite ? hilite_none : ""); if (!with_name.empty()) { From f67b5182eac46a980f772b303729a0ab80c0829c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 17 Oct 2022 19:05:15 +0000 Subject: [PATCH 125/252] Add test with different databases. --- .../ClusterProxy/executeQuery.cpp | 2 +- .../02346_additional_filters_distr.reference | 3 +++ .../02346_additional_filters_distr.sql | 20 +++++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02346_additional_filters_distr.reference create mode 100644 tests/queries/0_stateless/02346_additional_filters_distr.sql diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 8863683fba6..923b4a767b7 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -106,7 +106,7 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c if (query_info && query_info->additional_filter_ast) { Tuple tuple; - tuple.push_back(main_table.getFullTableName()); + tuple.push_back(main_table.getShortName()); tuple.push_back(queryToString(query_info->additional_filter_ast)); new_settings.additional_table_filters.value.push_back(std::move(tuple)); } diff --git a/tests/queries/0_stateless/02346_additional_filters_distr.reference b/tests/queries/0_stateless/02346_additional_filters_distr.reference new file mode 100644 index 00000000000..81814b5e7bb --- /dev/null +++ b/tests/queries/0_stateless/02346_additional_filters_distr.reference @@ -0,0 +1,3 @@ +4 dddd +5 a +6 bb diff --git a/tests/queries/0_stateless/02346_additional_filters_distr.sql b/tests/queries/0_stateless/02346_additional_filters_distr.sql new file mode 100644 index 00000000000..bc9c1715c72 --- /dev/null +++ b/tests/queries/0_stateless/02346_additional_filters_distr.sql @@ -0,0 +1,20 @@ +-- Tags: no-parallel, distributed + +create database if not exists shard_0; +create database if not exists shard_1; + +drop table if exists dist_02346; +drop table if exists shard_0.data_02346; +drop table if exists shard_1.data_02346; + +create table shard_0.data_02346 (x UInt32, y String) engine = MergeTree order by x settings index_granularity = 2; +insert into shard_0.data_02346 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); + +create table shard_1.data_02346 (x UInt32, y String) engine = MergeTree order by x settings index_granularity = 2; +insert into shard_1.data_02346 values (5, 'a'), (6, 'bb'), (7, 'ccc'), (8, 'dddd'); + +create table dist_02346 (x UInt32, y String) engine=Distributed('test_cluster_two_shards_different_databases', /* default_database= */ '', data_02346); + +set max_rows_to_read=4; + +select * from dist_02346 order by x settings additional_table_filters={'dist_02346' : 'x > 3 and x < 7'}; From 1a9d190788e4e9db7e1bb367d57b50719621e9a0 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 17 Oct 2022 21:08:27 +0000 Subject: [PATCH 126/252] replace table name in fuzzer more often --- src/Client/QueryFuzzer.cpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp 
index f0c4313e8a8..6c5f5850b92 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -468,6 +468,16 @@ bool QueryFuzzer::isSuitableForFuzzing(const ASTCreateQuery & create) return create.columns_list && create.columns_list->columns; } +static String getOriginalTableName(const String & full_name) +{ + return full_name.substr(0, full_name.find("__fuzz_")); +} + +static String getFuzzedTableName(const String & original_name, size_t index) +{ + return original_name + "__fuzz_" + toString(index); +} + void QueryFuzzer::fuzzCreateQuery(ASTCreateQuery & create) { if (create.columns_list && create.columns_list->columns) @@ -501,10 +511,9 @@ void QueryFuzzer::fuzzCreateQuery(ASTCreateQuery & create) } auto full_name = create.getTable(); - auto original_name = full_name.substr(0, full_name.find("__fuzz_")); - + auto original_name = getOriginalTableName(full_name); size_t index = index_of_fuzzed_table[original_name]++; - auto new_name = original_name + "__fuzz_" + toString(index); + auto new_name = getFuzzedTableName(original_name, index); create.setTable(new_name); @@ -665,7 +674,8 @@ void QueryFuzzer::fuzzTableName(ASTTableExpression & table) if (table_id.empty()) return; - auto it = original_table_name_to_fuzzed.find(table_id.getTableName()); + auto original_name = getOriginalTableName(table_id.getTableName()); + auto it = original_table_name_to_fuzzed.find(original_name); if (it != original_table_name_to_fuzzed.end() && !it->second.empty()) { auto new_table_name = it->second.begin(); @@ -728,7 +738,7 @@ ASTs QueryFuzzer::getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query) /// Drop all created tables, not only unique ones. for (size_t i = 0; i < it->second; ++i) { - auto fuzzed_name = table_name + "__fuzz_" + toString(i); + auto fuzzed_name = getFuzzedTableName(table_name, i); auto & query = queries.emplace_back(drop_query.clone()); query->as()->setTable(fuzzed_name); /// Just in case add IF EXISTS to avoid exceptions. 
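            /// Illustrative sketch with a hypothetical table name "t": if "t" was
            /// created twice, this loop emits "DROP TABLE IF EXISTS t__fuzz_0" and
            /// "DROP TABLE IF EXISTS t__fuzz_1" (assuming the incoming query is a
            /// plain DROP TABLE).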
@@ -749,7 +759,9 @@ void QueryFuzzer::notifyQueryFailed(ASTPtr ast) if (pos != std::string::npos) { auto original_name = table_name.substr(0, pos); - original_table_name_to_fuzzed[original_name].erase(table_name); + auto it = original_table_name_to_fuzzed.find(original_name); + if (it != original_table_name_to_fuzzed.end()) + it->second.erase(table_name); } }; From d038d2b47879d6a2585d126704043b797bd4d353 Mon Sep 17 00:00:00 2001 From: jferroal Date: Tue, 18 Oct 2022 08:54:59 +0800 Subject: [PATCH 127/252] Doc: Add zh-cn translation getting-started/example-datasets/cell-towers.mdx --- .../example-datasets/cell-towers.mdx | 231 +++++++++++++++++- 1 file changed, 227 insertions(+), 4 deletions(-) diff --git a/docs/zh/getting-started/example-datasets/cell-towers.mdx b/docs/zh/getting-started/example-datasets/cell-towers.mdx index ece13445210..9738680519a 100644 --- a/docs/zh/getting-started/example-datasets/cell-towers.mdx +++ b/docs/zh/getting-started/example-datasets/cell-towers.mdx @@ -1,9 +1,232 @@ --- slug: /zh/getting-started/example-datasets/cell-towers -sidebar_label: Cell Towers -title: "Cell Towers" +sidebar_label: 蜂çªä¿¡å·å¡” +sidebar_position: 3 +title: "蜂çªä¿¡å·å¡”" --- -import Content from '@site/docs/en/getting-started/example-datasets/cell-towers.md'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; +import ActionsMenu from '@site/docs/en/_snippets/_service_actions_menu.md'; +import SQLConsoleDetail from '@site/docs/en/_snippets/_launch_sql_console.md'; + +该数æ®é›†æ¥è‡ª [OpenCellid](https://www.opencellid.org/) - 世界上最大的蜂çªä¿¡å·å¡”的开放数æ®åº“。 + +截至 2021 年,它拥有超过 4000 万æ¡å…³äºŽå…¨çƒèœ‚çªä¿¡å·å¡”(GSMã€LTEã€UMTS 等)的记录åŠå…¶åœ°ç†å标和元数æ®ï¼ˆå›½å®¶ä»£ç ã€ç½‘络等)。 + +OpenCelliD 项目在 `Creative Commons Attribution-ShareAlike 4.0 International License` å议下许å¯ä½¿ç”¨ï¼Œæˆ‘们根æ®ç›¸åŒè®¸å¯æ¡æ¬¾é‡æ–°åˆ†å‘此数æ®é›†çš„快照。登录åŽå³å¯ä¸‹è½½æœ€æ–°ç‰ˆæœ¬çš„æ•°æ®é›†ã€‚ + + +## 获å–æ•°æ®é›† {#get-the-dataset} + + + + +在 ClickHouse Cloud 上å¯ä»¥é€šè¿‡ä¸€ä¸ªæŒ‰é’®å®žçŽ°é€šè¿‡ S3 上传此数æ®é›†ã€‚登录你的 ClickHouse Cloud 组织,或通过 [ClickHouse.cloud](https://clickhouse.cloud) 创建å…费试用版。 + +从 **Sample data** 选项å¡ä¸­é€‰æ‹© **Cell Towers** æ•°æ®é›†ï¼Œç„¶åŽé€‰æ‹© **Load data**: + +![加载数æ®é›†](@site/docs/en/_snippets/images/cloud-load-data-sample.png) + +检查 cell_towers 的表结构: + +```sql +DESCRIBE TABLE cell_towers +``` + + + + + + +1. 下载 2021 å¹´ 2 月以æ¥çš„æ•°æ®é›†å¿«ç…§ï¼š[cell_towers.csv.xz](https://datasets.clickhouse.com/cell_towers.csv.xz) (729 MB)。 + +2. 验è¯å®Œæ•´æ€§ï¼ˆå¯é€‰æ­¥éª¤ï¼‰ï¼š + +```bash +md5sum cell_towers.csv.xz +``` + +```response +8cf986f4a0d9f12c6f384a0e9192c908 cell_towers.csv.xz +``` + +3. 使用以下命令解压: + +```bash +xz -d cell_towers.csv.xz +``` + +4. 创建表: + +```sql +CREATE TABLE cell_towers +( + radio Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4, 'UMTS' = 5), + mcc UInt16, + net UInt16, + area UInt16, + cell UInt64, + unit Int16, + lon Float64, + lat Float64, + range UInt32, + samples UInt32, + changeable UInt8, + created DateTime, + updated DateTime, + averageSignal UInt8 +) +ENGINE = MergeTree ORDER BY (radio, mcc, net, created); +``` + +5. æ’入数æ®é›†ï¼š + +```bash +clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_towers.csv +``` + + + + +## 查询示例 {#examples} + +1. 
按类型划分的基站数é‡ï¼š + +```sql +SELECT radio, count() AS c FROM cell_towers GROUP BY radio ORDER BY c DESC +``` +```response +┌─radio─┬────────c─┠+│ UMTS │ 20686487 │ +│ LTE │ 12101148 │ +│ GSM │ 9931312 │ +│ CDMA │ 556344 │ +│ NR │ 867 │ +└───────┴──────────┘ + +5 rows in set. Elapsed: 0.011 sec. Processed 43.28 million rows, 43.28 MB (3.83 billion rows/s., 3.83 GB/s.) +``` + +2. å„个[移动国家代ç ï¼ˆMCC)](https://en.wikipedia.org/wiki/Mobile_country_code)对应的蜂çªä¿¡å·å¡”æ•°é‡ï¼š + +```sql +SELECT mcc, count() FROM cell_towers GROUP BY mcc ORDER BY count() DESC LIMIT 10 +``` +```response +┌─mcc─┬─count()─┠+│ 310 │ 5024650 │ +│ 262 │ 2622423 │ +│ 250 │ 1953176 │ +│ 208 │ 1891187 │ +│ 724 │ 1836150 │ +│ 404 │ 1729151 │ +│ 234 │ 1618924 │ +│ 510 │ 1353998 │ +│ 440 │ 1343355 │ +│ 311 │ 1332798 │ +└─────┴─────────┘ + +10 rows in set. Elapsed: 0.019 sec. Processed 43.28 million rows, 86.55 MB (2.33 billion rows/s., 4.65 GB/s.) +``` + +排åé å‰çš„国家是:美国ã€å¾·å›½å’Œä¿„罗斯。 + +ä½ å¯ä»¥é€šè¿‡åœ¨ ClickHouse 中创建一个 [External Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) æ¥è§£ç è¿™äº›å€¼ã€‚ + +## 用例:åˆå¹¶åœ°ç†æ•°æ® {#use-case} + +使用 `pointInPolygon` 函数。 + +1. 创建一个用于存储多边形的表: + + + + +```sql +CREATE TABLE moscow (polygon Array(Tuple(Float64, Float64))) +ORDER BY polygon; +``` + + + + +```sql +CREATE TEMPORARY TABLE +moscow (polygon Array(Tuple(Float64, Float64))); +``` + + + + +2. 以下点大致上构造了莫斯科的地ç†å›´æ ï¼ˆé™¤â€œæ–°èŽ«æ–¯ç§‘â€å¤–): + +```sql +INSERT INTO moscow VALUES ([(37.84172564285271, 55.78000432402266), +(37.8381207618713, 55.775874525970494), (37.83979446823122, 55.775626746008065), (37.84243326983639, 55.77446586811748), (37.84262672750849, 55.771974101091104), (37.84153238623039, 55.77114545193181), (37.841124690460184, 55.76722010265554), +(37.84239076983644, 55.76654891107098), (37.842283558197025, 55.76258709833121), (37.8421759312134, 55.758073999993734), (37.84198330422974, 55.75381499999371), (37.8416827275085, 55.749277102484484), (37.84157576190186, 55.74794544108413), +(37.83897929098507, 55.74525257875241), (37.83739676451868, 55.74404373042019), (37.838732481460525, 55.74298009816793), (37.841183997352545, 55.743060321833575), (37.84097476190185, 55.73938799999373), (37.84048155819702, 55.73570799999372), +(37.840095812164286, 55.73228210777237), (37.83983814285274, 55.73080491981639), (37.83846476321406, 55.729799917464675), (37.83835745269769, 55.72919751082619), (37.838636380279524, 55.72859509486539), (37.8395161005249, 55.727705075632784), +(37.83897964285276, 55.722727886185154), (37.83862557539366, 55.72034817326636), (37.83559735744853, 55.71944437307499), (37.835370708803126, 55.71831419154461), (37.83738169402022, 55.71765218986692), (37.83823396494291, 55.71691750159089), +(37.838056931213345, 55.71547311301385), (37.836812846557606, 55.71221445615604), (37.83522525396725, 55.709331054395555), (37.83269301586908, 55.70953687463627), (37.829667367706236, 55.70903403789297), (37.83311126588435, 55.70552351822608), +(37.83058993121339, 55.70041317726053), (37.82983872750851, 55.69883771404813), (37.82934501586913, 55.69718947487017), (37.828926414016685, 55.69504441658371), (37.82876530422971, 55.69287499999378), (37.82894754100031, 55.690759754047335), +(37.827697554878185, 55.68951421135665), (37.82447346292115, 55.68965045405069), (37.83136543914793, 55.68322046195302), (37.833554015869154, 55.67814012759211), (37.83544184655761, 55.67295011628339), (37.837480388885474, 55.6672498719639), +(37.838960677246064, 55.66316274139358), (37.83926093121332, 
55.66046999999383), (37.839025050262435, 55.65869897264431), (37.83670784390257, 55.65794084879904), (37.835656529083245, 55.65694309303843), (37.83704060449217, 55.65689306460552), +(37.83696819873806, 55.65550363526252), (37.83760389616388, 55.65487847246661), (37.83687972750851, 55.65356745541324), (37.83515216004943, 55.65155951234079), (37.83312418518067, 55.64979413590619), (37.82801726983639, 55.64640836412121), +(37.820614174591, 55.64164525405531), (37.818908190475426, 55.6421883258084), (37.81717543386075, 55.64112490388471), (37.81690987037274, 55.63916106913107), (37.815099354492155, 55.637925371757085), (37.808769150787356, 55.633798276884455), +(37.80100123544311, 55.62873670012244), (37.79598013491824, 55.62554336109055), (37.78634567724606, 55.62033499605651), (37.78334147619623, 55.618768681480326), (37.77746201055901, 55.619855533402706), (37.77527329626457, 55.61909966711279), +(37.77801986242668, 55.618770300976294), (37.778212973541216, 55.617257701952106), (37.77784818518065, 55.61574504433011), (37.77016867724609, 55.61148576294007), (37.760191219573976, 55.60599579539028), (37.75338926983641, 55.60227892751446), +(37.746329965606634, 55.59920577639331), (37.73939925396728, 55.59631430313617), (37.73273665739439, 55.5935318803559), (37.7299954450912, 55.59350760316188), (37.7268679946899, 55.59469840523759), (37.72626726983634, 55.59229549697373), +(37.7262673598022, 55.59081598950582), (37.71897193121335, 55.5877595845419), (37.70871550793456, 55.58393177431724), (37.700497489410374, 55.580917323756644), (37.69204305026244, 55.57778089778455), (37.68544477378839, 55.57815154690915), +(37.68391050793454, 55.57472945079756), (37.678803592590306, 55.57328235936491), (37.6743402539673, 55.57255251445782), (37.66813862698363, 55.57216388774464), (37.617927457672096, 55.57505691895805), (37.60443099999999, 55.5757737568051), +(37.599683515869145, 55.57749105910326), (37.59754177842709, 55.57796291823627), (37.59625834786988, 55.57906686095235), (37.59501783265684, 55.57746616444403), (37.593090671936025, 55.57671634534502), (37.587018007904, 55.577944600233785), +(37.578692203704804, 55.57982895000019), (37.57327546607398, 55.58116294118248), (37.57385012109279, 55.581550362779), (37.57399562266922, 55.5820107079112), (37.5735356072979, 55.58226289171689), (37.57290393054962, 55.582393529795155), +(37.57037722355653, 55.581919415056234), (37.5592298306885, 55.584471614867844), (37.54189249206543, 55.58867650795186), (37.5297256269836, 55.59158133551745), (37.517837865081766, 55.59443656218868), (37.51200186508174, 55.59635625174229), +(37.506808949737554, 55.59907823904434), (37.49820432275389, 55.6062944994944), (37.494406071441674, 55.60967103463367), (37.494760001358024, 55.61066689753365), (37.49397137107085, 55.61220931698269), (37.49016528606031, 55.613417718449064), +(37.48773249206542, 55.61530616333343), (37.47921386508177, 55.622640129112334), (37.470652153442394, 55.62993723476164), (37.46273446298218, 55.6368075123157), (37.46350692265317, 55.64068225239439), (37.46050283203121, 55.640794546982576), +(37.457627470916734, 55.64118904154646), (37.450718034393326, 55.64690488145138), (37.44239252645875, 55.65397824729769), (37.434587576721185, 55.66053543155961), (37.43582144975277, 55.661693766520735), (37.43576786245721, 55.662755031737014), +(37.430982915344174, 55.664610641628116), (37.428547447097685, 55.66778515273695), (37.42945134592044, 55.668633314343566), (37.42859571562949, 55.66948145750025), (37.4262836402282, 55.670813882451405), 
(37.418709037048295, 55.6811141674414), +(37.41922139651101, 55.68235377885389), (37.419218771842885, 55.68359335082235), (37.417196501327446, 55.684375235224735), (37.41607020370478, 55.68540557585352), (37.415640857147146, 55.68686637150793), (37.414632153442334, 55.68903015131686), +(37.413344899475064, 55.690896881757396), (37.41171432275391, 55.69264232162232), (37.40948282275393, 55.69455101638112), (37.40703674603271, 55.69638690385348), (37.39607169577025, 55.70451821283731), (37.38952706878662, 55.70942491932811), +(37.387778313491815, 55.71149057784176), (37.39049275399779, 55.71419814298992), (37.385557272491454, 55.7155489617061), (37.38388335714726, 55.71849856042102), (37.378368238098155, 55.7292763261685), (37.37763597123337, 55.730845879211614), +(37.37890062088197, 55.73167906388319), (37.37750451918789, 55.734703664681774), (37.375610832015965, 55.734851959522246), (37.3723813571472, 55.74105626086403), (37.37014935714723, 55.746115620904355), (37.36944173016362, 55.750883999993725), +(37.36975304365541, 55.76335905525834), (37.37244070571134, 55.76432079697595), (37.3724259757175, 55.76636979670426), (37.369922155757884, 55.76735417953104), (37.369892695770275, 55.76823419316575), (37.370214730163575, 55.782312184391266), +(37.370493611114505, 55.78436801120489), (37.37120164550783, 55.78596427165359), (37.37284851456452, 55.7874378183096), (37.37608325135799, 55.7886695054807), (37.3764587460632, 55.78947647305964), (37.37530000265506, 55.79146512926804), +(37.38235915344241, 55.79899647809345), (37.384344043655396, 55.80113596939471), (37.38594269577028, 55.80322699999366), (37.38711208598329, 55.804919036911976), (37.3880239841309, 55.806610999993666), (37.38928977249147, 55.81001864976979), +(37.39038389947512, 55.81348641242801), (37.39235781481933, 55.81983538336746), (37.393709457672124, 55.82417822811877), (37.394685720901464, 55.82792275755836), (37.39557615344238, 55.830447148154136), (37.39844478226658, 55.83167107969975), +(37.40019761214057, 55.83151823557964), (37.400398790382326, 55.83264967594742), (37.39659544313046, 55.83322180909622), (37.39667059524539, 55.83402792148566), (37.39682089947515, 55.83638877400216), (37.39643489154053, 55.83861656112751), +(37.3955338994751, 55.84072348043264), (37.392680272491454, 55.84502158126453), (37.39241188227847, 55.84659117913199), (37.392529730163616, 55.84816071336481), (37.39486835714723, 55.85288092980303), (37.39873052645878, 55.859893456073635), +(37.40272161111449, 55.86441833633205), (37.40697072750854, 55.867579567544375), (37.410007082016016, 55.868369880337), (37.4120992989502, 55.86920843741314), (37.412668021163924, 55.87055369615854), (37.41482461111453, 55.87170587948249), +(37.41862266137694, 55.873183961039565), (37.42413732540892, 55.874879126654704), (37.4312182698669, 55.875614937236705), (37.43111093783558, 55.8762723478417), (37.43332105622856, 55.87706546369396), (37.43385747619623, 55.87790681284802), +(37.441303050262405, 55.88027084462084), (37.44747234260555, 55.87942070143253), (37.44716141796871, 55.88072960917233), (37.44769797085568, 55.88121221323979), (37.45204320500181, 55.882080694420715), (37.45673176190186, 55.882346110794586), +(37.463383999999984, 55.88252729504517), (37.46682797486874, 55.88294937719063), (37.470014457672086, 55.88361266759345), (37.47751410450743, 55.88546991372396), (37.47860317658232, 55.88534929207307), (37.48165826025772, 55.882563306475106), +(37.48316434442331, 55.8815803226785), (37.483831555817645, 55.882427612793315), (37.483182967125686, 
55.88372791409729), (37.483092277908824, 55.88495581062434), (37.4855716508179, 55.8875561994203), (37.486440636245746, 55.887827444039566), +(37.49014203439328, 55.88897899871799), (37.493210285705544, 55.890208937135604), (37.497512451065035, 55.891342397444696), (37.49780744510645, 55.89174030252967), (37.49940333499519, 55.89239745507079), (37.50018383334346, 55.89339220941865), +(37.52421672750851, 55.903869074155224), (37.52977457672118, 55.90564076517974), (37.53503220370484, 55.90661661218259), (37.54042858064267, 55.90714113744566), (37.54320461007303, 55.905645048442985), (37.545686966066306, 55.906608607018505), +(37.54743976120755, 55.90788552162358), (37.55796999999999, 55.90901557907218), (37.572711542327866, 55.91059395704873), (37.57942799999998, 55.91073854155573), (37.58502865872187, 55.91009969268444), (37.58739968913264, 55.90794809960554), +(37.59131567193598, 55.908713267595054), (37.612687423278814, 55.902866854295375), (37.62348079629517, 55.90041967242986), (37.635797880950896, 55.898141151686396), (37.649487626983664, 55.89639275532968), (37.65619302513125, 55.89572360207488), +(37.66294133862307, 55.895295577183965), (37.66874564418033, 55.89505457604897), (37.67375601586915, 55.89254677027454), (37.67744661901856, 55.8947775867987), (37.688347, 55.89450045676125), (37.69480554232789, 55.89422926332761), +(37.70107096560668, 55.89322256101114), (37.705962965606716, 55.891763491662616), (37.711885134918205, 55.889110234998974), (37.71682005026245, 55.886577568759876), (37.7199315476074, 55.88458159806678), (37.72234560316464, 55.882281005794134), +(37.72364385977171, 55.8809452036196), (37.725371142837474, 55.8809722706006), (37.727870902099546, 55.88037213862385), (37.73394330422971, 55.877941504088696), (37.745339592590376, 55.87208120378722), (37.75525267724611, 55.86703807949492), +(37.76919976190188, 55.859821640197474), (37.827835219574, 55.82962968399116), (37.83341438888553, 55.82575289922351), (37.83652584655761, 55.82188784027888), (37.83809213491821, 55.81612575504693), (37.83605359521481, 55.81460347077685), +(37.83632178569025, 55.81276696067908), (37.838623105812026, 55.811486181656385), (37.83912198147584, 55.807329380532785), (37.839079078033414, 55.80510270463816), (37.83965844708251, 55.79940712529036), (37.840581150787344, 55.79131399999368), +(37.84172564285271, 55.78000432402266)]); +``` + +3. 检查莫斯科有多少个蜂çªä¿¡å·å¡”: + +```sql +SELECT count() FROM cell_towers +WHERE pointInPolygon((lon, lat), (SELECT * FROM moscow)) +``` +```response +┌─count()─┠+│ 310463 │ +└─────────┘ + +1 rows in set. Elapsed: 0.067 sec. Processed 43.28 million rows, 692.42 MB (645.83 million rows/s., 10.33 GB/s.) +``` + +虽然ä¸èƒ½åˆ›å»ºä¸´æ—¶è¡¨ï¼Œä½†æ­¤æ•°æ®é›†ä»å¯åœ¨ [Playground](https://play.clickhouse.com/play?user=play) 中进行交互å¼çš„请求, [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=). 
- From 1e7afba015666668393f1fc16ab46d7bfe539389 Mon Sep 17 00:00:00 2001 From: jferroal Date: Tue, 18 Oct 2022 08:57:27 +0800 Subject: [PATCH 128/252] Doc: Add zh-CN translation getting-started/example-datasets/menus.mdx --- .../example-datasets/menus.mdx | 355 +++++++++++++++++- 1 file changed, 349 insertions(+), 6 deletions(-) diff --git a/docs/zh/getting-started/example-datasets/menus.mdx b/docs/zh/getting-started/example-datasets/menus.mdx index 250b8a4cd37..07452062d0b 100644 --- a/docs/zh/getting-started/example-datasets/menus.mdx +++ b/docs/zh/getting-started/example-datasets/menus.mdx @@ -1,9 +1,352 @@ ---- -slug: /zh/getting-started/example-datasets/menus -sidebar_label: New York Public Library "What's on the Menu?" Dataset -title: "New York Public Library \"What's on the Menu?\" Dataset" +--- +slug: /zh/getting-started/example-datasets/menus +sidebar_label:纽约公共图书馆“èœå•ä¸Šæœ‰ä»€ä¹ˆï¼Ÿâ€æ•°æ®é›† +title: "纽约公共图书馆“èœå•ä¸Šæœ‰ä»€ä¹ˆï¼Ÿâ€æ•°æ®é›†" --- -import Content from '@site/docs/en/getting-started/example-datasets/menus.md'; +该数æ®é›†ç”±çº½çº¦å…¬å…±å›¾ä¹¦é¦†åˆ›å»ºã€‚其中å«æœ‰æœ‰å…³é…’店ã€é¤é¦†å’Œå’–啡馆的èœå•ä¸Šçš„èœè‚´åŠå…¶ä»·æ ¼çš„历å²æ•°æ®ã€‚ - +æ¥æºï¼šhttp://menus.nypl.org/data +æ•°æ®ä¸ºå¼€æ”¾æ•°æ®ã€‚ + +æ•°æ®æ¥è‡ªäºŽå›¾ä¹¦é¦†ä¸­çš„档案,因此å¯èƒ½ä¸å®Œæ•´ï¼Œä»¥è‡³äºŽéš¾ä»¥è¿›è¡Œç»Ÿè®¡åˆ†æžã€‚尽管如此,该数æ®é›†ä¹Ÿæ˜¯éžå¸¸æœ‰æ„æ€çš„。数æ®é›†ä¸­åªæœ‰ 130 万æ¡å…³äºŽèœå•ä¸­çš„èœè‚´çš„记录 - 这对于 ClickHouse æ¥è¯´æ˜¯ä¸€ä¸ªéžå¸¸å°çš„æ•°æ®é‡ï¼Œä½†è¿™ä»æ˜¯ä¸€ä¸ªå¾ˆå¥½çš„例å­ã€‚ + +## 下载数æ®é›† {#download-dataset} + +è¿è¡Œå‘½ä»¤ï¼š + +```bash +wget https://s3.amazonaws.com/menusdata.nypl.org/gzips/2021_08_01_07_01_17_data.tgz +``` + +如果有需è¦å¯ä»¥ä½¿ç”¨ http://menus.nypl.org/data 中的最新链接。下载的大å°çº¦ä¸º 35 MB。 + +## 解压数æ®é›† {#unpack-dataset} + +```bash +tar xvf 2021_08_01_07_01_17_data.tgz +``` + +解压åŽçš„的大å°çº¦ä¸º 150 MB。 + +æ•°æ®é›†ç”±å››ä¸ªè¡¨ç»„æˆï¼š + +- `Menu` - 有关èœå•çš„ä¿¡æ¯ï¼Œå…¶ä¸­åŒ…å«ï¼šé¤åŽ…å称,看到èœå•çš„日期等 +- `Dish` - 有关èœè‚´çš„ä¿¡æ¯ï¼Œå…¶ä¸­åŒ…å«ï¼šèœè‚´å称以åŠä¸€äº›ç‰¹å¾ã€‚ +- `MenuPage` - 有关èœå•ä¸­é¡µé¢çš„ä¿¡æ¯ï¼Œæ¯ä¸ªé¡µé¢éƒ½å±žäºŽæŸä¸ª `Menu`。 +- `MenuItem` - èœå•é¡¹ã€‚æŸä¸ªèœå•é¡µé¢ä¸Šçš„èœè‚´åŠå…¶ä»·æ ¼ï¼šæŒ‡å‘ `Dish` å’Œ `MenuPage`的链接。 + +## 创建表 {#create-tables} + +使用 [Decimal](../../sql-reference/data-types/decimal.md) æ•°æ®ç±»åž‹æ¥å­˜å‚¨ä»·æ ¼ã€‚ + +```sql +CREATE TABLE dish +( + id UInt32, + name String, + description String, + menus_appeared UInt32, + times_appeared Int32, + first_appeared UInt16, + last_appeared UInt16, + lowest_price Decimal64(3), + highest_price Decimal64(3) +) ENGINE = MergeTree ORDER BY id; + +CREATE TABLE menu +( + id UInt32, + name String, + sponsor String, + event String, + venue String, + place String, + physical_description String, + occasion String, + notes String, + call_number String, + keywords String, + language String, + date String, + location String, + location_type String, + currency String, + currency_symbol String, + status String, + page_count UInt16, + dish_count UInt16 +) ENGINE = MergeTree ORDER BY id; + +CREATE TABLE menu_page +( + id UInt32, + menu_id UInt32, + page_number UInt16, + image_id String, + full_height UInt16, + full_width UInt16, + uuid UUID +) ENGINE = MergeTree ORDER BY id; + +CREATE TABLE menu_item +( + id UInt32, + menu_page_id UInt32, + price Decimal64(3), + high_price Decimal64(3), + dish_id UInt32, + created_at DateTime, + updated_at DateTime, + xpos Float64, + ypos Float64 +) ENGINE = MergeTree ORDER BY id; +``` + +## å¯¼å…¥æ•°æ® {#import-data} + +执行以下命令将数æ®å¯¼å…¥ ClickHouse: + +```bash +clickhouse-client 
--format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --query "INSERT INTO dish FORMAT CSVWithNames" < Dish.csv +clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --query "INSERT INTO menu FORMAT CSVWithNames" < Menu.csv +clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --query "INSERT INTO menu_page FORMAT CSVWithNames" < MenuPage.csv +clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --date_time_input_format best_effort --query "INSERT INTO menu_item FORMAT CSVWithNames" < MenuItem.csv +``` + +因为数æ®ç”±å¸¦æœ‰æ ‡é¢˜çš„ CSV 表示,所以使用 [CSVWithNames](../../interfaces/formats.md#csvwithnames) æ ¼å¼ã€‚ + +因为åªæœ‰åŒå¼•å·ç”¨äºŽæ•°æ®å­—段,å•å¼•å·å¯ä»¥åœ¨å€¼å†…,所以ç¦ç”¨äº† `format_csv_allow_single_quotes` 以é¿å…æ··æ·† CSV 解æžå™¨ã€‚ + +因为数æ®ä¸­æ²¡æœ‰ [NULL](../../sql-å‚考/syntax.md#null-literal) 值,所以ç¦ç”¨ [input_format_null_as_default](../../operations/settings/settings.md#settings-input-format-null-as-default)。ä¸ç„¶ ClickHouse 将会å°è¯•è§£æž `\N` åºåˆ—,并å¯èƒ½ä¸Žæ•°æ®ä¸­çš„ `\` 混淆。 + +设置 [date_time_input_format best_effort](../../operations/settings/settings.md#settings-date_time_input_format) 以便解æžå„ç§æ ¼å¼çš„ [DateTime](../../sql-reference/data-types/datetime.md)字段。例如,识别åƒâ€œ2000-01-01 01:02â€è¿™æ ·æ²¡æœ‰ç§’æ•°çš„ ISO-8601 时间字符串。如果没有此设置,则仅å…许使用固定的 DateTime æ ¼å¼ã€‚ + +## éžè§„èŒƒåŒ–æ•°æ® {#denormalize-data} + +æ•°æ®ä»¥ [规范化形å¼] (https://en.wikipedia.org/wiki/Database_normalization#Normal_forms) 在多个表格中呈现。这æ„味ç€å¦‚果你想进行如查询èœå•é¡¹ä¸­çš„èœå这类的查询,则必须执行 [JOIN](../../sql-reference/statements/select/join.md#select-join)。在典型的分æžä»»åŠ¡ä¸­ï¼Œé¢„先处ç†è”接的数æ®ä»¥é¿å…æ¯æ¬¡éƒ½æ‰§è¡Œâ€œè”接â€ä¼šæ›´æœ‰æ•ˆçŽ‡ã€‚这中æ“作被称为“éžè§„范化â€æ•°æ®ã€‚ + +我们将创建一个表“menu_item_denormâ€ï¼Œå…¶ä¸­å°†åŒ…å«æ‰€æœ‰è”接在一起的数æ®ï¼š + +```sql +CREATE TABLE menu_item_denorm +ENGINE = MergeTree ORDER BY (dish_name, created_at) +AS SELECT + price, + high_price, + created_at, + updated_at, + xpos, + ypos, + dish.id AS dish_id, + dish.name AS dish_name, + dish.description AS dish_description, + dish.menus_appeared AS dish_menus_appeared, + dish.times_appeared AS dish_times_appeared, + dish.first_appeared AS dish_first_appeared, + dish.last_appeared AS dish_last_appeared, + dish.lowest_price AS dish_lowest_price, + dish.highest_price AS dish_highest_price, + menu.id AS menu_id, + menu.name AS menu_name, + menu.sponsor AS menu_sponsor, + menu.event AS menu_event, + menu.venue AS menu_venue, + menu.place AS menu_place, + menu.physical_description AS menu_physical_description, + menu.occasion AS menu_occasion, + menu.notes AS menu_notes, + menu.call_number AS menu_call_number, + menu.keywords AS menu_keywords, + menu.language AS menu_language, + menu.date AS menu_date, + menu.location AS menu_location, + menu.location_type AS menu_location_type, + menu.currency AS menu_currency, + menu.currency_symbol AS menu_currency_symbol, + menu.status AS menu_status, + menu.page_count AS menu_page_count, + menu.dish_count AS menu_dish_count +FROM menu_item + JOIN dish ON menu_item.dish_id = dish.id + JOIN menu_page ON menu_item.menu_page_id = menu_page.id + JOIN menu ON menu_page.menu_id = menu.id; +``` + +## 验è¯æ•°æ® {#validate-data} + +请求: + +```sql +SELECT count() FROM menu_item_denorm; +``` + +结果: + +```text +┌─count()─┠+│ 1329175 │ +└─────────┘ +``` + +## è¿è¡Œä¸€äº›æŸ¥è¯¢ {#run-queries} + +### èœå“çš„å¹³å‡åŽ†å²ä»·æ ¼ {#query-averaged-historical-prices} + +请求: + +```sql +SELECT + round(toUInt32OrZero(extract(menu_date, '^\\d{4}')), -1) AS d, + 
count(), + round(avg(price), 2), + bar(avg(price), 0, 100, 100) +FROM menu_item_denorm +WHERE (menu_currency = 'Dollars') AND (d > 0) AND (d < 2022) +GROUP BY d +ORDER BY d ASC; +``` + +结果: + +```text +┌────d─┬─count()─┬─round(avg(price), 2)─┬─bar(avg(price), 0, 100, 100)─┠+│ 1850 │ 618 │ 1.5 │ █■│ +│ 1860 │ 1634 │ 1.29 │ █▎ │ +│ 1870 │ 2215 │ 1.36 │ █▎ │ +│ 1880 │ 3909 │ 1.01 │ â–ˆ │ +│ 1890 │ 8837 │ 1.4 │ █■│ +│ 1900 │ 176292 │ 0.68 │ â–‹ │ +│ 1910 │ 212196 │ 0.88 │ â–Š │ +│ 1920 │ 179590 │ 0.74 │ â–‹ │ +│ 1930 │ 73707 │ 0.6 │ â–Œ │ +│ 1940 │ 58795 │ 0.57 │ â–Œ │ +│ 1950 │ 41407 │ 0.95 │ â–Š │ +│ 1960 │ 51179 │ 1.32 │ █▎ │ +│ 1970 │ 12914 │ 1.86 │ █▋ │ +│ 1980 │ 7268 │ 4.35 │ ████▎ │ +│ 1990 │ 11055 │ 6.03 │ ██████ │ +│ 2000 │ 2467 │ 11.85 │ ███████████▋ │ +│ 2010 │ 597 │ 25.66 │ █████████████████████████▋ │ +└──────┴─────────┴──────────────────────┴──────────────────────────────┘ +``` + +带上一粒ç›ã€‚ + +### 汉堡价格 {#query-burger-prices} + +请求: + +```sql +SELECT + round(toUInt32OrZero(extract(menu_date, '^\\d{4}')), -1) AS d, + count(), + round(avg(price), 2), + bar(avg(price), 0, 50, 100) +FROM menu_item_denorm +WHERE (menu_currency = 'Dollars') AND (d > 0) AND (d < 2022) AND (dish_name ILIKE '%burger%') +GROUP BY d +ORDER BY d ASC; +``` + +结果: + +```text +┌────d─┬─count()─┬─round(avg(price), 2)─┬─bar(avg(price), 0, 50, 100)───────────┠+│ 1880 │ 2 │ 0.42 │ â–‹ │ +│ 1890 │ 7 │ 0.85 │ █▋ │ +│ 1900 │ 399 │ 0.49 │ â–Š │ +│ 1910 │ 589 │ 0.68 │ █▎ │ +│ 1920 │ 280 │ 0.56 │ â–ˆ │ +│ 1930 │ 74 │ 0.42 │ â–‹ │ +│ 1940 │ 119 │ 0.59 │ █■│ +│ 1950 │ 134 │ 1.09 │ ██■│ +│ 1960 │ 272 │ 0.92 │ █▋ │ +│ 1970 │ 108 │ 1.18 │ ██▎ │ +│ 1980 │ 88 │ 2.82 │ █████▋ │ +│ 1990 │ 184 │ 3.68 │ ███████▎ │ +│ 2000 │ 21 │ 7.14 │ ██████████████▎ │ +│ 2010 │ 6 │ 18.42 │ ████████████████████████████████████▋ │ +└──────┴─────────┴──────────────────────┴───────────────────────────────────────┘ +``` + +###ä¼ç‰¹åŠ {#query-vodka} + +请求: + +```sql +SELECT + round(toUInt32OrZero(extract(menu_date, '^\\d{4}')), -1) AS d, + count(), + round(avg(price), 2), + bar(avg(price), 0, 50, 100) +FROM menu_item_denorm +WHERE (menu_currency IN ('Dollars', '')) AND (d > 0) AND (d < 2022) AND (dish_name ILIKE '%vodka%') +GROUP BY d +ORDER BY d ASC; +``` + +结果: + +```text +┌────d─┬─count()─┬─round(avg(price), 2)─┬─bar(avg(price), 0, 50, 100)─┠+│ 1910 │ 2 │ 0 │ │ +│ 1920 │ 1 │ 0.3 │ â–Œ │ +│ 1940 │ 21 │ 0.42 │ â–‹ │ +│ 1950 │ 14 │ 0.59 │ █■│ +│ 1960 │ 113 │ 2.17 │ ████▎ │ +│ 1970 │ 37 │ 0.68 │ █▎ │ +│ 1980 │ 19 │ 2.55 │ █████ │ +│ 1990 │ 86 │ 3.6 │ ███████■│ +│ 2000 │ 2 │ 3.98 │ ███████▊ │ +└──────┴─────────┴──────────────────────┴─────────────────────────────┘ +``` + +è¦æŸ¥è¯¢ `Vodka`,必须声明通过 `ILIKE '%vodka%'` 进行查询。 + +### é±¼å­é…± {#query-caviar} + +列出鱼å­é…±çš„价格。å¦å¤–,列出任何带有鱼å­é…±çš„èœè‚´çš„å称。 + +请求: + +```sql +SELECT + round(toUInt32OrZero(extract(menu_date, '^\\d{4}')), -1) AS d, + count(), + round(avg(price), 2), + bar(avg(price), 0, 50, 100), + any(dish_name) +FROM menu_item_denorm +WHERE (menu_currency IN ('Dollars', '')) AND (d > 0) AND (d < 2022) AND (dish_name ILIKE '%caviar%') +GROUP BY d +ORDER BY d ASC; +``` + +结果: + +```text +┌────d─┬─count()─┬─round(avg(price), 2)─┬─bar(avg(price), 0, 50, 100)──────┬─any(dish_name)──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┠+│ 1090 │ 1 │ 0 │ │ Caviar │ +│ 1880 │ 3 │ 0 │ │ Caviar │ +│ 1890 │ 39 │ 0.59 │ █■│ Butter and caviar │ +│ 1900 │ 1014 │ 0.34 │ â–‹ │ Anchovy Caviar on Toast │ +│ 1910 │ 1588 │ 1.35 │ ██▋ │ 1/1 Brötchen Caviar 
│ +│ 1920 │ 927 │ 1.37 │ ██▋ │ ASTRAKAN CAVIAR │ +│ 1930 │ 289 │ 1.91 │ ███▋ │ Astrachan caviar │ +│ 1940 │ 201 │ 0.83 │ █▋ │ (SPECIAL) Domestic Caviar Sandwich │ +│ 1950 │ 81 │ 2.27 │ ████▌ │ Beluga Caviar │ +│ 1960 │ 126 │ 2.21 │ ████■│ Beluga Caviar │ +│ 1970 │ 105 │ 0.95 │ █▊ │ BELUGA MALOSSOL CAVIAR AMERICAN DRESSING │ +│ 1980 │ 12 │ 7.22 │ ██████████████■│ Authentic Iranian Beluga Caviar the world's finest black caviar presented in ice garni and a sampling of chilled 100° Russian vodka │ +│ 1990 │ 74 │ 14.42 │ ████████████████████████████▋ │ Avocado Salad, Fresh cut avocado with caviare │ +│ 2000 │ 3 │ 7.82 │ ███████████████▋ │ Aufgeschlagenes Kartoffelsueppchen mit Forellencaviar │ +│ 2010 │ 6 │ 15.58 │ ███████████████████████████████■│ "OYSTERS AND PEARLS" "Sabayon" of Pearl Tapioca with Island Creek Oysters and Russian Sevruga Caviar │ +└──────┴─────────┴──────────────────────┴──────────────────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +至少他们有ä¼ç‰¹åŠ é…é±¼å­é…±ã€‚真棒。 + +## 在线 Playground{#playground} + +此数æ®é›†å·²ç»ä¸Šä¼ åˆ°äº† ClickHouse Playground 中,[example](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICByb3VuZCh0b1VJbnQzMk9yWmVybyhleHRyYWN0KG1lbnVfZGF0ZSwgJ15cXGR7NH0nKSksIC0xKSBBUyBkLAogICAgY291bnQoKSwKICAgIHJvdW5kKGF2ZyhwcmljZSksIDIpLAogICAgYmFyKGF2ZyhwcmljZSksIDAsIDUwLCAxMDApLAogICAgYW55KGRpc2hfbmFtZSkKRlJPTSBtZW51X2l0ZW1fZGVub3JtCldIRVJFIChtZW51X2N1cnJlbmN5IElOICgnRG9sbGFycycsICcnKSkgQU5EIChkID4gMCkgQU5EIChkIDwgMjAyMikgQU5EIChkaXNoX25hbWUgSUxJS0UgJyVjYXZpYXIlJykKR1JPVVAgQlkgZApPUkRFUiBCWSBkIEFTQw==)。 From f94f491679aaa6ea0f0d2625abeba197a3dce041 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Tue, 18 Oct 2022 05:05:32 +0000 Subject: [PATCH 129/252] impl max_cache_download_limit --- src/Interpreters/Cache/FileCache.cpp | 121 ++++++++++--------- src/Interpreters/Cache/FileCache.h | 3 + src/Interpreters/Cache/FileCacheSettings.cpp | 2 + src/Interpreters/Cache/FileCacheSettings.h | 3 + src/Interpreters/Cache/FileCache_fwd.h | 1 + 5 files changed, 76 insertions(+), 54 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 20a9f6cce1d..8da0c68b19e 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -32,6 +32,8 @@ FileCache::FileCache( , allow_persistent_files(cache_settings_.do_not_evict_index_and_mark_files) , enable_cache_hits_threshold(cache_settings_.enable_cache_hits_threshold) , enable_filesystem_query_cache_limit(cache_settings_.enable_filesystem_query_cache_limit) + , enable_limit_download_cache_size(cache_settings_.enable_limit_download_cache_size) + , max_enable_download_cache_size(cache_settings_.max_enable_download_cache_size) , log(&Poco::Logger::get("FileCache")) , main_priority(std::make_unique()) , stash_priority(std::make_unique()) @@ -185,70 +187,83 @@ FileSegments FileCache::getImpl( /// Given range = [left, right] and non-overlapping ordered set of file segments, /// find list [segment1, ..., segmentN] of segments which intersect with given range. 
- auto it = files.find(key); - if (it == files.end()) - return {}; - - const auto & file_segments = it->second; - if (file_segments.empty()) - { - files.erase(key); - removeKeyDirectoryIfExists(key, cache_lock); - return {}; - } - FileSegments result; - auto segment_it = file_segments.lower_bound(range.left); - if (segment_it == file_segments.end()) - { - /// N - last cached segment for given file key, segment{N}.offset < range.left: - /// segment{N} segment{N} - /// [________ [_______] - /// [__________] OR [________] - /// ^ ^ - /// range.left range.left - const auto & cell = file_segments.rbegin()->second; - if (cell.file_segment->range().right < range.left) + if (enable_limit_download_cache_size && (range.size() > max_enable_download_cache_size)) + { + auto file_segment = std::make_shared( + range.left, range.size(), key, this, FileSegment::State::SKIP_CACHE, CreateFileSegmentSettings{}); + { + std::unique_lock segment_lock(file_segment->mutex); + file_segment->detachAssumeStateFinalized(segment_lock); + } + result.emplace_back(file_segment); + } + else + { + auto it = files.find(key); + if (it == files.end()) return {}; - useCell(cell, result, cache_lock); - } - else /// segment_it <-- segmment{k} - { - if (segment_it != file_segments.begin()) + const auto & file_segments = it->second; + if (file_segments.empty()) { - const auto & prev_cell = std::prev(segment_it)->second; - const auto & prev_cell_range = prev_cell.file_segment->range(); - - if (range.left <= prev_cell_range.right) - { - /// segment{k-1} segment{k} - /// [________] [_____ - /// [___________ - /// ^ - /// range.left - useCell(prev_cell, result, cache_lock); - } + files.erase(key); + removeKeyDirectoryIfExists(key, cache_lock); + return {}; } - /// segment{k} ... segment{k-1} segment{k} segment{k} - /// [______ [______] [____ [________ - /// [_________ OR [________ OR [______] ^ - /// ^ ^ ^ segment{k}.offset - /// range.left range.left range.right - - while (segment_it != file_segments.end()) + auto segment_it = file_segments.lower_bound(range.left); + if (segment_it == file_segments.end()) { - const auto & cell = segment_it->second; - if (range.right < cell.file_segment->range().left) - break; + /// N - last cached segment for given file key, segment{N}.offset < range.left: + /// segment{N} segment{N} + /// [________ [_______] + /// [__________] OR [________] + /// ^ ^ + /// range.left range.left + + const auto & cell = file_segments.rbegin()->second; + if (cell.file_segment->range().right < range.left) + return {}; useCell(cell, result, cache_lock); - ++segment_it; + } + else /// segment_it <-- segmment{k} + { + if (segment_it != file_segments.begin()) + { + const auto & prev_cell = std::prev(segment_it)->second; + const auto & prev_cell_range = prev_cell.file_segment->range(); + + if (range.left <= prev_cell_range.right) + { + /// segment{k-1} segment{k} + /// [________] [_____ + /// [___________ + /// ^ + /// range.left + useCell(prev_cell, result, cache_lock); + } + } + + /// segment{k} ... 
segment{k-1} segment{k} segment{k} + /// [______ [______] [____ [________ + /// [_________ OR [________ OR [______] ^ + /// ^ ^ ^ segment{k}.offset + /// range.left range.left range.right + + while (segment_it != file_segments.end()) + { + const auto & cell = segment_it->second; + if (range.right < cell.file_segment->range().left) + break; + + useCell(cell, result, cache_lock); + ++segment_it; + } } } - return result; } @@ -392,7 +407,6 @@ FileSegmentsHolder FileCache::getOrSet(const Key & key, size_t offset, size_t si #endif FileSegment::Range range(offset, offset + size - 1); - /// Get all segments which intersect with the given range. auto file_segments = getImpl(key, range, cache_lock); @@ -404,7 +418,6 @@ FileSegmentsHolder FileCache::getOrSet(const Key & key, size_t offset, size_t si { fillHolesWithEmptyFileSegments(file_segments, key, range, /* fill_with_detached */false, settings, cache_lock); } - assert(!file_segments.empty()); return FileSegmentsHolder(std::move(file_segments)); } diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 07aea230803..fb31459d062 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -140,6 +140,9 @@ private: const size_t enable_cache_hits_threshold; const bool enable_filesystem_query_cache_limit; + const bool enable_limit_download_cache_size; + const size_t max_enable_download_cache_size; + mutable std::mutex mutex; Poco::Logger * log; diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp index 4b8d806bb53..8dcdbf40698 100644 --- a/src/Interpreters/Cache/FileCacheSettings.cpp +++ b/src/Interpreters/Cache/FileCacheSettings.cpp @@ -34,6 +34,8 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & cache_on_write_operations = config.getUInt64(config_prefix + ".cache_on_write_operations", false); enable_filesystem_query_cache_limit = config.getUInt64(config_prefix + ".enable_filesystem_query_cache_limit", false); enable_cache_hits_threshold = config.getUInt64(config_prefix + ".enable_cache_hits_threshold", REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD); + enable_limit_download_cache_size = config.getUInt64(config_prefix + ".enable_limit_download_cache_size", false); + max_enable_download_cache_size = config.getUInt64(config_prefix + ".max_enable_download_cache_size", REMOTE_FS_OBJECTS_CACHE_MAX_ENABLE_DOWNLOAD_SIZE); do_not_evict_index_and_mark_files = config.getUInt64(config_prefix + ".do_not_evict_index_and_mark_files", false); } diff --git a/src/Interpreters/Cache/FileCacheSettings.h b/src/Interpreters/Cache/FileCacheSettings.h index c6155edad85..429480e955d 100644 --- a/src/Interpreters/Cache/FileCacheSettings.h +++ b/src/Interpreters/Cache/FileCacheSettings.h @@ -20,6 +20,9 @@ struct FileCacheSettings bool do_not_evict_index_and_mark_files = true; + bool enable_limit_download_cache_size = false; + size_t max_enable_download_cache_size = REMOTE_FS_OBJECTS_CACHE_MAX_ENABLE_DOWNLOAD_SIZE; + void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); }; diff --git a/src/Interpreters/Cache/FileCache_fwd.h b/src/Interpreters/Cache/FileCache_fwd.h index 25c16b4e840..de8dde91991 100644 --- a/src/Interpreters/Cache/FileCache_fwd.h +++ b/src/Interpreters/Cache/FileCache_fwd.h @@ -7,6 +7,7 @@ namespace DB static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 100 * 1024 * 1024; static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS 
= 1024 * 1024; static constexpr int REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD = 0; +static constexpr size_t REMOTE_FS_OBJECTS_CACHE_MAX_ENABLE_DOWNLOAD_SIZE = 256 * 1024 * 1024;; class FileCache; using FileCachePtr = std::shared_ptr; From 6d32149fa64f882dd8a3218855757146b2571cb3 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 18 Oct 2022 11:18:01 +0800 Subject: [PATCH 130/252] Correct implementation of agg state comparison --- src/AggregateFunctions/IAggregateFunction.cpp | 15 +++++++-------- .../02456_aggregate_state_conversion.reference | 1 + .../02456_aggregate_state_conversion.sql | 1 + 3 files changed, 9 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/02456_aggregate_state_conversion.reference create mode 100644 tests/queries/0_stateless/02456_aggregate_state_conversion.sql diff --git a/src/AggregateFunctions/IAggregateFunction.cpp b/src/AggregateFunctions/IAggregateFunction.cpp index 25d2a9a4530..7da341cc5b9 100644 --- a/src/AggregateFunctions/IAggregateFunction.cpp +++ b/src/AggregateFunctions/IAggregateFunction.cpp @@ -53,9 +53,12 @@ String IAggregateFunction::getDescription() const bool IAggregateFunction::haveEqualArgumentTypes(const IAggregateFunction & rhs) const { - return std::equal(argument_types.begin(), argument_types.end(), - rhs.argument_types.begin(), rhs.argument_types.end(), - [](const auto & t1, const auto & t2) { return t1->equals(*t2); }); + return std::equal( + argument_types.begin(), + argument_types.end(), + rhs.argument_types.begin(), + rhs.argument_types.end(), + [](const auto & t1, const auto & t2) { return t1->equals(*t2); }); } bool IAggregateFunction::haveSameStateRepresentation(const IAggregateFunction & rhs) const @@ -67,11 +70,7 @@ bool IAggregateFunction::haveSameStateRepresentation(const IAggregateFunction & bool IAggregateFunction::haveSameStateRepresentationImpl(const IAggregateFunction & rhs) const { - bool res = getName() == rhs.getName() - && parameters == rhs.parameters - && haveEqualArgumentTypes(rhs); - assert(res == (getStateType()->getName() == rhs.getStateType()->getName())); - return res; + return getStateType()->equals(*rhs.getStateType()); } } diff --git a/tests/queries/0_stateless/02456_aggregate_state_conversion.reference b/tests/queries/0_stateless/02456_aggregate_state_conversion.reference new file mode 100644 index 00000000000..abf55dde8a7 --- /dev/null +++ b/tests/queries/0_stateless/02456_aggregate_state_conversion.reference @@ -0,0 +1 @@ +1027000000000000000000000000000000000000000000000000000000000000 diff --git a/tests/queries/0_stateless/02456_aggregate_state_conversion.sql b/tests/queries/0_stateless/02456_aggregate_state_conversion.sql new file mode 100644 index 00000000000..3c05c59de59 --- /dev/null +++ b/tests/queries/0_stateless/02456_aggregate_state_conversion.sql @@ -0,0 +1 @@ +SELECT hex(CAST(x, 'AggregateFunction(sum, Decimal(50, 10))')) FROM (SELECT arrayReduce('sumState', [toDecimal256('0.0000010.000001', 10)]) AS x) GROUP BY x; From d9fcd9187562da795c4ddb3f9fcddc493fcc8c5a Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Tue, 18 Oct 2022 09:07:12 +0000 Subject: [PATCH 131/252] Add toStableRelativeHourNum --- src/Common/DateLUTImpl.h | 13 +++++++++++++ src/Functions/dateDiff.cpp | 8 ++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index ec414c221cb..e6913e7450f 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -895,6 +895,19 @@ public: return toRelativeHourNum(lut[toLUTIndex(v)].date); 
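    /// A note on the shifted division used by toStableRelativeHourNum below (and already by
    /// toRelativeMinuteNum): assuming DATE_LUT_ADD is a multiple of the divisor,
    /// (t + DATE_LUT_ADD) / 3600 - (DATE_LUT_ADD / 3600) equals floor(t / 3600) even for
    /// negative t, which is what keeps hour differences consistent across the Unix epoch.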
} + /// The same formula is used for positive time (after Unix epoch) and negative time (before Unix epoch). + /// It’s needed for correct work of dateDiff function. + inline Time toStableRelativeHourNum(Time t) const + { + return (t + DATE_LUT_ADD + 86400 - offset_at_start_of_epoch) / 3600 - (DATE_LUT_ADD / 3600); + } + + template + inline Time toStableRelativeHourNum(DateOrTime v) const + { + return toStableRelativeHourNum(lut[toLUTIndex(v)].date); + } + inline Time toRelativeMinuteNum(Time t) const /// NOLINT { return (t + DATE_LUT_ADD) / 60 - (DATE_LUT_ADD / 60); diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 773340a7758..18178b68165 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -159,19 +159,19 @@ struct ToExtendedRelativeHourNumImpl static inline Int64 execute(Int64 t, const DateLUTImpl & time_zone) { - return time_zone.toRelativeHourNum(t); + return time_zone.toStableRelativeHourNum(t); } static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) { - return time_zone.toRelativeHourNum(static_cast(t)); + return time_zone.toStableRelativeHourNum(static_cast(t)); } static inline Int64 execute(Int32 d, const DateLUTImpl & time_zone) { - return time_zone.toRelativeHourNum(ExtendedDayNum(d)); + return time_zone.toStableRelativeHourNum(ExtendedDayNum(d)); } static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toRelativeHourNum(DayNum(d)); + return time_zone.toStableRelativeHourNum(DayNum(d)); } using FactorTransform = ZeroTransform; From 1275c464aac247503493e726155d7b0d07e0c6a8 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 18 Oct 2022 10:44:22 +0000 Subject: [PATCH 132/252] Fix unit test build --- src/Coordination/tests/gtest_coordination.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 5bb1ecc7c85..dbd9a874067 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -3,6 +3,7 @@ #include "Common/ZooKeeper/IKeeper.h" #include "Coordination/KeeperContext.h" +#include "Coordination/KeeperSnapshotManagerS3.h" #include "Coordination/KeeperStorage.h" #include "Core/Defines.h" #include "IO/WriteHelpers.h" @@ -1318,7 +1319,8 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - auto state_machine = std::make_shared(queue, snapshots_queue, "./snapshots", settings, keeper_context); + KeeperSnapshotManagerS3 snapshot_manager_s3; + auto state_machine = std::make_shared(queue, snapshots_queue, "./snapshots", settings, keeper_context, false, snapshot_manager_s3); state_machine->init(); DB::KeeperLogStore changelog("./logs", settings->rotate_log_storage_interval, true, enable_compression); changelog.init(state_machine->last_commit_index() + 1, settings->reserved_log_items); @@ -1359,7 +1361,7 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint } SnapshotsQueue snapshots_queue1{1}; - auto restore_machine = std::make_shared(queue, snapshots_queue1, "./snapshots", settings, keeper_context); + auto restore_machine = std::make_shared(queue, snapshots_queue1, "./snapshots", settings, keeper_context, false, snapshot_manager_s3); restore_machine->init(); EXPECT_EQ(restore_machine->last_commit_index(), total_logs - total_logs % settings->snapshot_distance); @@ 
-1471,7 +1473,8 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove) ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - auto state_machine = std::make_shared(queue, snapshots_queue, "./snapshots", settings, keeper_context); + KeeperSnapshotManagerS3 snapshot_manager_s3; + auto state_machine = std::make_shared(queue, snapshots_queue, "./snapshots", settings, keeper_context, false, snapshot_manager_s3); state_machine->init(); std::shared_ptr request_c = std::make_shared(); From 540728ab9653a11aae050fef3b128ba588956027 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 18 Oct 2022 10:52:57 +0000 Subject: [PATCH 133/252] Simplify logic a bit --- src/Coordination/KeeperServer.cpp | 3 +-- src/Coordination/KeeperStateMachine.cpp | 8 +++----- src/Coordination/KeeperStateMachine.h | 6 ++---- src/Coordination/tests/gtest_coordination.cpp | 9 +++------ utils/keeper-data-dumper/main.cpp | 2 +- 5 files changed, 10 insertions(+), 18 deletions(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 40c96f12da1..1c8959379da 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -127,8 +127,7 @@ KeeperServer::KeeperServer( configuration_and_settings_->snapshot_storage_path, coordination_settings, keeper_context, - config.getBool("keeper_server.upload_snapshot_on_exit", true), - snapshot_manager_s3, + config.getBool("keeper_server.upload_snapshot_on_exit", true) ? &snapshot_manager_s3 : nullptr, checkAndGetSuperdigest(configuration_and_settings_->super_digest)); state_manager = nuraft::cs_new( diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 3b94e41ea17..ee5bfa48357 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -44,8 +44,7 @@ KeeperStateMachine::KeeperStateMachine( const std::string & snapshots_path_, const CoordinationSettingsPtr & coordination_settings_, const KeeperContextPtr & keeper_context_, - const bool upload_snapshot_on_exit_, - KeeperSnapshotManagerS3 & snapshot_manager_s3_, + KeeperSnapshotManagerS3 * snapshot_manager_s3_, const std::string & superdigest_) : coordination_settings(coordination_settings_) , snapshot_manager( @@ -61,7 +60,6 @@ KeeperStateMachine::KeeperStateMachine( , log(&Poco::Logger::get("KeeperStateMachine")) , superdigest(superdigest_) , keeper_context(keeper_context_) - , upload_snapshot_on_exit(upload_snapshot_on_exit_) , snapshot_manager_s3(snapshot_manager_s3_) { } @@ -414,10 +412,10 @@ void KeeperStateMachine::create_snapshot(nuraft::snapshot & s, nuraft::async_res LOG_INFO(log, "Creating a snapshot during shutdown because 'create_snapshot_on_exit' is enabled."); auto snapshot_path = snapshot_task.create_snapshot(std::move(snapshot_task.snapshot)); - if (!snapshot_path.empty() && upload_snapshot_on_exit) + if (!snapshot_path.empty() && snapshot_manager_s3) { LOG_INFO(log, "Uploading snapshot {} during shutdown because 'upload_snapshot_on_exit' is enabled.", snapshot_path); - snapshot_manager_s3.uploadSnapshot(snapshot_path, /* asnyc_upload */ false); + snapshot_manager_s3->uploadSnapshot(snapshot_path, /* asnyc_upload */ false); } return; diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index 3802921f7ab..ffc7fce1cfe 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -28,8 +28,7 @@ public: const std::string & snapshots_path_, const CoordinationSettingsPtr & 
coordination_settings_, const KeeperContextPtr & keeper_context_, - bool upload_snapshot_on_exit_, - KeeperSnapshotManagerS3 & snapshot_manager_s3_, + KeeperSnapshotManagerS3 * snapshot_manager_s3_, const std::string & superdigest_ = ""); /// Read state from the latest snapshot @@ -151,8 +150,7 @@ private: KeeperContextPtr keeper_context; - const bool upload_snapshot_on_exit; - KeeperSnapshotManagerS3 & snapshot_manager_s3; + KeeperSnapshotManagerS3 * snapshot_manager_s3; }; } diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index dbd9a874067..b1d27d4541d 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -3,7 +3,6 @@ #include "Common/ZooKeeper/IKeeper.h" #include "Coordination/KeeperContext.h" -#include "Coordination/KeeperSnapshotManagerS3.h" #include "Coordination/KeeperStorage.h" #include "Core/Defines.h" #include "IO/WriteHelpers.h" @@ -1319,8 +1318,7 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - KeeperSnapshotManagerS3 snapshot_manager_s3; - auto state_machine = std::make_shared(queue, snapshots_queue, "./snapshots", settings, keeper_context, false, snapshot_manager_s3); + auto state_machine = std::make_shared(queue, snapshots_queue, "./snapshots", settings, keeper_context, nullptr); state_machine->init(); DB::KeeperLogStore changelog("./logs", settings->rotate_log_storage_interval, true, enable_compression); changelog.init(state_machine->last_commit_index() + 1, settings->reserved_log_items); @@ -1361,7 +1359,7 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint } SnapshotsQueue snapshots_queue1{1}; - auto restore_machine = std::make_shared(queue, snapshots_queue1, "./snapshots", settings, keeper_context, false, snapshot_manager_s3); + auto restore_machine = std::make_shared(queue, snapshots_queue1, "./snapshots", settings, keeper_context, nullptr); restore_machine->init(); EXPECT_EQ(restore_machine->last_commit_index(), total_logs - total_logs % settings->snapshot_distance); @@ -1473,8 +1471,7 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove) ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - KeeperSnapshotManagerS3 snapshot_manager_s3; - auto state_machine = std::make_shared(queue, snapshots_queue, "./snapshots", settings, keeper_context, false, snapshot_manager_s3); + auto state_machine = std::make_shared(queue, snapshots_queue, "./snapshots", settings, keeper_context, nullptr); state_machine->init(); std::shared_ptr request_c = std::make_shared(); diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 0762c740ac1..dd3c3a4e2ad 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -63,7 +63,7 @@ int main(int argc, char *argv[]) SnapshotsQueue snapshots_queue{1}; CoordinationSettingsPtr settings = std::make_shared(); KeeperContextPtr keeper_context = std::make_shared(); - auto state_machine = std::make_shared(queue, snapshots_queue, argv[1], settings, keeper_context); + auto state_machine = std::make_shared(queue, snapshots_queue, argv[1], settings, keeper_context, nullptr); state_machine->init(); size_t last_commited_index = state_machine->last_commit_index(); From 0404c960c6dc295eb84324686d4baa3957b28fca Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Tue, 
18 Oct 2022 13:19:16 +0200 Subject: [PATCH 134/252] add -15 tools to cmake.tools --- cmake/tools.cmake | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 57d39899a40..8a17d97cf13 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -117,7 +117,7 @@ endif() # Archiver if (COMPILER_GCC) - find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-14" "llvm-ar-13" "llvm-ar-12") + find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-15" "llvm-ar-14" "llvm-ar-13" "llvm-ar-12") else () find_program (LLVM_AR_PATH NAMES "llvm-ar-${COMPILER_VERSION_MAJOR}" "llvm-ar") endif () @@ -131,7 +131,7 @@ message(STATUS "Using archiver: ${CMAKE_AR}") # Ranlib if (COMPILER_GCC) - find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-14" "llvm-ranlib-13" "llvm-ranlib-12") + find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-15" "llvm-ranlib-14" "llvm-ranlib-13" "llvm-ranlib-12") else () find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib-${COMPILER_VERSION_MAJOR}" "llvm-ranlib") endif () @@ -145,7 +145,7 @@ message(STATUS "Using ranlib: ${CMAKE_RANLIB}") # Install Name Tool if (COMPILER_GCC) - find_program (LLVM_INSTALL_NAME_TOOL_PATH NAMES "llvm-install-name-tool" "llvm-install-name-tool-14" "llvm-install-name-tool-13" "llvm-install-name-tool-12") + find_program (LLVM_INSTALL_NAME_TOOL_PATH NAMES "llvm-install-name-tool" "llvm-install-name-tool-15" "llvm-install-name-tool-14" "llvm-install-name-tool-13" "llvm-install-name-tool-12") else () find_program (LLVM_INSTALL_NAME_TOOL_PATH NAMES "llvm-install-name-tool-${COMPILER_VERSION_MAJOR}" "llvm-install-name-tool") endif () @@ -159,7 +159,7 @@ message(STATUS "Using install-name-tool: ${CMAKE_INSTALL_NAME_TOOL}") # Objcopy if (COMPILER_GCC) - find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-14" "llvm-objcopy-13" "llvm-objcopy-12" "objcopy") + find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-15" "llvm-objcopy-14" "llvm-objcopy-13" "llvm-objcopy-12" "objcopy") else () find_program (OBJCOPY_PATH NAMES "llvm-objcopy-${COMPILER_VERSION_MAJOR}" "llvm-objcopy" "objcopy") endif () @@ -173,7 +173,7 @@ endif () # Strip if (COMPILER_GCC) - find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-14" "llvm-strip-13" "llvm-strip-12" "strip") + find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-15" "llvm-strip-14" "llvm-strip-13" "llvm-strip-12" "strip") else () find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip") endif () From 45c6974f7d48fd4ff269e78f42343fc829257f45 Mon Sep 17 00:00:00 2001 From: Tian Xinhui Date: Tue, 18 Oct 2022 19:35:34 +0800 Subject: [PATCH 135/252] Use alias for std::shared_ptr (#42211) * use alias for MergeMutateSelectedEntry share ptr * fix StorageMergeTree.cpp --- src/Storages/MergeTree/MergePlainMergeTreeTask.h | 2 +- src/Storages/StorageMergeTree.cpp | 8 ++++---- src/Storages/StorageMergeTree.h | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.h b/src/Storages/MergeTree/MergePlainMergeTreeTask.h index c08853f8e1b..d84db36bac2 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.h @@ -66,7 +66,7 @@ private: StorageMetadataPtr metadata_snapshot; bool deduplicate; Names deduplicate_by_columns; - std::shared_ptr merge_mutate_entry{nullptr}; + MergeMutateSelectedEntryPtr merge_mutate_entry{nullptr}; TableLockHolder table_lock_holder; FutureMergedMutatedPartPtr 
future_part{nullptr}; MergeTreeData::MutableDataPartPtr new_part; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index aea853b6c39..a65af1cf69e 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -798,7 +798,7 @@ void StorageMergeTree::loadMutations() increment.value = std::max(increment.value.load(), current_mutations_by_version.rbegin()->first); } -std::shared_ptr StorageMergeTree::selectPartsToMerge( +MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( const StorageMetadataPtr & metadata_snapshot, bool aggressive, const String & partition_id, @@ -943,7 +943,7 @@ bool StorageMergeTree::merge( SelectPartsDecision select_decision; - std::shared_ptr merge_mutate_entry; + MergeMutateSelectedEntryPtr merge_mutate_entry; { std::unique_lock lock(currently_processing_in_background_mutex); @@ -989,7 +989,7 @@ bool StorageMergeTree::partIsAssignedToBackgroundOperation(const DataPartPtr & p return currently_merging_mutating_parts.contains(part); } -std::shared_ptr StorageMergeTree::selectPartsToMutate( +MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( const StorageMetadataPtr & metadata_snapshot, String * /* disable_reason */, TableLockHolder & /* table_lock_holder */, std::unique_lock & /*currently_processing_in_background_mutex_lock*/) { @@ -1132,7 +1132,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign assert(!isStaticStorage()); auto metadata_snapshot = getInMemoryMetadataPtr(); - std::shared_ptr merge_entry, mutate_entry; + MergeMutateSelectedEntryPtr merge_entry, mutate_entry; auto share_lock = lockForShare(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index b36850f9f4a..ea2527e44a7 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -187,7 +187,7 @@ private: friend struct CurrentlyMergingPartsTagger; - std::shared_ptr selectPartsToMerge( + MergeMutateSelectedEntryPtr selectPartsToMerge( const StorageMetadataPtr & metadata_snapshot, bool aggressive, const String & partition_id, @@ -200,7 +200,7 @@ private: SelectPartsDecision * select_decision_out = nullptr); - std::shared_ptr selectPartsToMutate( + MergeMutateSelectedEntryPtr selectPartsToMutate( const StorageMetadataPtr & metadata_snapshot, String * disable_reason, TableLockHolder & table_lock_holder, std::unique_lock & currently_processing_in_background_mutex_lock); From 2ac5540d0e4fe1aec65aeb39a50552b6bec053ac Mon Sep 17 00:00:00 2001 From: Alfred Xu Date: Tue, 18 Oct 2022 20:33:31 +0800 Subject: [PATCH 136/252] Fixed a logical error in case of using `runningDifference` with `Date32` type (#42143) --- src/Functions/runningDifference.h | 2 +- .../00653_running_difference.reference | 24 +++++++++++++++++++ .../0_stateless/00653_running_difference.sql | 7 +++++- 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/Functions/runningDifference.h b/src/Functions/runningDifference.h index f3caf245d08..053d7cb9736 100644 --- a/src/Functions/runningDifference.h +++ b/src/Functions/runningDifference.h @@ -117,7 +117,7 @@ private: else if (which.isDate()) f(DataTypeDate::FieldType()); else if (which.isDate32()) - f(DataTypeDate::FieldType()); + f(DataTypeDate32::FieldType()); else if (which.isDateTime()) f(DataTypeDateTime::FieldType()); else diff --git a/tests/queries/0_stateless/00653_running_difference.reference 
b/tests/queries/0_stateless/00653_running_difference.reference index 7511beb2418..e2833e0bb3e 100644 --- a/tests/queries/0_stateless/00653_running_difference.reference +++ b/tests/queries/0_stateless/00653_running_difference.reference @@ -19,3 +19,27 @@ \N \N 2 +--Date Difference-- +\N +\N +0 +364 +\N +\N +14466 +- +\N +\N +0 +11101 +22017 +\N +105432 +- +\N +\N +0 +3149094509 +\N +\N +1130059331 diff --git a/tests/queries/0_stateless/00653_running_difference.sql b/tests/queries/0_stateless/00653_running_difference.sql index fd4dfb219fd..f2b4a7300b2 100644 --- a/tests/queries/0_stateless/00653_running_difference.sql +++ b/tests/queries/0_stateless/00653_running_difference.sql @@ -5,4 +5,9 @@ select '-'; select runningDifference(x) from (select arrayJoin([Null, 1]) as x); select '-'; select runningDifference(x) from (select arrayJoin([Null, Null, 1, 3, Null, Null, 5]) as x); - +select '--Date Difference--'; +select runningDifference(x) from (select arrayJoin([Null, Null, toDate('1970-1-1'), toDate('1970-12-31'), Null, Null, toDate('2010-8-9')]) as x); +select '-'; +select runningDifference(x) from (select arrayJoin([Null, Null, toDate32('1900-1-1'), toDate32('1930-5-25'), toDate('1990-9-4'), Null, toDate32('2279-5-4')]) as x); +select '-'; +select runningDifference(x) from (select arrayJoin([Null, Null, toDateTime('1970-06-28 23:48:12', 'Asia/Istanbul'), toDateTime('2070-04-12 21:16:41', 'Asia/Istanbul'), Null, Null, toDateTime('2106-02-03 06:38:52', 'Asia/Istanbul')]) as x); From 8755f94548026db2c028e399f29cd1964024ca0d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 18 Oct 2022 14:57:56 +0200 Subject: [PATCH 137/252] Added aggregate function `analysisOfVariance` (`anova`). Merging #37872 (#42131) --- .../AggregateFunctionAnalysisOfVariance.cpp | 38 ++++++ .../AggregateFunctionAnalysisOfVariance.h | 98 ++++++++++++++ src/AggregateFunctions/Moments.h | 126 ++++++++++++++++++ .../registerAggregateFunctions.cpp | 2 + .../0_stateless/02294_anova_cmp.python | 86 ++++++++++++ .../0_stateless/02294_anova_cmp.reference | 1 + tests/queries/0_stateless/02294_anova_cmp.sh | 9 ++ 7 files changed, 360 insertions(+) create mode 100644 src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.cpp create mode 100644 src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.h create mode 100644 tests/queries/0_stateless/02294_anova_cmp.python create mode 100644 tests/queries/0_stateless/02294_anova_cmp.reference create mode 100755 tests/queries/0_stateless/02294_anova_cmp.sh diff --git a/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.cpp b/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.cpp new file mode 100644 index 00000000000..ffb651b3288 --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.cpp @@ -0,0 +1,38 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +AggregateFunctionPtr createAggregateFunctionAnalysisOfVariance(const std::string & name, const DataTypes & arguments, const Array & parameters, const Settings *) +{ + assertNoParameters(name, parameters); + assertBinary(name, arguments); + + if (!isNumber(arguments[0]) || !isNumber(arguments[1])) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} only supports numerical types", name); + + return std::make_shared(arguments, parameters); +} + +} + +void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory & factory) +{ + AggregateFunctionProperties properties = { 
.is_order_dependent = false }; + factory.registerFunction("analysisOfVariance", {createAggregateFunctionAnalysisOfVariance, properties}, AggregateFunctionFactory::CaseInsensitive); + + /// This is widely used term + factory.registerAlias("anova", "analysisOfVariance", AggregateFunctionFactory::CaseInsensitive); +} + +} diff --git a/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.h b/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.h new file mode 100644 index 00000000000..efb6426a96c --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionAnalysisOfVariance.h @@ -0,0 +1,98 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include "Common/NaNUtils.h" +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +class AggregateFunctionAnalysisOfVarianceData final : public AnalysisOfVarianceMoments +{ +}; + + +/// One way analysis of variance +/// Provides a statistical test of whether two or more population means are equal (null hypothesis) +/// Has an assumption that subjects from group i have normal distribution. +/// Accepts two arguments - a value and a group number which this value belongs to. +/// Groups are enumerated starting from 0 and there should be at least two groups to perform a test +/// Moreover there should be at least one group with the number of observations greater than one. +class AggregateFunctionAnalysisOfVariance final : public IAggregateFunctionDataHelper +{ +public: + explicit AggregateFunctionAnalysisOfVariance(const DataTypes & arguments, const Array & params) + : IAggregateFunctionDataHelper(arguments, params) + {} + + DataTypePtr getReturnType() const override + { + DataTypes types {std::make_shared>(), std::make_shared>() }; + Strings names {"f_statistic", "p_value"}; + return std::make_shared( + std::move(types), + std::move(names) + ); + } + + String getName() const override { return "analysisOfVariance"; } + + bool allocatesMemoryInArena() const override { return false; } + + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override + { + data(place).add(columns[0]->getFloat64(row_num), columns[1]->getUInt(row_num)); + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override + { + data(place).merge(data(rhs)); + } + + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override + { + data(place).write(buf); + } + + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override + { + data(place).read(buf); + } + + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override + { + auto f_stat = data(place).getFStatistic(); + if (std::isinf(f_stat) || isNaN(f_stat)) + throw Exception("F statistic is not defined or infinite for these arguments", ErrorCodes::BAD_ARGUMENTS); + + auto p_value = data(place).getPValue(f_stat); + + /// Because p-value is a probability. 
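        /// For reference, the quantities computed here are the standard one-way ANOVA
        /// statistic and its p-value (matching the Moments.h code in this patch):
        ///   F = [sum_i n_i * (mean_i - mean)^2 / (k - 1)] / [sum_{i,j} (x_ij - mean_i)^2 / (n - k)]
        ///   p = 1 - CDF of the Fisher F(k - 1, n - k) distribution at F
        /// A true p-value already lies in [0, 1]; the clamp below only guards against round-off.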
+ p_value = std::min(1.0, std::max(0.0, p_value)); + + auto & column_tuple = assert_cast(to); + auto & column_stat = assert_cast &>(column_tuple.getColumn(0)); + auto & column_value = assert_cast &>(column_tuple.getColumn(1)); + + column_stat.getData().push_back(f_stat); + column_value.getData().push_back(p_value); + } + +}; + +} diff --git a/src/AggregateFunctions/Moments.h b/src/AggregateFunctions/Moments.h index 45a77e9cfdb..16279cb93a4 100644 --- a/src/AggregateFunctions/Moments.h +++ b/src/AggregateFunctions/Moments.h @@ -4,7 +4,9 @@ #include #include #include +#include #include +#include namespace DB @@ -13,6 +15,7 @@ struct Settings; namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int DECIMAL_OVERFLOW; } @@ -476,4 +479,127 @@ struct ZTestMoments } }; +template +struct AnalysisOfVarianceMoments +{ + /// Sums of values within a group + std::vector xs1{}; + /// Sums of squared values within a group + std::vector xs2{}; + /// Sizes of each group. Total number of observations is just a sum of all these values + std::vector ns{}; + + void resizeIfNeeded(size_t possible_size) + { + if (xs1.size() >= possible_size) + return; + + xs1.resize(possible_size, 0.0); + xs2.resize(possible_size, 0.0); + ns.resize(possible_size, 0); + } + + void add(T value, size_t group) + { + resizeIfNeeded(group + 1); + xs1[group] += value; + xs2[group] += value * value; + ns[group] += 1; + } + + void merge(const AnalysisOfVarianceMoments & rhs) + { + resizeIfNeeded(rhs.xs1.size()); + for (size_t i = 0; i < rhs.xs1.size(); ++i) + { + xs1[i] += rhs.xs1[i]; + xs2[i] += rhs.xs2[i]; + ns[i] += rhs.ns[i]; + } + } + + void write(WriteBuffer & buf) const + { + writeVectorBinary(xs1, buf); + writeVectorBinary(xs2, buf); + writeVectorBinary(ns, buf); + } + + void read(ReadBuffer & buf) + { + readVectorBinary(xs1, buf); + readVectorBinary(xs2, buf); + readVectorBinary(ns, buf); + } + + Float64 getMeanAll() const + { + const auto n = std::accumulate(ns.begin(), ns.end(), 0UL); + if (n == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There are no observations to calculate mean value"); + + return std::accumulate(xs1.begin(), xs1.end(), 0.0) / n; + } + + Float64 getMeanGroup(size_t group) const + { + if (ns[group] == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no observations for group {}", group); + + return xs1[group] / ns[group]; + } + + Float64 getBetweenGroupsVariation() const + { + Float64 res = 0; + auto mean = getMeanAll(); + + for (size_t i = 0; i < xs1.size(); ++i) + { + auto group_mean = getMeanGroup(i); + res += ns[i] * (group_mean - mean) * (group_mean - mean); + } + return res; + } + + Float64 getWithinGroupsVariation() const + { + Float64 res = 0; + for (size_t i = 0; i < xs1.size(); ++i) + { + auto group_mean = getMeanGroup(i); + res += xs2[i] + ns[i] * group_mean * group_mean - 2 * group_mean * xs1[i]; + } + return res; + } + + Float64 getFStatistic() const + { + const auto k = xs1.size(); + const auto n = std::accumulate(ns.begin(), ns.end(), 0UL); + + if (k == 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There should be more than one group to calculate f-statistics"); + + if (k == n) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is only one observation in each group"); + + return (getBetweenGroupsVariation() * (n - k)) / (getWithinGroupsVariation() * (k - 1)); + } + + Float64 getPValue(Float64 f_statistic) const + { + const auto k = xs1.size(); + const auto n = std::accumulate(ns.begin(), ns.end(), 0UL); + + if (k == 1) + throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "There should be more than one group to calculate f-statistics"); + + if (k == n) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is only one observation in each group"); + + return 1.0f - boost::math::cdf(boost::math::fisher_f(k - 1, n - k), f_statistic); + } +}; + } diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp index 57cfbb029d4..ecf6ab51367 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -72,6 +72,7 @@ void registerAggregateFunctionNothing(AggregateFunctionFactory &); void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory &); void registerAggregateFunctionSparkbar(AggregateFunctionFactory &); void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &); +void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &); class AggregateFunctionCombinatorFactory; void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &); @@ -156,6 +157,7 @@ void registerAggregateFunctions() registerAggregateFunctionIntervalLengthSum(factory); registerAggregateFunctionExponentialMovingAverage(factory); registerAggregateFunctionSparkbar(factory); + registerAggregateFunctionAnalysisOfVariance(factory); registerWindowFunctions(factory); } diff --git a/tests/queries/0_stateless/02294_anova_cmp.python b/tests/queries/0_stateless/02294_anova_cmp.python new file mode 100644 index 00000000000..7597b3712d1 --- /dev/null +++ b/tests/queries/0_stateless/02294_anova_cmp.python @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +import os +import sys +from statistics import variance +from scipy import stats +import pandas as pd +import numpy as np + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, 'helpers')) + +from pure_http_client import ClickHouseClient + + +# unpooled variance z-test for means of two samples +def scipy_anova(rvs): + return stats.f_oneway(*rvs) + + +def test_and_check(rvs, n_groups, f_stat, p_value, precision=1e-2): + client = ClickHouseClient() + client.query("DROP TABLE IF EXISTS anova;") + client.query("CREATE TABLE anova (left Float64, right UInt64) ENGINE = Memory;") + for group in range(n_groups): + client.query(f'''INSERT INTO anova VALUES {", ".join([f'({i},{group})' for i in rvs[group]])};''') + + real = client.query_return_df( + '''SELECT roundBankers(a.1, 16) as f_stat, roundBankers(a.2, 16) as p_value FROM (SELECT anova(left, right) as a FROM anova) FORMAT TabSeparatedWithNames;''') + + real_f_stat = real['f_stat'][0] + real_p_value = real['p_value'][0] + assert(abs(real_f_stat - np.float64(f_stat)) < precision), f"clickhouse_f_stat {real_f_stat}, py_f_stat {f_stat}" + assert(abs(real_p_value - np.float64(p_value)) < precision), f"clickhouse_p_value {real_p_value}, py_p_value {p_value}" + client.query("DROP TABLE IF EXISTS anova;") + + +def test_anova(): + n_groups = 3 + rvs = [] + loc = 0 + scale = 5 + size = 500 + for _ in range(n_groups): + rvs.append(np.round(stats.norm.rvs(loc=loc, scale=scale, size=size), 2)) + loc += 5 + f_stat, p_value = scipy_anova(rvs) + test_and_check(rvs, n_groups, f_stat, p_value) + + n_groups = 6 + rvs = [] + loc = 0 + scale = 5 + size = 500 + for _ in range(n_groups): + rvs.append(np.round(stats.norm.rvs(loc=loc, scale=scale, size=size), 2)) + f_stat, p_value = scipy_anova(rvs) + test_and_check(rvs, n_groups, f_stat, p_value) + + n_groups = 10 + rvs = 
[] + loc = 1 + scale = 2 + size = 100 + for _ in range(n_groups): + rvs.append(np.round(stats.norm.rvs(loc=loc, scale=scale, size=size), 2)) + loc += 1 + scale += 2 + size += 100 + f_stat, p_value = scipy_anova(rvs) + test_and_check(rvs, n_groups, f_stat, p_value) + + n_groups = 20 + rvs = [] + loc = 0 + scale = 10 + size = 1100 + for _ in range(n_groups): + rvs.append(np.round(stats.norm.rvs(loc=loc, scale=scale, size=size), 2)) + size -= 50 + f_stat, p_value = scipy_anova(rvs) + test_and_check(rvs, n_groups, f_stat, p_value) + + +if __name__ == "__main__": + test_anova() + print("Ok.") diff --git a/tests/queries/0_stateless/02294_anova_cmp.reference b/tests/queries/0_stateless/02294_anova_cmp.reference new file mode 100644 index 00000000000..587579af915 --- /dev/null +++ b/tests/queries/0_stateless/02294_anova_cmp.reference @@ -0,0 +1 @@ +Ok. diff --git a/tests/queries/0_stateless/02294_anova_cmp.sh b/tests/queries/0_stateless/02294_anova_cmp.sh new file mode 100755 index 00000000000..3dc9ef09b99 --- /dev/null +++ b/tests/queries/0_stateless/02294_anova_cmp.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test + +python3 "$CURDIR"/02294_anova_cmp.python From ab8da5a539d4a29fc9953446b9d478a8f2fae9ef Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 18 Oct 2022 15:50:02 +0200 Subject: [PATCH 138/252] Deactivate mutationsFinalizingTask until startup --- src/Storages/StorageReplicatedMergeTree.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7a2ff56a782..d5f8eaee5ef 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -294,12 +294,17 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( merge_selecting_task = getContext()->getSchedulePool().createTask( getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::mergeSelectingTask)", [this] { mergeSelectingTask(); }); - /// Will be activated if we win leader election. + /// Will be activated if we will achieve leader state. merge_selecting_task->deactivate(); mutations_finalizing_task = getContext()->getSchedulePool().createTask( getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::mutationsFinalizingTask)", [this] { mutationsFinalizingTask(); }); + /// This task can be scheduled by different parts of code even when storage is readonly. + /// This can lead to redundant exceptions during startup. + /// Will be activated by restarting thread. 
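The hunk resumes below with the `deactivate()` call itself. The pattern being applied, creating a background task and immediately deactivating it so that stray `schedule()` calls are ignored until the restarting thread activates it, can be sketched in isolation (a simplified stand-in, not ClickHouse's BackgroundSchedulePool API):

```cpp
#include <atomic>
#include <functional>

/// Simplified stand-in for a schedulable task: while deactivated, schedule() is a no-op,
/// so code paths that fire during a read-only startup phase cannot run the task early.
class ScheduledTask
{
public:
    explicit ScheduledTask(std::function<void()> fn_) : fn(std::move(fn_)) {}

    void activate()   { active = true;  }    /// called once startup completes
    void deactivate() { active = false; }    /// the state right after creation
    void schedule()   { if (active) fn(); }  /// ignored until activate()

private:
    std::function<void()> fn;
    std::atomic<bool> active{false};
};

int main()
{
    ScheduledTask task([] { /* finalize mutations */ });
    task.schedule();  /// no-op: storage may still be read-only
    task.activate();
    task.schedule();  /// now runs
}
```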
+ mutations_finalizing_task->deactivate(); + bool has_zookeeper = getContext()->hasZooKeeper() || getContext()->hasAuxiliaryZooKeeper(zookeeper_name); if (has_zookeeper) { From 97690577672ad3ed215b5e2b38b871197747baf6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 18 Oct 2022 15:53:52 +0200 Subject: [PATCH 139/252] Deactivate all tasks --- src/Storages/StorageReplicatedMergeTree.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index d5f8eaee5ef..94028d31926 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -285,12 +285,18 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( , replicated_fetches_throttler(std::make_shared(getSettings()->max_replicated_fetches_network_bandwidth, getContext()->getReplicatedFetchesThrottler())) , replicated_sends_throttler(std::make_shared(getSettings()->max_replicated_sends_network_bandwidth, getContext()->getReplicatedSendsThrottler())) { + /// We create and deactivate all task for consistency + /// They all will scheduled and activated by restarting thread. queue_updating_task = getContext()->getSchedulePool().createTask( getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::queueUpdatingTask)", [this]{ queueUpdatingTask(); }); + queue_updating_task->deactivate(); + mutations_updating_task = getContext()->getSchedulePool().createTask( getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::mutationsUpdatingTask)", [this]{ mutationsUpdatingTask(); }); + mutations_updating_task->deactivate(); + merge_selecting_task = getContext()->getSchedulePool().createTask( getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::mergeSelectingTask)", [this] { mergeSelectingTask(); }); From a5711fda446c9f657cf9da629c8db957c447c590 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 18 Oct 2022 14:00:50 +0000 Subject: [PATCH 140/252] Revert "Revert #27787" This reverts commit c1cc04d44d6d5404df63577623a98c4b37f2fd89. --- src/Storages/AlterCommands.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index dcd7abae68a..d68252679a7 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -755,9 +755,9 @@ bool isMetadataOnlyConversion(const IDataType * from, const IDataType * to) const auto * nullable_from = typeid_cast(from); const auto * nullable_to = typeid_cast(to); - if (nullable_from && nullable_to) + if (nullable_to) { - from = nullable_from->getNestedType().get(); + from = nullable_from ? 
nullable_from->getNestedType().get() : from; to = nullable_to->getNestedType().get(); continue; } From 82c7228e53519b8b12ddc1e44c96dfcf84cdf628 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 18 Oct 2022 17:03:31 +0300 Subject: [PATCH 141/252] fix --- .../Serializations/SerializationDate.cpp | 19 +++---------------- src/IO/ReadHelpers.h | 2 ++ 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationDate.cpp b/src/DataTypes/Serializations/SerializationDate.cpp index 3b78da97271..678817017e0 100644 --- a/src/DataTypes/Serializations/SerializationDate.cpp +++ b/src/DataTypes/Serializations/SerializationDate.cpp @@ -76,22 +76,9 @@ void SerializationDate::serializeTextCSV(const IColumn & column, size_t row_num, void SerializationDate::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { - DayNum x; - - if (istr.eof()) - throwReadAfterEOF(); - - char maybe_quote = *istr.position(); - - if (maybe_quote == '\'' || maybe_quote == '\"') - ++istr.position(); - - readDateText(x, istr); - - if (maybe_quote == '\'' || maybe_quote == '\"') - assertChar(maybe_quote, istr); - - assert_cast(column).getData().push_back(x); + DayNum value; + readCSV(value, istr); + assert_cast(column).getData().push_back(value); } } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index d5b0ce4bebe..27a24eef804 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -1095,6 +1095,7 @@ inline void readText(is_floating_point auto & x, ReadBuffer & buf) { readFloatTe inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); } inline void readText(LocalDate & x, ReadBuffer & buf) { readDateText(x, buf); } +inline void readText(DayNum & x, ReadBuffer & buf) { readDateText(x, buf); } inline void readText(LocalDateTime & x, ReadBuffer & buf) { readDateTimeText(x, buf); } inline void readText(UUID & x, ReadBuffer & buf) { readUUIDText(x, buf); } @@ -1176,6 +1177,7 @@ inline void readCSV(T & x, ReadBuffer & buf) inline void readCSV(String & x, ReadBuffer & buf, const FormatSettings::CSV & settings) { readCSVString(x, buf, settings); } inline void readCSV(LocalDate & x, ReadBuffer & buf) { readCSVSimple(x, buf); } +inline void readCSV(DayNum & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(LocalDateTime & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(UUID & x, ReadBuffer & buf) { readCSVSimple(x, buf); } inline void readCSV(UInt128 & x, ReadBuffer & buf) { readCSVSimple(x, buf); } From 2b163e3bc042488c178fa2629d7e4ff1611997c5 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Tue, 18 Oct 2022 14:03:50 +0000 Subject: [PATCH 142/252] Remove UTC from 02458_datediff_date32 and 02457_datediff_via_unix_epoch tests --- .../02457_datediff_via_unix_epoch.reference | 4 +- .../02457_datediff_via_unix_epoch.sql | 32 ++-- .../02458_datediff_date32.reference | 154 +++++++++--------- .../0_stateless/02458_datediff_date32.sql | 154 +++++++++--------- 4 files changed, 172 insertions(+), 172 deletions(-) diff --git a/tests/queries/0_stateless/02457_datediff_via_unix_epoch.reference b/tests/queries/0_stateless/02457_datediff_via_unix_epoch.reference index c2498781ac7..ba12c868037 100644 --- a/tests/queries/0_stateless/02457_datediff_via_unix_epoch.reference +++ b/tests/queries/0_stateless/02457_datediff_via_unix_epoch.reference @@ -8,8 +8,8 @@ week 1 week 1 day 11 day 11 -hour 240 -hour 240 +hour 264 +hour 264 minute 1440 minute 20 second 86400 diff --git 
a/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql b/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql index 036bb64a2d4..796b4cc6e8f 100644 --- a/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql +++ b/tests/queries/0_stateless/02457_datediff_via_unix_epoch.sql @@ -1,23 +1,23 @@ -select 'year', date_diff('year', toDate32('1969-12-25', 'UTC'), toDate32('1970-01-05', 'UTC')); -select 'year', date_diff('year', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); +select 'year', date_diff('year', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'year', date_diff('year', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); -select 'quarter', date_diff('quarter', toDate32('1969-12-25', 'UTC'), toDate32('1970-01-05', 'UTC')); -select 'quarter', date_diff('quarter', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); +select 'quarter', date_diff('quarter', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'quarter', date_diff('quarter', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); -select 'month', date_diff('month', toDate32('1969-12-25', 'UTC'), toDate32('1970-01-05', 'UTC')); -select 'month', date_diff('month', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); +select 'month', date_diff('month', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'month', date_diff('month', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); -select 'week', date_diff('week', toDate32('1969-12-25', 'UTC'), toDate32('1970-01-05', 'UTC')); -select 'week', date_diff('week', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); +select 'week', date_diff('week', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'week', date_diff('week', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); -select 'day', date_diff('day', toDate32('1969-12-25', 'UTC'), toDate32('1970-01-05', 'UTC')); -select 'day', date_diff('day', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); +select 'day', date_diff('day', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'day', date_diff('day', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); -select 'hour', date_diff('hour', toDate32('1969-12-25', 'UTC'), toDate32('1970-01-05', 'UTC')); -select 'hour', date_diff('hour', toDateTime64('1969-12-25 10:00:00.000', 3, 'UTC'), toDateTime64('1970-01-05 10:00:00.000', 3, 'UTC')); +select 'hour', date_diff('hour', toDate32('1969-12-25'), toDate32('1970-01-05')); +select 'hour', date_diff('hour', toDateTime64('1969-12-25 10:00:00.000', 3), toDateTime64('1970-01-05 10:00:00.000', 3)); -select 'minute', date_diff('minute', toDate32('1969-12-31', 'UTC'), toDate32('1970-01-01', 'UTC')); -select 'minute', date_diff('minute', toDateTime64('1969-12-31 23:50:00.000', 3, 'UTC'), toDateTime64('1970-01-01 00:10:00.000', 3, 'UTC')); +select 'minute', date_diff('minute', toDate32('1969-12-31'), toDate32('1970-01-01')); +select 'minute', date_diff('minute', toDateTime64('1969-12-31 23:50:00.000', 3), toDateTime64('1970-01-01 00:10:00.000', 3)); -select 'second', date_diff('second', toDate32('1969-12-31', 'UTC'), toDate32('1970-01-01', 'UTC')); -select 'second', 
date_diff('second', toDateTime64('1969-12-31 23:50:00.000', 3, 'UTC'), toDateTime64('1970-01-01 00:10:00.000', 3, 'UTC')); +select 'second', date_diff('second', toDate32('1969-12-31'), toDate32('1970-01-01')); +select 'second', date_diff('second', toDateTime64('1969-12-31 23:50:00.000', 3), toDateTime64('1970-01-01 00:10:00.000', 3)); diff --git a/tests/queries/0_stateless/02458_datediff_date32.reference b/tests/queries/0_stateless/02458_datediff_date32.reference index 482b3be633b..67bfa895199 100644 --- a/tests/queries/0_stateless/02458_datediff_date32.reference +++ b/tests/queries/0_stateless/02458_datediff_date32.reference @@ -1,169 +1,169 @@ -- { echo } -- Date32 vs Date32 -SELECT dateDiff('second', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('second', toDate32('1900-01-01'), toDate32('1900-01-02')); 86400 -SELECT dateDiff('minute', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('minute', toDate32('1900-01-01'), toDate32('1900-01-02')); 1440 -SELECT dateDiff('hour', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('hour', toDate32('1900-01-01'), toDate32('1900-01-02')); 24 -SELECT dateDiff('day', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('day', toDate32('1900-01-01'), toDate32('1900-01-02')); 1 -SELECT dateDiff('week', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-08', 'UTC')); +SELECT dateDiff('week', toDate32('1900-01-01'), toDate32('1900-01-08')); 1 -SELECT dateDiff('month', toDate32('1900-01-01', 'UTC'), toDate32('1900-02-01', 'UTC')); +SELECT dateDiff('month', toDate32('1900-01-01'), toDate32('1900-02-01')); 1 -SELECT dateDiff('quarter', toDate32('1900-01-01', 'UTC'), toDate32('1900-04-01', 'UTC')); +SELECT dateDiff('quarter', toDate32('1900-01-01'), toDate32('1900-04-01')); 1 -SELECT dateDiff('year', toDate32('1900-01-01', 'UTC'), toDate32('1901-01-01', 'UTC')); +SELECT dateDiff('year', toDate32('1900-01-01'), toDate32('1901-01-01')); 1 -- With DateTime64 -- Date32 vs DateTime64 -SELECT dateDiff('second', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('second', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3)); 86400 -SELECT dateDiff('minute', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('minute', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3)); 1440 -SELECT dateDiff('hour', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('hour', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3)); 24 -SELECT dateDiff('day', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('day', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3)); 1 -SELECT dateDiff('week', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-08 00:00:00', 3, 'UTC')); +SELECT dateDiff('week', toDate32('1900-01-01'), toDateTime64('1900-01-08 00:00:00', 3)); 1 -SELECT dateDiff('month', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-02-01 00:00:00', 3, 'UTC')); +SELECT dateDiff('month', toDate32('1900-01-01'), toDateTime64('1900-02-01 00:00:00', 3)); 1 -SELECT dateDiff('quarter', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-04-01 00:00:00', 3, 'UTC')); +SELECT dateDiff('quarter', toDate32('1900-01-01'), toDateTime64('1900-04-01 00:00:00', 3)); 1 -SELECT dateDiff('year', toDate32('1900-01-01', 'UTC'), toDateTime64('1901-01-01 00:00:00', 3, 
'UTC')); +SELECT dateDiff('year', toDate32('1900-01-01'), toDateTime64('1901-01-01 00:00:00', 3)); 1 -- DateTime64 vs Date32 -SELECT dateDiff('second', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('second', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-02')); 86400 -SELECT dateDiff('minute', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('minute', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-02')); 1440 -SELECT dateDiff('hour', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('hour', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-02')); 24 -SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-02')); 1 -SELECT dateDiff('week', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-08', 'UTC')); +SELECT dateDiff('week', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-08')); 1 -SELECT dateDiff('month', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-02-01', 'UTC')); +SELECT dateDiff('month', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-02-01')); 1 -SELECT dateDiff('quarter', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-04-01', 'UTC')); +SELECT dateDiff('quarter', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-04-01')); 1 -SELECT dateDiff('year', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1901-01-01', 'UTC')); +SELECT dateDiff('year', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1901-01-01')); 1 -- With DateTime -- Date32 vs DateTime -SELECT dateDiff('second', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC')); +SELECT dateDiff('second', toDate32('2015-08-18'), toDateTime('2015-08-19 00:00:00')); 86400 -SELECT dateDiff('minute', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC')); +SELECT dateDiff('minute', toDate32('2015-08-18'), toDateTime('2015-08-19 00:00:00')); 1440 -SELECT dateDiff('hour', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC')); +SELECT dateDiff('hour', toDate32('2015-08-18'), toDateTime('2015-08-19 00:00:00')); 24 -SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC')); +SELECT dateDiff('day', toDate32('2015-08-18'), toDateTime('2015-08-19 00:00:00')); 1 -SELECT dateDiff('week', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-25 00:00:00', 'UTC')); +SELECT dateDiff('week', toDate32('2015-08-18'), toDateTime('2015-08-25 00:00:00')); 1 -SELECT dateDiff('month', toDate32('2015-08-18', 'UTC'), toDateTime('2015-09-18 00:00:00', 'UTC')); +SELECT dateDiff('month', toDate32('2015-08-18'), toDateTime('2015-09-18 00:00:00')); 1 -SELECT dateDiff('quarter', toDate32('2015-08-18', 'UTC'), toDateTime('2015-11-18 00:00:00', 'UTC')); +SELECT dateDiff('quarter', toDate32('2015-08-18'), toDateTime('2015-11-18 00:00:00')); 1 -SELECT dateDiff('year', toDate32('2015-08-18', 'UTC'), toDateTime('2016-08-18 00:00:00', 'UTC')); +SELECT dateDiff('year', toDate32('2015-08-18'), toDateTime('2016-08-18 00:00:00')); 1 -- DateTime vs Date32 -SELECT dateDiff('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('second', toDateTime('2015-08-18 00:00:00'), toDate32('2015-08-19')); 86400 -SELECT dateDiff('minute', toDateTime('2015-08-18 00:00:00', 'UTC'), 
toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('minute', toDateTime('2015-08-18 00:00:00'), toDate32('2015-08-19')); 1440 -SELECT dateDiff('hour', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('hour', toDateTime('2015-08-18 00:00:00'), toDate32('2015-08-19')); 24 -SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00'), toDate32('2015-08-19')); 1 -SELECT dateDiff('week', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-25', 'UTC')); +SELECT dateDiff('week', toDateTime('2015-08-18 00:00:00'), toDate32('2015-08-25')); 1 -SELECT dateDiff('month', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-09-18', 'UTC')); +SELECT dateDiff('month', toDateTime('2015-08-18 00:00:00'), toDate32('2015-09-18')); 1 -SELECT dateDiff('quarter', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-11-18', 'UTC')); +SELECT dateDiff('quarter', toDateTime('2015-08-18 00:00:00'), toDate32('2015-11-18')); 1 -SELECT dateDiff('year', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2016-08-18', 'UTC')); +SELECT dateDiff('year', toDateTime('2015-08-18 00:00:00'), toDate32('2016-08-18')); 1 -- With Date -- Date32 vs Date -SELECT dateDiff('second', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC')); +SELECT dateDiff('second', toDate32('2015-08-18'), toDate('2015-08-19')); 86400 -SELECT dateDiff('minute', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC')); +SELECT dateDiff('minute', toDate32('2015-08-18'), toDate('2015-08-19')); 1440 -SELECT dateDiff('hour', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC')); +SELECT dateDiff('hour', toDate32('2015-08-18'), toDate('2015-08-19')); 24 -SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC')); +SELECT dateDiff('day', toDate32('2015-08-18'), toDate('2015-08-19')); 1 -SELECT dateDiff('week', toDate32('2015-08-18', 'UTC'), toDate('2015-08-25', 'UTC')); +SELECT dateDiff('week', toDate32('2015-08-18'), toDate('2015-08-25')); 1 -SELECT dateDiff('month', toDate32('2015-08-18', 'UTC'), toDate('2015-09-18', 'UTC')); +SELECT dateDiff('month', toDate32('2015-08-18'), toDate('2015-09-18')); 1 -SELECT dateDiff('quarter', toDate32('2015-08-18', 'UTC'), toDate('2015-11-18', 'UTC')); +SELECT dateDiff('quarter', toDate32('2015-08-18'), toDate('2015-11-18')); 1 -SELECT dateDiff('year', toDate32('2015-08-18', 'UTC'), toDate('2016-08-18', 'UTC')); +SELECT dateDiff('year', toDate32('2015-08-18'), toDate('2016-08-18')); 1 -- Date vs Date32 -SELECT dateDiff('second', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('second', toDate('2015-08-18'), toDate32('2015-08-19')); 86400 -SELECT dateDiff('minute', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('minute', toDate('2015-08-18'), toDate32('2015-08-19')); 1440 -SELECT dateDiff('hour', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('hour', toDate('2015-08-18'), toDate32('2015-08-19')); 24 -SELECT dateDiff('day', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('day', toDate('2015-08-18'), toDate32('2015-08-19')); 1 -SELECT dateDiff('week', toDate('2015-08-18', 'UTC'), toDate32('2015-08-25', 'UTC')); +SELECT dateDiff('week', toDate('2015-08-18'), toDate32('2015-08-25')); 1 -SELECT dateDiff('month', toDate('2015-08-18', 'UTC'), toDate32('2015-09-18', 'UTC')); +SELECT dateDiff('month', toDate('2015-08-18'), 
toDate32('2015-09-18')); 1 -SELECT dateDiff('quarter', toDate('2015-08-18', 'UTC'), toDate32('2015-11-18', 'UTC')); +SELECT dateDiff('quarter', toDate('2015-08-18'), toDate32('2015-11-18')); 1 -SELECT dateDiff('year', toDate('2015-08-18', 'UTC'), toDate32('2016-08-18', 'UTC')); +SELECT dateDiff('year', toDate('2015-08-18'), toDate32('2016-08-18')); 1 -- Const vs non-const columns -SELECT dateDiff('day', toDate32('1900-01-01', 'UTC'), materialize(toDate32('1900-01-02', 'UTC'))); +SELECT dateDiff('day', toDate32('1900-01-01'), materialize(toDate32('1900-01-02'))); 1 -SELECT dateDiff('day', toDate32('1900-01-01', 'UTC'), materialize(toDateTime64('1900-01-02 00:00:00', 3, 'UTC'))); +SELECT dateDiff('day', toDate32('1900-01-01'), materialize(toDateTime64('1900-01-02 00:00:00', 3))); 1 -SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), materialize(toDate32('1900-01-02', 'UTC'))); +SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3), materialize(toDate32('1900-01-02'))); 1 -SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), materialize(toDateTime('2015-08-19 00:00:00', 'UTC'))); +SELECT dateDiff('day', toDate32('2015-08-18'), materialize(toDateTime('2015-08-19 00:00:00'))); 1 -SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDate32('2015-08-19', 'UTC'))); +SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00'), materialize(toDate32('2015-08-19'))); 1 -SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), materialize(toDate('2015-08-19', 'UTC'))); +SELECT dateDiff('day', toDate32('2015-08-18'), materialize(toDate('2015-08-19'))); 1 -SELECT dateDiff('day', toDate('2015-08-18', 'UTC'), materialize(toDate32('2015-08-19', 'UTC'))); +SELECT dateDiff('day', toDate('2015-08-18'), materialize(toDate32('2015-08-19'))); 1 -- Non-const vs const columns -SELECT dateDiff('day', materialize(toDate32('1900-01-01', 'UTC')), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('day', materialize(toDate32('1900-01-01')), toDate32('1900-01-02')); 1 -SELECT dateDiff('day', materialize(toDate32('1900-01-01', 'UTC')), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); +SELECT dateDiff('day', materialize(toDate32('1900-01-01')), toDateTime64('1900-01-02 00:00:00', 3)); 1 -SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3, 'UTC')), toDate32('1900-01-02', 'UTC')); +SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3)), toDate32('1900-01-02')); 1 -SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), toDateTime('2015-08-19 00:00:00', 'UTC')); +SELECT dateDiff('day', materialize(toDate32('2015-08-18')), toDateTime('2015-08-19 00:00:00')); 1 -SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00')), toDate32('2015-08-19')); 1 -SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), toDate('2015-08-19', 'UTC')); +SELECT dateDiff('day', materialize(toDate32('2015-08-18')), toDate('2015-08-19')); 1 -SELECT dateDiff('day', materialize(toDate('2015-08-18', 'UTC')), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('day', materialize(toDate('2015-08-18')), toDate32('2015-08-19')); 1 -- Non-const vs non-const columns -SELECT dateDiff('day', materialize(toDate32('1900-01-01', 'UTC')), materialize(toDate32('1900-01-02', 'UTC'))); +SELECT dateDiff('day', materialize(toDate32('1900-01-01')), materialize(toDate32('1900-01-02'))); 1 -SELECT dateDiff('day', materialize(toDate32('1900-01-01', 
'UTC')), materialize(toDateTime64('1900-01-02 00:00:00', 3, 'UTC'))); +SELECT dateDiff('day', materialize(toDate32('1900-01-01')), materialize(toDateTime64('1900-01-02 00:00:00', 3))); 1 -SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3, 'UTC')), materialize(toDate32('1900-01-02', 'UTC'))); +SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3)), materialize(toDate32('1900-01-02'))); 1 -SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDateTime('2015-08-19 00:00:00', 'UTC'))); +SELECT dateDiff('day', materialize(toDate32('2015-08-18')), materialize(toDateTime('2015-08-19 00:00:00'))); 1 -SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDate32('2015-08-19', 'UTC'))); +SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00')), materialize(toDate32('2015-08-19'))); 1 -SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDate('2015-08-19', 'UTC'))); +SELECT dateDiff('day', materialize(toDate32('2015-08-18')), materialize(toDate('2015-08-19'))); 1 -SELECT dateDiff('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDate32('2015-08-19', 'UTC'))); +SELECT dateDiff('day', materialize(toDate('2015-08-18')), materialize(toDate32('2015-08-19'))); 1 diff --git a/tests/queries/0_stateless/02458_datediff_date32.sql b/tests/queries/0_stateless/02458_datediff_date32.sql index a5a0809fc39..4c26e04ac27 100644 --- a/tests/queries/0_stateless/02458_datediff_date32.sql +++ b/tests/queries/0_stateless/02458_datediff_date32.sql @@ -1,101 +1,101 @@ -- { echo } -- Date32 vs Date32 -SELECT dateDiff('second', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-02', 'UTC')); -SELECT dateDiff('minute', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-02', 'UTC')); -SELECT dateDiff('hour', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-02', 'UTC')); -SELECT dateDiff('day', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-02', 'UTC')); -SELECT dateDiff('week', toDate32('1900-01-01', 'UTC'), toDate32('1900-01-08', 'UTC')); -SELECT dateDiff('month', toDate32('1900-01-01', 'UTC'), toDate32('1900-02-01', 'UTC')); -SELECT dateDiff('quarter', toDate32('1900-01-01', 'UTC'), toDate32('1900-04-01', 'UTC')); -SELECT dateDiff('year', toDate32('1900-01-01', 'UTC'), toDate32('1901-01-01', 'UTC')); +SELECT dateDiff('second', toDate32('1900-01-01'), toDate32('1900-01-02')); +SELECT dateDiff('minute', toDate32('1900-01-01'), toDate32('1900-01-02')); +SELECT dateDiff('hour', toDate32('1900-01-01'), toDate32('1900-01-02')); +SELECT dateDiff('day', toDate32('1900-01-01'), toDate32('1900-01-02')); +SELECT dateDiff('week', toDate32('1900-01-01'), toDate32('1900-01-08')); +SELECT dateDiff('month', toDate32('1900-01-01'), toDate32('1900-02-01')); +SELECT dateDiff('quarter', toDate32('1900-01-01'), toDate32('1900-04-01')); +SELECT dateDiff('year', toDate32('1900-01-01'), toDate32('1901-01-01')); -- With DateTime64 -- Date32 vs DateTime64 -SELECT dateDiff('second', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); -SELECT dateDiff('minute', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); -SELECT dateDiff('hour', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); -SELECT dateDiff('day', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); -SELECT dateDiff('week', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-01-08 00:00:00', 3, 'UTC')); -SELECT dateDiff('month', 
toDate32('1900-01-01', 'UTC'), toDateTime64('1900-02-01 00:00:00', 3, 'UTC')); -SELECT dateDiff('quarter', toDate32('1900-01-01', 'UTC'), toDateTime64('1900-04-01 00:00:00', 3, 'UTC')); -SELECT dateDiff('year', toDate32('1900-01-01', 'UTC'), toDateTime64('1901-01-01 00:00:00', 3, 'UTC')); +SELECT dateDiff('second', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3)); +SELECT dateDiff('minute', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3)); +SELECT dateDiff('hour', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3)); +SELECT dateDiff('day', toDate32('1900-01-01'), toDateTime64('1900-01-02 00:00:00', 3)); +SELECT dateDiff('week', toDate32('1900-01-01'), toDateTime64('1900-01-08 00:00:00', 3)); +SELECT dateDiff('month', toDate32('1900-01-01'), toDateTime64('1900-02-01 00:00:00', 3)); +SELECT dateDiff('quarter', toDate32('1900-01-01'), toDateTime64('1900-04-01 00:00:00', 3)); +SELECT dateDiff('year', toDate32('1900-01-01'), toDateTime64('1901-01-01 00:00:00', 3)); -- DateTime64 vs Date32 -SELECT dateDiff('second', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02', 'UTC')); -SELECT dateDiff('minute', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02', 'UTC')); -SELECT dateDiff('hour', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02', 'UTC')); -SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-02', 'UTC')); -SELECT dateDiff('week', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-01-08', 'UTC')); -SELECT dateDiff('month', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-02-01', 'UTC')); -SELECT dateDiff('quarter', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1900-04-01', 'UTC')); -SELECT dateDiff('year', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), toDate32('1901-01-01', 'UTC')); +SELECT dateDiff('second', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-02')); +SELECT dateDiff('minute', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-02')); +SELECT dateDiff('hour', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-02')); +SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-02')); +SELECT dateDiff('week', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-01-08')); +SELECT dateDiff('month', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-02-01')); +SELECT dateDiff('quarter', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1900-04-01')); +SELECT dateDiff('year', toDateTime64('1900-01-01 00:00:00', 3), toDate32('1901-01-01')); -- With DateTime -- Date32 vs DateTime -SELECT dateDiff('second', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC')); -SELECT dateDiff('minute', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC')); -SELECT dateDiff('hour', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC')); -SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC')); -SELECT dateDiff('week', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-25 00:00:00', 'UTC')); -SELECT dateDiff('month', toDate32('2015-08-18', 'UTC'), toDateTime('2015-09-18 00:00:00', 'UTC')); -SELECT dateDiff('quarter', toDate32('2015-08-18', 'UTC'), toDateTime('2015-11-18 00:00:00', 'UTC')); -SELECT dateDiff('year', toDate32('2015-08-18', 'UTC'), toDateTime('2016-08-18 00:00:00', 'UTC')); +SELECT dateDiff('second', toDate32('2015-08-18'), toDateTime('2015-08-19 00:00:00')); +SELECT 
dateDiff('minute', toDate32('2015-08-18'), toDateTime('2015-08-19 00:00:00')); +SELECT dateDiff('hour', toDate32('2015-08-18'), toDateTime('2015-08-19 00:00:00')); +SELECT dateDiff('day', toDate32('2015-08-18'), toDateTime('2015-08-19 00:00:00')); +SELECT dateDiff('week', toDate32('2015-08-18'), toDateTime('2015-08-25 00:00:00')); +SELECT dateDiff('month', toDate32('2015-08-18'), toDateTime('2015-09-18 00:00:00')); +SELECT dateDiff('quarter', toDate32('2015-08-18'), toDateTime('2015-11-18 00:00:00')); +SELECT dateDiff('year', toDate32('2015-08-18'), toDateTime('2016-08-18 00:00:00')); -- DateTime vs Date32 -SELECT dateDiff('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC')); -SELECT dateDiff('minute', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC')); -SELECT dateDiff('hour', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC')); -SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC')); -SELECT dateDiff('week', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-25', 'UTC')); -SELECT dateDiff('month', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-09-18', 'UTC')); -SELECT dateDiff('quarter', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-11-18', 'UTC')); -SELECT dateDiff('year', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2016-08-18', 'UTC')); +SELECT dateDiff('second', toDateTime('2015-08-18 00:00:00'), toDate32('2015-08-19')); +SELECT dateDiff('minute', toDateTime('2015-08-18 00:00:00'), toDate32('2015-08-19')); +SELECT dateDiff('hour', toDateTime('2015-08-18 00:00:00'), toDate32('2015-08-19')); +SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00'), toDate32('2015-08-19')); +SELECT dateDiff('week', toDateTime('2015-08-18 00:00:00'), toDate32('2015-08-25')); +SELECT dateDiff('month', toDateTime('2015-08-18 00:00:00'), toDate32('2015-09-18')); +SELECT dateDiff('quarter', toDateTime('2015-08-18 00:00:00'), toDate32('2015-11-18')); +SELECT dateDiff('year', toDateTime('2015-08-18 00:00:00'), toDate32('2016-08-18')); -- With Date -- Date32 vs Date -SELECT dateDiff('second', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC')); -SELECT dateDiff('minute', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC')); -SELECT dateDiff('hour', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC')); -SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC')); -SELECT dateDiff('week', toDate32('2015-08-18', 'UTC'), toDate('2015-08-25', 'UTC')); -SELECT dateDiff('month', toDate32('2015-08-18', 'UTC'), toDate('2015-09-18', 'UTC')); -SELECT dateDiff('quarter', toDate32('2015-08-18', 'UTC'), toDate('2015-11-18', 'UTC')); -SELECT dateDiff('year', toDate32('2015-08-18', 'UTC'), toDate('2016-08-18', 'UTC')); +SELECT dateDiff('second', toDate32('2015-08-18'), toDate('2015-08-19')); +SELECT dateDiff('minute', toDate32('2015-08-18'), toDate('2015-08-19')); +SELECT dateDiff('hour', toDate32('2015-08-18'), toDate('2015-08-19')); +SELECT dateDiff('day', toDate32('2015-08-18'), toDate('2015-08-19')); +SELECT dateDiff('week', toDate32('2015-08-18'), toDate('2015-08-25')); +SELECT dateDiff('month', toDate32('2015-08-18'), toDate('2015-09-18')); +SELECT dateDiff('quarter', toDate32('2015-08-18'), toDate('2015-11-18')); +SELECT dateDiff('year', toDate32('2015-08-18'), toDate('2016-08-18')); -- Date vs Date32 -SELECT dateDiff('second', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC')); -SELECT dateDiff('minute', 
toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC')); -SELECT dateDiff('hour', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC')); -SELECT dateDiff('day', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC')); -SELECT dateDiff('week', toDate('2015-08-18', 'UTC'), toDate32('2015-08-25', 'UTC')); -SELECT dateDiff('month', toDate('2015-08-18', 'UTC'), toDate32('2015-09-18', 'UTC')); -SELECT dateDiff('quarter', toDate('2015-08-18', 'UTC'), toDate32('2015-11-18', 'UTC')); -SELECT dateDiff('year', toDate('2015-08-18', 'UTC'), toDate32('2016-08-18', 'UTC')); +SELECT dateDiff('second', toDate('2015-08-18'), toDate32('2015-08-19')); +SELECT dateDiff('minute', toDate('2015-08-18'), toDate32('2015-08-19')); +SELECT dateDiff('hour', toDate('2015-08-18'), toDate32('2015-08-19')); +SELECT dateDiff('day', toDate('2015-08-18'), toDate32('2015-08-19')); +SELECT dateDiff('week', toDate('2015-08-18'), toDate32('2015-08-25')); +SELECT dateDiff('month', toDate('2015-08-18'), toDate32('2015-09-18')); +SELECT dateDiff('quarter', toDate('2015-08-18'), toDate32('2015-11-18')); +SELECT dateDiff('year', toDate('2015-08-18'), toDate32('2016-08-18')); -- Const vs non-const columns -SELECT dateDiff('day', toDate32('1900-01-01', 'UTC'), materialize(toDate32('1900-01-02', 'UTC'))); -SELECT dateDiff('day', toDate32('1900-01-01', 'UTC'), materialize(toDateTime64('1900-01-02 00:00:00', 3, 'UTC'))); -SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3, 'UTC'), materialize(toDate32('1900-01-02', 'UTC'))); -SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), materialize(toDateTime('2015-08-19 00:00:00', 'UTC'))); -SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDate32('2015-08-19', 'UTC'))); -SELECT dateDiff('day', toDate32('2015-08-18', 'UTC'), materialize(toDate('2015-08-19', 'UTC'))); -SELECT dateDiff('day', toDate('2015-08-18', 'UTC'), materialize(toDate32('2015-08-19', 'UTC'))); +SELECT dateDiff('day', toDate32('1900-01-01'), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', toDate32('1900-01-01'), materialize(toDateTime64('1900-01-02 00:00:00', 3))); +SELECT dateDiff('day', toDateTime64('1900-01-01 00:00:00', 3), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', toDate32('2015-08-18'), materialize(toDateTime('2015-08-19 00:00:00'))); +SELECT dateDiff('day', toDateTime('2015-08-18 00:00:00'), materialize(toDate32('2015-08-19'))); +SELECT dateDiff('day', toDate32('2015-08-18'), materialize(toDate('2015-08-19'))); +SELECT dateDiff('day', toDate('2015-08-18'), materialize(toDate32('2015-08-19'))); -- Non-const vs const columns -SELECT dateDiff('day', materialize(toDate32('1900-01-01', 'UTC')), toDate32('1900-01-02', 'UTC')); -SELECT dateDiff('day', materialize(toDate32('1900-01-01', 'UTC')), toDateTime64('1900-01-02 00:00:00', 3, 'UTC')); -SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3, 'UTC')), toDate32('1900-01-02', 'UTC')); -SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), toDateTime('2015-08-19 00:00:00', 'UTC')); -SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDate32('2015-08-19', 'UTC')); -SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), toDate('2015-08-19', 'UTC')); -SELECT dateDiff('day', materialize(toDate('2015-08-18', 'UTC')), toDate32('2015-08-19', 'UTC')); +SELECT dateDiff('day', materialize(toDate32('1900-01-01')), toDate32('1900-01-02')); +SELECT dateDiff('day', materialize(toDate32('1900-01-01')), toDateTime64('1900-01-02 
00:00:00', 3)); +SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3)), toDate32('1900-01-02')); +SELECT dateDiff('day', materialize(toDate32('2015-08-18')), toDateTime('2015-08-19 00:00:00')); +SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00')), toDate32('2015-08-19')); +SELECT dateDiff('day', materialize(toDate32('2015-08-18')), toDate('2015-08-19')); +SELECT dateDiff('day', materialize(toDate('2015-08-18')), toDate32('2015-08-19')); -- Non-const vs non-const columns -SELECT dateDiff('day', materialize(toDate32('1900-01-01', 'UTC')), materialize(toDate32('1900-01-02', 'UTC'))); -SELECT dateDiff('day', materialize(toDate32('1900-01-01', 'UTC')), materialize(toDateTime64('1900-01-02 00:00:00', 3, 'UTC'))); -SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3, 'UTC')), materialize(toDate32('1900-01-02', 'UTC'))); -SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDateTime('2015-08-19 00:00:00', 'UTC'))); -SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDate32('2015-08-19', 'UTC'))); -SELECT dateDiff('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDate('2015-08-19', 'UTC'))); -SELECT dateDiff('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDate32('2015-08-19', 'UTC'))); +SELECT dateDiff('day', materialize(toDate32('1900-01-01')), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', materialize(toDate32('1900-01-01')), materialize(toDateTime64('1900-01-02 00:00:00', 3))); +SELECT dateDiff('day', materialize(toDateTime64('1900-01-01 00:00:00', 3)), materialize(toDate32('1900-01-02'))); +SELECT dateDiff('day', materialize(toDate32('2015-08-18')), materialize(toDateTime('2015-08-19 00:00:00'))); +SELECT dateDiff('day', materialize(toDateTime('2015-08-18 00:00:00')), materialize(toDate32('2015-08-19'))); +SELECT dateDiff('day', materialize(toDate32('2015-08-18')), materialize(toDate('2015-08-19'))); +SELECT dateDiff('day', materialize(toDate('2015-08-18')), materialize(toDate32('2015-08-19'))); From 79231412c9a3624e0c29df387d529432cb5e8b8f Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Tue, 18 Oct 2022 15:57:12 +0200 Subject: [PATCH 143/252] Fix minor typos in code documentation --- src/Interpreters/Cache/FileSegment.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 617e7173c2f..8f9c0097d77 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -66,10 +66,10 @@ public: */ DOWNLOADING, /** - * Space reservation for a file segment is incremental, i.e. downaloder reads buffer_size bytes + * Space reservation for a file segment is incremental, i.e. downloader reads buffer_size bytes * from remote fs -> tries to reserve buffer_size bytes to put them to cache -> writes to cache * on successful reservation and stops cache write otherwise. Those, who waited for the same file - * file segment, will read downloaded part from cache and remaining part directly from remote fs. + * segment, will read downloaded part from cache and remaining part directly from remote fs. 
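The comment above describes the incremental space-reservation flow for a file segment. As a hedged illustration — `Remote` and `Cache` below are hypothetical stand-ins for the real downloader and cache interfaces, not the actual ClickHouse API — the loop it describes looks roughly like this:

```cpp
#include <cstddef>

// Hypothetical stand-ins; the real interfaces are richer than these stubs.
struct Remote { size_t read(char *, size_t) { return 0; } };                        // remote fs read (stub)
struct Cache  { bool tryReserve(size_t) { return true; } void write(const char *, size_t) {} };

/// The downloader reads buffer_size bytes from the remote fs, tries to reserve
/// that much cache space, writes to the cache on success, and otherwise stops
/// writing to the cache for the rest of the segment.
void downloadIncrementally(Remote & remote, Cache & cache, char * buf, size_t buffer_size)
{
    bool cache_writes_enabled = true;
    while (size_t n = remote.read(buf, buffer_size))
    {
        if (cache_writes_enabled && cache.tryReserve(n))
            cache.write(buf, n);           // reservation succeeded: put the bytes into the cache
        else
            cache_writes_enabled = false;  // reservation failed: stop cache writes; anyone waiting
                                           // on this segment reads the rest directly from remote fs
    }
}
```

Waiters on the same segment then read the already-downloaded part from the cache and the remaining part directly from the remote fs, exactly as the comment states.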
*/ PARTIALLY_DOWNLOADED_NO_CONTINUATION, /** From fbfa2a98851841c03adb886d339e95a7400e3b12 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Tue, 18 Oct 2022 15:58:17 +0200 Subject: [PATCH 144/252] Fix exception message to be in sync with code --- src/Interpreters/Cache/FileSegment.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index cf48c5cd976..418bcee05d9 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -66,7 +66,7 @@ FileSegment::FileSegment( { throw Exception( ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, - "Can create cell with either EMPTY, DOWNLOADED, DOWNLOADING state"); + "Can only create cell with either EMPTY, DOWNLOADED or SKIP_CACHE state"); } } } From 0623ad4e374f3477a2c6fc6a7e7181dcad2319ee Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 18 Oct 2022 17:35:44 +0300 Subject: [PATCH 145/252] Update StorageReplicatedMergeTree.cpp --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 94028d31926..18ef65f46e1 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -285,7 +285,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( , replicated_fetches_throttler(std::make_shared(getSettings()->max_replicated_fetches_network_bandwidth, getContext()->getReplicatedFetchesThrottler())) , replicated_sends_throttler(std::make_shared(getSettings()->max_replicated_sends_network_bandwidth, getContext()->getReplicatedSendsThrottler())) { - /// We create and deactivate all task for consistency + /// We create and deactivate all tasks for consistency /// They all will scheduled and activated by restarting thread. queue_updating_task = getContext()->getSchedulePool().createTask( getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::queueUpdatingTask)", [this]{ queueUpdatingTask(); }); From 7befe2825c988431e669f03c7dec0b5a3c044c8d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 18 Oct 2022 17:36:11 +0300 Subject: [PATCH 146/252] Update StorageReplicatedMergeTree.cpp --- src/Storages/StorageReplicatedMergeTree.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 18ef65f46e1..31ee955a7cc 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -285,8 +285,8 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( , replicated_fetches_throttler(std::make_shared(getSettings()->max_replicated_fetches_network_bandwidth, getContext()->getReplicatedFetchesThrottler())) , replicated_sends_throttler(std::make_shared(getSettings()->max_replicated_sends_network_bandwidth, getContext()->getReplicatedSendsThrottler())) { - /// We create and deactivate all tasks for consistency - /// They all will scheduled and activated by restarting thread. + /// We create and deactivate all tasks for consistency. + /// They all will be scheduled and activated by the restarting thread. 
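For context, the pattern this corrected comment describes — create every background task up front, keep it deactivated, and let a single restarting thread activate and schedule them later — can be sketched as below. This is a minimal sketch with a hypothetical `Task` type and illustrative task names, not the actual `BackgroundSchedulePool` API:

```cpp
#include <functional>
#include <string>
#include <utility>
#include <vector>

// Hypothetical task handle; a real schedule-pool task is thread-safe and re-schedulable.
class Task
{
public:
    Task(std::string name_, std::function<void()> fn_) : name(std::move(name_)), fn(std::move(fn_)) {}
    void deactivate() { active = false; }
    void activateAndSchedule() { active = true; fn(); }  // simplified: run once upon activation

private:
    std::string name;
    std::function<void()> fn;
    bool active = false;
};

class Storage
{
public:
    Storage()
    {
        /// Create and deactivate all tasks for consistency...
        tasks.emplace_back("queueUpdatingTask", [] { /* pull new entries from the replication log */ });
        tasks.emplace_back("someOtherBackgroundTask", [] { /* illustrative second task */ });
        for (auto & task : tasks)
            task.deactivate();
    }

    /// ...they are all scheduled and activated later by the restarting thread.
    void restartingThreadRun()
    {
        for (auto & task : tasks)
            task.activateAndSchedule();
    }

private:
    std::vector<Task> tasks;
};
```

One plausible reading of "for consistency" is that creating the tasks eagerly and activating them in a single place avoids a half-initialized state in which some tasks run before the rest exist.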
queue_updating_task = getContext()->getSchedulePool().createTask( getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::queueUpdatingTask)", [this]{ queueUpdatingTask(); }); From de3728127c0b2d7af123ce3e43964582a7e8b49f Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Tue, 18 Oct 2022 14:52:58 +0000 Subject: [PATCH 147/252] fix --- src/Interpreters/Cache/FileCache.cpp | 108 +++++++++++++-------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 8da0c68b19e..f31e525dcca 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -198,72 +198,72 @@ FileSegments FileCache::getImpl( file_segment->detachAssumeStateFinalized(segment_lock); } result.emplace_back(file_segment); + return result; } - else + + auto it = files.find(key); + if (it == files.end()) + return {}; + + const auto & file_segments = it->second; + if (file_segments.empty()) { - auto it = files.find(key); - if (it == files.end()) + files.erase(key); + removeKeyDirectoryIfExists(key, cache_lock); + return {}; + } + + auto segment_it = file_segments.lower_bound(range.left); + if (segment_it == file_segments.end()) + { + /// N - last cached segment for given file key, segment{N}.offset < range.left: + /// segment{N} segment{N} + /// [________ [_______] + /// [__________] OR [________] + /// ^ ^ + /// range.left range.left + + const auto & cell = file_segments.rbegin()->second; + if (cell.file_segment->range().right < range.left) return {}; - const auto & file_segments = it->second; - if (file_segments.empty()) + useCell(cell, result, cache_lock); + } + else /// segment_it <-- segmment{k} + { + if (segment_it != file_segments.begin()) { - files.erase(key); - removeKeyDirectoryIfExists(key, cache_lock); - return {}; + const auto & prev_cell = std::prev(segment_it)->second; + const auto & prev_cell_range = prev_cell.file_segment->range(); + + if (range.left <= prev_cell_range.right) + { + /// segment{k-1} segment{k} + /// [________] [_____ + /// [___________ + /// ^ + /// range.left + useCell(prev_cell, result, cache_lock); + } } - auto segment_it = file_segments.lower_bound(range.left); - if (segment_it == file_segments.end()) - { - /// N - last cached segment for given file key, segment{N}.offset < range.left: - /// segment{N} segment{N} - /// [________ [_______] - /// [__________] OR [________] - /// ^ ^ - /// range.left range.left + /// segment{k} ... segment{k-1} segment{k} segment{k} + /// [______ [______] [____ [________ + /// [_________ OR [________ OR [______] ^ + /// ^ ^ ^ segment{k}.offset + /// range.left range.left range.right - const auto & cell = file_segments.rbegin()->second; - if (cell.file_segment->range().right < range.left) - return {}; + while (segment_it != file_segments.end()) + { + const auto & cell = segment_it->second; + if (range.right < cell.file_segment->range().left) + break; useCell(cell, result, cache_lock); - } - else /// segment_it <-- segmment{k} - { - if (segment_it != file_segments.begin()) - { - const auto & prev_cell = std::prev(segment_it)->second; - const auto & prev_cell_range = prev_cell.file_segment->range(); - - if (range.left <= prev_cell_range.right) - { - /// segment{k-1} segment{k} - /// [________] [_____ - /// [___________ - /// ^ - /// range.left - useCell(prev_cell, result, cache_lock); - } - } - - /// segment{k} ... 
segment{k-1} segment{k} segment{k} - /// [______ [______] [____ [________ - /// [_________ OR [________ OR [______] ^ - /// ^ ^ ^ segment{k}.offset - /// range.left range.left range.right - - while (segment_it != file_segments.end()) - { - const auto & cell = segment_it->second; - if (range.right < cell.file_segment->range().left) - break; - - useCell(cell, result, cache_lock); - ++segment_it; - } + ++segment_it; } } + return result; } From e504cec74335874a69f86359a082b6ca2202f432 Mon Sep 17 00:00:00 2001 From: jferroal Date: Tue, 18 Oct 2022 22:55:58 +0800 Subject: [PATCH 148/252] Doc: add zh-CN translation getting-started/example-datasets/recipes.mdx --- .../example-datasets/recipes.mdx | 342 +++++++++++++++++- 1 file changed, 336 insertions(+), 6 deletions(-) diff --git a/docs/zh/getting-started/example-datasets/recipes.mdx b/docs/zh/getting-started/example-datasets/recipes.mdx index da3a2ac541b..0926b3dde07 100644 --- a/docs/zh/getting-started/example-datasets/recipes.mdx +++ b/docs/zh/getting-started/example-datasets/recipes.mdx @@ -1,9 +1,339 @@ ---- -slug: /zh/getting-started/example-datasets/recipes -sidebar_label: Recipes Dataset -title: "Recipes Dataset" +--- +slug:/zh/getting-started/example-datasets/recipes +sidebar_label:食谱数æ®é›† +title: "食谱数æ®é›†" --- -import Content from '@site/docs/en/getting-started/example-datasets/recipes.md'; +RecipeNLG æ•°æ®é›†å¯åœ¨ [此处](https://recipenlg.cs.put.poznan.pl/dataset) ä¸‹è½½ã€‚å…¶ä¸­åŒ…å« 220 万份食谱。大å°ç•¥å°äºŽ 1 GB。 - +## 下载并解压数æ®é›† + +1. 进入下载页é¢[https://recipenlg.cs.put.poznan.pl/dataset](https://recipenlg.cs.put.poznan.pl/dataset)。 +2. 接å—æ¡æ¬¾å’Œæ¡ä»¶å¹¶ä¸‹è½½ zip 文件。 +3. 使用 `unzip` 解压 zip 文件,得到 `full_dataset.csv` 文件。 + +## 创建表 + +è¿è¡Œ clickhouse-client 并执行以下 CREATE 请求: + +``` sql +CREATE TABLE recipes +( + title String, + ingredients Array(String), + directions Array(String), + link String, + source LowCardinality(String), + NER Array(String) +) ENGINE = MergeTree ORDER BY title; +``` + +## æ’å…¥æ•°æ® + +è¿è¡Œä»¥ä¸‹å‘½ä»¤ï¼š + +``` bash +clickhouse-client --query " + INSERT INTO recipes + SELECT + title, + JSONExtract(ingredients, 'Array(String)'), + JSONExtract(directions, 'Array(String)'), + link, + source, + JSONExtract(NER, 'Array(String)') + FROM input('num UInt32, title String, ingredients String, directions String, link String, source LowCardinality(String), NER String') + FORMAT CSVWithNames +" --input_format_with_names_use_header 0 --format_csv_allow_single_quote 0 --input_format_allow_errors_num 10 < full_dataset.csv +``` + +这是一个展示如何解æžè‡ªå®šä¹‰ CSV,这其中涉åŠäº†è®¸å¤šè°ƒæ•´ã€‚ + +说明: +- æ•°æ®é›†ä¸º CSV æ ¼å¼ï¼Œä½†åœ¨æ’入时需è¦ä¸€äº›é¢„处ç†ï¼›ä½¿ç”¨è¡¨å‡½æ•° [input](../../sql-reference/table-functions/input.md) 进行预处ç†ï¼› +- CSV 文件的结构在表函数 `input` çš„å‚数中指定; +- 字段 `num`(行å·ï¼‰æ˜¯ä¸éœ€è¦çš„ - å¯ä»¥å¿½ç•¥å¹¶ä»Žæ–‡ä»¶ä¸­è¿›è¡Œè§£æžï¼› +- 使用 `FORMAT CSVWithNames`,因为标题ä¸åŒ…å«ç¬¬ä¸€ä¸ªå­—段的å称,因此 CSV 中的标题将被忽略(通过命令行å‚æ•° `--input_format_with_names_use_header 0`); +- 文件仅使用åŒå¼•å·å°† CSV 字符串括起æ¥ï¼›ä¸€äº›å­—符串没有用åŒå¼•å·æ‹¬èµ·æ¥ï¼Œå•å¼•å·ä¹Ÿä¸èƒ½è¢«è§£æžä¸ºæ‹¬èµ·æ¥çš„字符串 - 所以添加`--format_csv_allow_single_quote 0`å‚数接å—文件中的å•å¼•å·ï¼› +- 由于æŸäº› CSV çš„å­—ç¬¦ä¸²çš„å¼€å¤´åŒ…å« `\M/` 因此无法被解æžï¼› CSV 中唯一å¯èƒ½ä»¥åæ–œæ å¼€å¤´çš„值是 `\N`,这个值被解æžä¸º SQL NULL。通过添加`--input_format_allow_errors_num 10`å‚数,å…许在导入过程中跳过 10 个格å¼é”™è¯¯ï¼› +- 在数æ®é›†ä¸­çš„ Ingredientsã€directions å’Œ NER 字段为数组;但这些数组并没有以一般形å¼è¡¨ç¤ºï¼šè¿™äº›å­—段作为 JSON åºåˆ—化为字符串,然åŽæ”¾å…¥ CSV 中 - 在导入是将它们解æžä¸ºå­—符串,然åŽä½¿ç”¨ [JSONExtract](../../sql-reference/functions/json-functions.md ) 
函数将其转æ¢ä¸ºæ•°ç»„。 + +## 验è¯æ’å…¥çš„æ•°æ® + +通过检查行数: + +请求: + +``` sql +SELECT count() FROM recipes; +``` + +结果: + +``` text +┌─count()─┠+│ 2231141 │ +└─────────┘ +``` + +## 示例查询 + +### 按é…方数é‡æŽ’列的顶级组件: + +在此示例中,我们学习如何使用 [arrayJoin](../../sql-reference/functions/array-join/) 函数将数组扩展为行的集åˆã€‚ + +请求: + +``` sql +SELECT + arrayJoin(NER) AS k, + count() AS c +FROM recipes +GROUP BY k +ORDER BY c DESC +LIMIT 50 +``` + +结果: + +``` text +┌─k────────────────────┬──────c─┠+│ salt │ 890741 │ +│ sugar │ 620027 │ +│ butter │ 493823 │ +│ flour │ 466110 │ +│ eggs │ 401276 │ +│ onion │ 372469 │ +│ garlic │ 358364 │ +│ milk │ 346769 │ +│ water │ 326092 │ +│ vanilla │ 270381 │ +│ olive oil │ 197877 │ +│ pepper │ 179305 │ +│ brown sugar │ 174447 │ +│ tomatoes │ 163933 │ +│ egg │ 160507 │ +│ baking powder │ 148277 │ +│ lemon juice │ 146414 │ +│ Salt │ 122557 │ +│ cinnamon │ 117927 │ +│ sour cream │ 116682 │ +│ cream cheese │ 114423 │ +│ margarine │ 112742 │ +│ celery │ 112676 │ +│ baking soda │ 110690 │ +│ parsley │ 102151 │ +│ chicken │ 101505 │ +│ onions │ 98903 │ +│ vegetable oil │ 91395 │ +│ oil │ 85600 │ +│ mayonnaise │ 84822 │ +│ pecans │ 79741 │ +│ nuts │ 78471 │ +│ potatoes │ 75820 │ +│ carrots │ 75458 │ +│ pineapple │ 74345 │ +│ soy sauce │ 70355 │ +│ black pepper │ 69064 │ +│ thyme │ 68429 │ +│ mustard │ 65948 │ +│ chicken broth │ 65112 │ +│ bacon │ 64956 │ +│ honey │ 64626 │ +│ oregano │ 64077 │ +│ ground beef │ 64068 │ +│ unsalted butter │ 63848 │ +│ mushrooms │ 61465 │ +│ Worcestershire sauce │ 59328 │ +│ cornstarch │ 58476 │ +│ green pepper │ 58388 │ +│ Cheddar cheese │ 58354 │ +└──────────────────────┴────────┘ + +50 rows in set. Elapsed: 0.112 sec. Processed 2.23 million rows, 361.57 MB (19.99 million rows/s., 3.24 GB/s.) +``` + +### 最å¤æ‚çš„è‰èŽ“食谱 + +``` sql +SELECT + title, + length(NER), + length(directions) +FROM recipes +WHERE has(NER, 'strawberry') +ORDER BY length(directions) DESC +LIMIT 10 +``` + +结果: + +``` text +┌─title────────────────────────────────────────────────────────────┬─length(NER)─┬─length(directions)─┠+│ Chocolate-Strawberry-Orange Wedding Cake │ 24 │ 126 │ +│ Strawberry Cream Cheese Crumble Tart │ 19 │ 47 │ +│ Charlotte-Style Ice Cream │ 11 │ 45 │ +│ Sinfully Good a Million Layers Chocolate Layer Cake, With Strawb │ 31 │ 45 │ +│ Sweetened Berries With Elderflower Sherbet │ 24 │ 44 │ +│ Chocolate-Strawberry Mousse Cake │ 15 │ 42 │ +│ Rhubarb Charlotte with Strawberries and Rum │ 20 │ 42 │ +│ Chef Joey's Strawberry Vanilla Tart │ 7 │ 37 │ +│ Old-Fashioned Ice Cream Sundae Cake │ 17 │ 37 │ +│ Watermelon Cake │ 16 │ 36 │ +└──────────────────────────────────────────────────────────────────┴─────────────┴────────────────────┘ + +10 rows in set. Elapsed: 0.215 sec. Processed 2.23 million rows, 1.48 GB (10.35 million rows/s., 6.86 GB/s.) +``` + +在此示例中,我们使用 [has](../../sql-reference/functions/array-functions/#hasarr-elem) 函数æ¥æŒ‰è¿‡æ»¤æ•°ç»„类型元素并按 directions çš„æ•°é‡è¿›è¡ŒæŽ’åºã€‚ + +有一个婚礼蛋糕需è¦æ•´ä¸ª126个步骤æ¥åˆ¶ä½œï¼æ˜¾ç¤º directions: + +请求: + +``` sql +SELECT arrayJoin(directions) +FROM recipes +WHERE title = 'Chocolate-Strawberry-Orange Wedding Cake' +``` + +结果: + +``` text +┌─arrayJoin(directions)───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┠+│ Position 1 rack in center and 1 rack in bottom third of oven and preheat to 350F. 
│ +│ Butter one 5-inch-diameter cake pan with 2-inch-high sides, one 8-inch-diameter cake pan with 2-inch-high sides and one 12-inch-diameter cake pan with 2-inch-high sides. │ +│ Dust pans with flour; line bottoms with parchment. │ +│ Combine 1/3 cup orange juice and 2 ounces unsweetened chocolate in heavy small saucepan. │ +│ Stir mixture over medium-low heat until chocolate melts. │ +│ Remove from heat. │ +│ Gradually mix in 1 2/3 cups orange juice. │ +│ Sift 3 cups flour, 2/3 cup cocoa, 2 teaspoons baking soda, 1 teaspoon salt and 1/2 teaspoon baking powder into medium bowl. │ +│ using electric mixer, beat 1 cup (2 sticks) butter and 3 cups sugar in large bowl until blended (mixture will look grainy). │ +│ Add 4 eggs, 1 at a time, beating to blend after each. │ +│ Beat in 1 tablespoon orange peel and 1 tablespoon vanilla extract. │ +│ Add dry ingredients alternately with orange juice mixture in 3 additions each, beating well after each addition. │ +│ Mix in 1 cup chocolate chips. │ +│ Transfer 1 cup plus 2 tablespoons batter to prepared 5-inch pan, 3 cups batter to prepared 8-inch pan and remaining batter (about 6 cups) to 12-inch pan. │ +│ Place 5-inch and 8-inch pans on center rack of oven. │ +│ Place 12-inch pan on lower rack of oven. │ +│ Bake cakes until tester inserted into center comes out clean, about 35 minutes. │ +│ Transfer cakes in pans to racks and cool completely. │ +│ Mark 4-inch diameter circle on one 6-inch-diameter cardboard cake round. │ +│ Cut out marked circle. │ +│ Mark 7-inch-diameter circle on one 8-inch-diameter cardboard cake round. │ +│ Cut out marked circle. │ +│ Mark 11-inch-diameter circle on one 12-inch-diameter cardboard cake round. │ +│ Cut out marked circle. │ +│ Cut around sides of 5-inch-cake to loosen. │ +│ Place 4-inch cardboard over pan. │ +│ Hold cardboard and pan together; turn cake out onto cardboard. │ +│ Peel off parchment.Wrap cakes on its cardboard in foil. │ +│ Repeat turning out, peeling off parchment and wrapping cakes in foil, using 7-inch cardboard for 8-inch cake and 11-inch cardboard for 12-inch cake. │ +│ Using remaining ingredients, make 1 more batch of cake batter and bake 3 more cake layers as described above. │ +│ Cool cakes in pans. │ +│ Cover cakes in pans tightly with foil. │ +│ (Can be prepared ahead. │ +│ Let stand at room temperature up to 1 day or double-wrap all cake layers and freeze up to 1 week. │ +│ Bring cake layers to room temperature before using.) │ +│ Place first 12-inch cake on its cardboard on work surface. │ +│ Spread 2 3/4 cups ganache over top of cake and all the way to edge. │ +│ Spread 2/3 cup jam over ganache, leaving 1/2-inch chocolate border at edge. │ +│ Drop 1 3/4 cups white chocolate frosting by spoonfuls over jam. │ +│ Gently spread frosting over jam, leaving 1/2-inch chocolate border at edge. │ +│ Rub some cocoa powder over second 12-inch cardboard. │ +│ Cut around sides of second 12-inch cake to loosen. │ +│ Place cardboard, cocoa side down, over pan. │ +│ Turn cake out onto cardboard. │ +│ Peel off parchment. │ +│ Carefully slide cake off cardboard and onto filling on first 12-inch cake. │ +│ Refrigerate. │ +│ Place first 8-inch cake on its cardboard on work surface. │ +│ Spread 1 cup ganache over top all the way to edge. │ +│ Spread 1/4 cup jam over, leaving 1/2-inch chocolate border at edge. │ +│ Drop 1 cup white chocolate frosting by spoonfuls over jam. │ +│ Gently spread frosting over jam, leaving 1/2-inch chocolate border at edge. │ +│ Rub some cocoa over second 8-inch cardboard. 
│ +│ Cut around sides of second 8-inch cake to loosen. │ +│ Place cardboard, cocoa side down, over pan. │ +│ Turn cake out onto cardboard. │ +│ Peel off parchment. │ +│ Slide cake off cardboard and onto filling on first 8-inch cake. │ +│ Refrigerate. │ +│ Place first 5-inch cake on its cardboard on work surface. │ +│ Spread 1/2 cup ganache over top of cake and all the way to edge. │ +│ Spread 2 tablespoons jam over, leaving 1/2-inch chocolate border at edge. │ +│ Drop 1/3 cup white chocolate frosting by spoonfuls over jam. │ +│ Gently spread frosting over jam, leaving 1/2-inch chocolate border at edge. │ +│ Rub cocoa over second 6-inch cardboard. │ +│ Cut around sides of second 5-inch cake to loosen. │ +│ Place cardboard, cocoa side down, over pan. │ +│ Turn cake out onto cardboard. │ +│ Peel off parchment. │ +│ Slide cake off cardboard and onto filling on first 5-inch cake. │ +│ Chill all cakes 1 hour to set filling. │ +│ Place 12-inch tiered cake on its cardboard on revolving cake stand. │ +│ Spread 2 2/3 cups frosting over top and sides of cake as a first coat. │ +│ Refrigerate cake. │ +│ Place 8-inch tiered cake on its cardboard on cake stand. │ +│ Spread 1 1/4 cups frosting over top and sides of cake as a first coat. │ +│ Refrigerate cake. │ +│ Place 5-inch tiered cake on its cardboard on cake stand. │ +│ Spread 3/4 cup frosting over top and sides of cake as a first coat. │ +│ Refrigerate all cakes until first coats of frosting set, about 1 hour. │ +│ (Cakes can be made to this point up to 1 day ahead; cover and keep refrigerate.) │ +│ Prepare second batch of frosting, using remaining frosting ingredients and following directions for first batch. │ +│ Spoon 2 cups frosting into pastry bag fitted with small star tip. │ +│ Place 12-inch cake on its cardboard on large flat platter. │ +│ Place platter on cake stand. │ +│ Using icing spatula, spread 2 1/2 cups frosting over top and sides of cake; smooth top. │ +│ Using filled pastry bag, pipe decorative border around top edge of cake. │ +│ Refrigerate cake on platter. │ +│ Place 8-inch cake on its cardboard on cake stand. │ +│ Using icing spatula, spread 1 1/2 cups frosting over top and sides of cake; smooth top. │ +│ Using pastry bag, pipe decorative border around top edge of cake. │ +│ Refrigerate cake on its cardboard. │ +│ Place 5-inch cake on its cardboard on cake stand. │ +│ Using icing spatula, spread 3/4 cup frosting over top and sides of cake; smooth top. │ +│ Using pastry bag, pipe decorative border around top edge of cake, spooning more frosting into bag if necessary. │ +│ Refrigerate cake on its cardboard. │ +│ Keep all cakes refrigerated until frosting sets, about 2 hours. │ +│ (Can be prepared 2 days ahead. │ +│ Cover loosely; keep refrigerated.) │ +│ Place 12-inch cake on platter on work surface. │ +│ Press 1 wooden dowel straight down into and completely through center of cake. │ +│ Mark dowel 1/4 inch above top of frosting. │ +│ Remove dowel and cut with serrated knife at marked point. │ +│ Cut 4 more dowels to same length. │ +│ Press 1 cut dowel back into center of cake. │ +│ Press remaining 4 cut dowels into cake, positioning 3 1/2 inches inward from cake edges and spacing evenly. │ +│ Place 8-inch cake on its cardboard on work surface. │ +│ Press 1 dowel straight down into and completely through center of cake. │ +│ Mark dowel 1/4 inch above top of frosting. │ +│ Remove dowel and cut with serrated knife at marked point. │ +│ Cut 3 more dowels to same length. │ +│ Press 1 cut dowel back into center of cake. 
│ +│ Press remaining 3 cut dowels into cake, positioning 2 1/2 inches inward from edges and spacing evenly. │ +│ Using large metal spatula as aid, place 8-inch cake on its cardboard atop dowels in 12-inch cake, centering carefully. │ +│ Gently place 5-inch cake on its cardboard atop dowels in 8-inch cake, centering carefully. │ +│ Using citrus stripper, cut long strips of orange peel from oranges. │ +│ Cut strips into long segments. │ +│ To make orange peel coils, wrap peel segment around handle of wooden spoon; gently slide peel off handle so that peel keeps coiled shape. │ +│ Garnish cake with orange peel coils, ivy or mint sprigs, and some berries. │ +│ (Assembled cake can be made up to 8 hours ahead. │ +│ Let stand at cool room temperature.) │ +│ Remove top and middle cake tiers. │ +│ Remove dowels from cakes. │ +│ Cut top and middle cakes into slices. │ +│ To cut 12-inch cake: Starting 3 inches inward from edge and inserting knife straight down, cut through from top to bottom to make 6-inch-diameter circle in center of cake. │ +│ Cut outer portion of cake into slices; cut inner portion into slices and serve with strawberries. │ +└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ + +126 rows in set. Elapsed: 0.011 sec. Processed 8.19 thousand rows, 5.34 MB (737.75 thousand rows/s., 480.59 MB/s.) +``` + +### 在线 Playground + +此数æ®é›†ä¹Ÿå¯åœ¨ [在线 Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==) 中体验。 + +[原文链接](https://clickhouse.com/docs/en/getting-started/example-datasets/recipes/) From e89b390ee3b63568038a277c7656fb71279f7ee3 Mon Sep 17 00:00:00 2001 From: jferroal Date: Tue, 18 Oct 2022 22:56:49 +0800 Subject: [PATCH 149/252] Doc: add zh-CN translation getting-started/example-datasets/opensky.mdx --- .../example-datasets/opensky.mdx | 419 +++++++++++++++++- 1 file changed, 413 insertions(+), 6 deletions(-) diff --git a/docs/zh/getting-started/example-datasets/opensky.mdx b/docs/zh/getting-started/example-datasets/opensky.mdx index e8d5367e970..41694883d09 100644 --- a/docs/zh/getting-started/example-datasets/opensky.mdx +++ b/docs/zh/getting-started/example-datasets/opensky.mdx @@ -1,9 +1,416 @@ ---- -slug: /zh/getting-started/example-datasets/opensky -sidebar_label: Air Traffic Data -title: "Crowdsourced air traffic data from The OpenSky Network 2020" +--- +slug:/zh/getting-started/example-datasets/opensky +sidebar_labelï¼šç©ºä¸­äº¤é€šæ•°æ® +description: 该数æ®é›†ä¸­çš„æ•°æ®æ˜¯ä»Žå®Œæ•´çš„ OpenSky æ•°æ®é›†ä¸­è¡ç”Ÿè€Œæ¥çš„,对其中的数æ®è¿›è¡Œäº†å¿…è¦çš„清ç†ï¼Œç”¨ä»¥å±•ç¤ºåœ¨ COVID-19 期间空中交通的å‘展。 +title: "æ¥è‡ª The OpenSky Network 2020 的众包空中交通数æ®" --- -import Content from '@site/docs/en/getting-started/example-datasets/opensky.md'; +该数æ®é›†ä¸­çš„æ•°æ®æ˜¯ä»Žå®Œæ•´çš„ OpenSky æ•°æ®é›†ä¸­æ´¾ç”Ÿå’Œæ¸…ç†çš„,以说明 COVID-19 大æµè¡ŒæœŸé—´ç©ºä¸­äº¤é€šçš„å‘展。它涵盖了自 2019 å¹´ 1 月 1 日以æ¥è¯¥ç½‘络中 2500 多åæˆå‘˜è§‚测到的所有航ç­ã€‚直到 COVID-19 大æµè¡Œç»“æŸï¼Œæ›´å¤šæ•°æ®å°†å®šæœŸçš„更新到数æ®é›†ä¸­ã€‚ - +æ¥æºï¼šhttps://zenodo.org/record/5092942#.YRBCyTpRXYd + +Martin Strohmeierã€Xavier Oliveã€Jannis Lübbeã€Matthias Schäfer å’Œ Vincent Lenders “æ¥è‡ª OpenSky 网络 2019-2020 的众包空中交通数æ®â€åœ°çƒç³»ç»Ÿç§‘å­¦æ•°æ® 13(2),2021 https://doi.org/10.5194/essd- 13-357-2021 + +## 下载数æ®é›† {#download-dataset} + +è¿è¡Œå‘½ä»¤ï¼š + +```bash +wget -O- https://zenodo.org/record/5092942 | grep 
-oP 'https://zenodo.org/record/5092942/files/flightlist_\d+_\d+\.csv\.gz' | xargs wget +``` + +Download will take about 2 minutes with good internet connection. There are 30 files with total size of 4.3 GB. + +## 创建表 {#create-table} + +```sql +CREATE TABLE opensky +( + callsign String, + number String, + icao24 String, + registration String, + typecode String, + origin String, + destination String, + firstseen DateTime, + lastseen DateTime, + day DateTime, + latitude_1 Float64, + longitude_1 Float64, + altitude_1 Float64, + latitude_2 Float64, + longitude_2 Float64, + altitude_2 Float64 +) ENGINE = MergeTree ORDER BY (origin, destination, callsign); +``` + +## å¯¼å…¥æ•°æ® {#import-data} + +将数æ®å¹¶è¡Œå¯¼å…¥åˆ° ClickHouse: + +```bash +ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c 'gzip -c -d "{}" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"' +``` + +- 这里我们将文件列表(`ls -1 flightlist_*.csv.gz`)传递给`xargs`以进行并行处ç†ã€‚ `xargs -P100` 指定最多使用 100 个并行工作程åºï¼Œä½†ç”±äºŽæˆ‘们åªæœ‰ 30 个文件,工作程åºçš„æ•°é‡å°†åªæœ‰ 30 个。 +- 对于æ¯ä¸ªæ–‡ä»¶ï¼Œ`xargs` 将通过 `bash -c` 为æ¯ä¸ªæ–‡ä»¶è¿è¡Œä¸€ä¸ªè„šæœ¬æ–‡ä»¶ã€‚该脚本通过使用 `{}` 表示文件åå ä½ç¬¦ï¼Œç„¶åŽ `xargs` 由命令进行填充(使用 `-I{}`)。 +- 该脚本会将文件 (`gzip -c -d "{}"`) 解压缩到标准输出(`-c` å‚数),并将输出é‡å®šå‘到 `clickhouse-client`。 +- 我们还è¦æ±‚使用扩展解æžå™¨è§£æž [DateTime](../../sql-reference/data-types/datetime.md) 字段 ([--date_time_input_format best_effort](../../operations/settings/ settings.md#settings-date_time_input_format)) 以识别具有时区å移的 ISO-8601 æ ¼å¼ã€‚ + +最åŽï¼Œ`clickhouse-client` 会以 [CSVWithNames](../../interfaces/formats.md#csvwithnames) æ ¼å¼è¯»å–输入数æ®ç„¶åŽæ‰§è¡Œæ’入。 + +å¹¶è¡Œå¯¼å…¥éœ€è¦ 24 秒。 + +如果您ä¸æƒ³ä½¿ç”¨å¹¶è¡Œå¯¼å…¥ï¼Œä»¥ä¸‹æ˜¯é¡ºåºå¯¼å…¥çš„æ–¹å¼ï¼š + +```bash +for file in flightlist_*.csv.gz; do gzip -c -d "$file" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"; done +``` + +## 验è¯æ•°æ® {#validate-data} + +请求: + +```sql +SELECT count() FROM opensky; +``` + +结果: + +```text +┌──count()─┠+│ 66010819 │ +└──────────┘ +``` + +ClickHouse 中的数æ®é›†å¤§å°åªæœ‰ 2.66 GiB,检查一下。 + +请求: + +```sql +SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'opensky'; +``` + +结果: + +```text +┌─formatReadableSize(total_bytes)─┠+│ 2.66 GiB │ +└─────────────────────────────────┘ +``` + +## è¿è¡Œä¸€äº›æŸ¥è¯¢ {#run-queries} + +总行驶è·ç¦»ä¸º 680 亿公里。 + +请求: + +```sql +SELECT formatReadableQuantity(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) / 1000) FROM opensky; +``` + +结果: + +```text +┌─formatReadableQuantity(divide(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 1000))─┠+│ 68.72 billion │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +å¹³å‡é£žè¡Œè·ç¦»çº¦ä¸º 1000 公里。 + +请求: + +```sql +SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky; +``` + +结果: + +```text +┌─avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))─┠+│ 1041090.6465708319 │ +└────────────────────────────────────────────────────────────────────┘ +``` + +### 最ç¹å¿™çš„始å‘机场和观测到的平å‡è·ç¦»{#busy-airports-average-distance} + +请求: + +```sql +SELECT + origin, + count(), + round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))) AS distance, + bar(distance, 0, 10000000, 100) AS bar +FROM opensky +WHERE origin != '' +GROUP BY origin +ORDER BY count() DESC +LIMIT 100; +``` + +结果: + +```text + 
┌─origin─┬─count()─┬─distance─┬─bar────────────────────────────────────┠+ 1. │ KORD │ 745007 │ 1546108 │ ███████████████■│ + 2. │ KDFW │ 696702 │ 1358721 │ █████████████▌ │ + 3. │ KATL │ 667286 │ 1169661 │ ███████████▋ │ + 4. │ KDEN │ 582709 │ 1287742 │ ████████████▊ │ + 5. │ KLAX │ 581952 │ 2628393 │ ██████████████████████████▎ │ + 6. │ KLAS │ 447789 │ 1336967 │ █████████████▎ │ + 7. │ KPHX │ 428558 │ 1345635 │ █████████████■│ + 8. │ KSEA │ 412592 │ 1757317 │ █████████████████▌ │ + 9. │ KCLT │ 404612 │ 880355 │ ████████▋ │ + 10. │ VIDP │ 363074 │ 1445052 │ ██████████████■│ + 11. │ EDDF │ 362643 │ 2263960 │ ██████████████████████▋ │ + 12. │ KSFO │ 361869 │ 2445732 │ ████████████████████████■│ + 13. │ KJFK │ 349232 │ 2996550 │ █████████████████████████████▊ │ + 14. │ KMSP │ 346010 │ 1287328 │ ████████████▋ │ + 15. │ LFPG │ 344748 │ 2206203 │ ██████████████████████ │ + 16. │ EGLL │ 341370 │ 3216593 │ ████████████████████████████████■│ + 17. │ EHAM │ 340272 │ 2116425 │ █████████████████████■│ + 18. │ KEWR │ 337696 │ 1826545 │ ██████████████████▎ │ + 19. │ KPHL │ 320762 │ 1291761 │ ████████████▊ │ + 20. │ OMDB │ 308855 │ 2855706 │ ████████████████████████████▌ │ + 21. │ UUEE │ 307098 │ 1555122 │ ███████████████▌ │ + 22. │ KBOS │ 304416 │ 1621675 │ ████████████████■│ + 23. │ LEMD │ 291787 │ 1695097 │ ████████████████▊ │ + 24. │ YSSY │ 272979 │ 1875298 │ ██████████████████▋ │ + 25. │ KMIA │ 265121 │ 1923542 │ ███████████████████■│ + 26. │ ZGSZ │ 263497 │ 745086 │ ███████■│ + 27. │ EDDM │ 256691 │ 1361453 │ █████████████▌ │ + 28. │ WMKK │ 254264 │ 1626688 │ ████████████████▎ │ + 29. │ CYYZ │ 251192 │ 2175026 │ █████████████████████▋ │ + 30. │ KLGA │ 248699 │ 1106935 │ ███████████ │ + 31. │ VHHH │ 248473 │ 3457658 │ ██████████████████████████████████▌ │ + 32. │ RJTT │ 243477 │ 1272744 │ ████████████▋ │ + 33. │ KBWI │ 241440 │ 1187060 │ ███████████▋ │ + 34. │ KIAD │ 239558 │ 1683485 │ ████████████████▋ │ + 35. │ KIAH │ 234202 │ 1538335 │ ███████████████■│ + 36. │ KFLL │ 223447 │ 1464410 │ ██████████████▋ │ + 37. │ KDAL │ 212055 │ 1082339 │ ██████████▋ │ + 38. │ KDCA │ 207883 │ 1013359 │ ██████████■│ + 39. │ LIRF │ 207047 │ 1427965 │ ██████████████▎ │ + 40. │ PANC │ 206007 │ 2525359 │ █████████████████████████▎ │ + 41. │ LTFJ │ 205415 │ 860470 │ ████████▌ │ + 42. │ KDTW │ 204020 │ 1106716 │ ███████████ │ + 43. │ VABB │ 201679 │ 1300865 │ █████████████ │ + 44. │ OTHH │ 200797 │ 3759544 │ █████████████████████████████████████▌ │ + 45. │ KMDW │ 200796 │ 1232551 │ ████████████▎ │ + 46. │ KSAN │ 198003 │ 1495195 │ ██████████████▊ │ + 47. │ KPDX │ 197760 │ 1269230 │ ████████████▋ │ + 48. │ SBGR │ 197624 │ 2041697 │ ████████████████████■│ + 49. │ VOBL │ 189011 │ 1040180 │ ██████████■│ + 50. │ LEBL │ 188956 │ 1283190 │ ████████████▋ │ + 51. │ YBBN │ 188011 │ 1253405 │ ████████████▌ │ + 52. │ LSZH │ 187934 │ 1572029 │ ███████████████▋ │ + 53. │ YMML │ 187643 │ 1870076 │ ██████████████████▋ │ + 54. │ RCTP │ 184466 │ 2773976 │ ███████████████████████████▋ │ + 55. │ KSNA │ 180045 │ 778484 │ ███████▋ │ + 56. │ EGKK │ 176420 │ 1694770 │ ████████████████▊ │ + 57. │ LOWW │ 176191 │ 1274833 │ ████████████▋ │ + 58. │ UUDD │ 176099 │ 1368226 │ █████████████▋ │ + 59. │ RKSI │ 173466 │ 3079026 │ ██████████████████████████████▋ │ + 60. │ EKCH │ 172128 │ 1229895 │ ████████████▎ │ + 61. │ KOAK │ 171119 │ 1114447 │ ███████████■│ + 62. │ RPLL │ 170122 │ 1440735 │ ██████████████■│ + 63. │ KRDU │ 167001 │ 830521 │ ████████▎ │ + 64. │ KAUS │ 164524 │ 1256198 │ ████████████▌ │ + 65. │ KBNA │ 163242 │ 1022726 │ ██████████■│ + 66. 
│ KSDF │ 162655 │ 1380867 │ █████████████▋ │ + 67. │ ENGM │ 160732 │ 910108 │ █████████ │ + 68. │ LIMC │ 160696 │ 1564620 │ ███████████████▋ │ + 69. │ KSJC │ 159278 │ 1081125 │ ██████████▋ │ + 70. │ KSTL │ 157984 │ 1026699 │ ██████████▎ │ + 71. │ UUWW │ 156811 │ 1261155 │ ████████████▌ │ + 72. │ KIND │ 153929 │ 987944 │ █████████▊ │ + 73. │ ESSA │ 153390 │ 1203439 │ ████████████ │ + 74. │ KMCO │ 153351 │ 1508657 │ ███████████████ │ + 75. │ KDVT │ 152895 │ 74048 │ â–‹ │ + 76. │ VTBS │ 152645 │ 2255591 │ ██████████████████████▌ │ + 77. │ CYVR │ 149574 │ 2027413 │ ████████████████████▎ │ + 78. │ EIDW │ 148723 │ 1503985 │ ███████████████ │ + 79. │ LFPO │ 143277 │ 1152964 │ ███████████▌ │ + 80. │ EGSS │ 140830 │ 1348183 │ █████████████■│ + 81. │ KAPA │ 140776 │ 420441 │ ████■│ + 82. │ KHOU │ 138985 │ 1068806 │ ██████████▋ │ + 83. │ KTPA │ 138033 │ 1338223 │ █████████████■│ + 84. │ KFFZ │ 137333 │ 55397 │ â–Œ │ + 85. │ NZAA │ 136092 │ 1581264 │ ███████████████▋ │ + 86. │ YPPH │ 133916 │ 1271550 │ ████████████▋ │ + 87. │ RJBB │ 133522 │ 1805623 │ ██████████████████ │ + 88. │ EDDL │ 133018 │ 1265919 │ ████████████▋ │ + 89. │ ULLI │ 130501 │ 1197108 │ ███████████▊ │ + 90. │ KIWA │ 127195 │ 250876 │ ██▌ │ + 91. │ KTEB │ 126969 │ 1189414 │ ███████████▊ │ + 92. │ VOMM │ 125616 │ 1127757 │ ███████████▎ │ + 93. │ LSGG │ 123998 │ 1049101 │ ██████████■│ + 94. │ LPPT │ 122733 │ 1779187 │ █████████████████▋ │ + 95. │ WSSS │ 120493 │ 3264122 │ ████████████████████████████████▋ │ + 96. │ EBBR │ 118539 │ 1579939 │ ███████████████▋ │ + 97. │ VTBD │ 118107 │ 661627 │ ██████▌ │ + 98. │ KVNY │ 116326 │ 692960 │ ██████▊ │ + 99. │ EDDT │ 115122 │ 941740 │ █████████■│ +100. │ EFHK │ 114860 │ 1629143 │ ████████████████▎ │ + └────────┴─────────┴──────────┴────────────────────────────────────────┘ +``` + +### æ¯å‘¨æ¥è‡ªèŽ«æ–¯ç§‘三个主è¦æœºåœºçš„航ç­æ•°é‡ {#flights-from-moscow} + +请求: + +```sql +SELECT + toMonday(day) AS k, + count() AS c, + bar(c, 0, 10000, 100) AS bar +FROM opensky +WHERE origin IN ('UUEE', 'UUDD', 'UUWW') +GROUP BY k +ORDER BY k ASC; +``` + +结果: + +```text + ┌──────────k─┬────c─┬─bar──────────────────────────────────────────────────────────────────────────┠+ 1. │ 2018-12-31 │ 5248 │ ████████████████████████████████████████████████████■│ + 2. │ 2019-01-07 │ 6302 │ ███████████████████████████████████████████████████████████████ │ + 3. │ 2019-01-14 │ 5701 │ █████████████████████████████████████████████████████████ │ + 4. │ 2019-01-21 │ 5638 │ ████████████████████████████████████████████████████████■│ + 5. │ 2019-01-28 │ 5731 │ █████████████████████████████████████████████████████████▎ │ + 6. │ 2019-02-04 │ 5683 │ ████████████████████████████████████████████████████████▋ │ + 7. │ 2019-02-11 │ 5759 │ █████████████████████████████████████████████████████████▌ │ + 8. │ 2019-02-18 │ 5736 │ █████████████████████████████████████████████████████████▎ │ + 9. │ 2019-02-25 │ 5873 │ ██████████████████████████████████████████████████████████▋ │ + 10. │ 2019-03-04 │ 5965 │ ███████████████████████████████████████████████████████████▋ │ + 11. │ 2019-03-11 │ 5900 │ ███████████████████████████████████████████████████████████ │ + 12. │ 2019-03-18 │ 5823 │ ██████████████████████████████████████████████████████████■│ + 13. │ 2019-03-25 │ 5899 │ ██████████████████████████████████████████████████████████▊ │ + 14. │ 2019-04-01 │ 6043 │ ████████████████████████████████████████████████████████████■│ + 15. │ 2019-04-08 │ 6098 │ ████████████████████████████████████████████████████████████▊ │ + 16. 
│ 2019-04-15 │ 6196 │ █████████████████████████████████████████████████████████████▊ │ + 17. │ 2019-04-22 │ 6486 │ ████████████████████████████████████████████████████████████████▋ │ + 18. │ 2019-04-29 │ 6682 │ ██████████████████████████████████████████████████████████████████▋ │ + 19. │ 2019-05-06 │ 6739 │ ███████████████████████████████████████████████████████████████████■│ + 20. │ 2019-05-13 │ 6600 │ ██████████████████████████████████████████████████████████████████ │ + 21. │ 2019-05-20 │ 6575 │ █████████████████████████████████████████████████████████████████▋ │ + 22. │ 2019-05-27 │ 6786 │ ███████████████████████████████████████████████████████████████████▋ │ + 23. │ 2019-06-03 │ 6872 │ ████████████████████████████████████████████████████████████████████▋ │ + 24. │ 2019-06-10 │ 7045 │ ██████████████████████████████████████████████████████████████████████■│ + 25. │ 2019-06-17 │ 7045 │ ██████████████████████████████████████████████████████████████████████■│ + 26. │ 2019-06-24 │ 6852 │ ████████████████████████████████████████████████████████████████████▌ │ + 27. │ 2019-07-01 │ 7248 │ ████████████████████████████████████████████████████████████████████████■│ + 28. │ 2019-07-08 │ 7284 │ ████████████████████████████████████████████████████████████████████████▋ │ + 29. │ 2019-07-15 │ 7142 │ ███████████████████████████████████████████████████████████████████████■│ + 30. │ 2019-07-22 │ 7108 │ ███████████████████████████████████████████████████████████████████████ │ + 31. │ 2019-07-29 │ 7251 │ ████████████████████████████████████████████████████████████████████████▌ │ + 32. │ 2019-08-05 │ 7403 │ ██████████████████████████████████████████████████████████████████████████ │ + 33. │ 2019-08-12 │ 7457 │ ██████████████████████████████████████████████████████████████████████████▌ │ + 34. │ 2019-08-19 │ 7502 │ ███████████████████████████████████████████████████████████████████████████ │ + 35. │ 2019-08-26 │ 7540 │ ███████████████████████████████████████████████████████████████████████████■│ + 36. │ 2019-09-02 │ 7237 │ ████████████████████████████████████████████████████████████████████████▎ │ + 37. │ 2019-09-09 │ 7328 │ █████████████████████████████████████████████████████████████████████████▎ │ + 38. │ 2019-09-16 │ 5566 │ ███████████████████████████████████████████████████████▋ │ + 39. │ 2019-09-23 │ 7049 │ ██████████████████████████████████████████████████████████████████████■│ + 40. │ 2019-09-30 │ 6880 │ ████████████████████████████████████████████████████████████████████▋ │ + 41. │ 2019-10-07 │ 6518 │ █████████████████████████████████████████████████████████████████■│ + 42. │ 2019-10-14 │ 6688 │ ██████████████████████████████████████████████████████████████████▊ │ + 43. │ 2019-10-21 │ 6667 │ ██████████████████████████████████████████████████████████████████▋ │ + 44. │ 2019-10-28 │ 6303 │ ███████████████████████████████████████████████████████████████ │ + 45. │ 2019-11-04 │ 6298 │ ██████████████████████████████████████████████████████████████▊ │ + 46. │ 2019-11-11 │ 6137 │ █████████████████████████████████████████████████████████████▎ │ + 47. │ 2019-11-18 │ 6051 │ ████████████████████████████████████████████████████████████▌ │ + 48. │ 2019-11-25 │ 5820 │ ██████████████████████████████████████████████████████████■│ + 49. │ 2019-12-02 │ 5942 │ ███████████████████████████████████████████████████████████■│ + 50. │ 2019-12-09 │ 4891 │ ████████████████████████████████████████████████▊ │ + 51. │ 2019-12-16 │ 5682 │ ████████████████████████████████████████████████████████▋ │ + 52. 
│ 2019-12-23 │ 6111 │ █████████████████████████████████████████████████████████████ │ + 53. │ 2019-12-30 │ 5870 │ ██████████████████████████████████████████████████████████▋ │ + 54. │ 2020-01-06 │ 5953 │ ███████████████████████████████████████████████████████████▌ │ + 55. │ 2020-01-13 │ 5698 │ ████████████████████████████████████████████████████████▊ │ + 56. │ 2020-01-20 │ 5339 │ █████████████████████████████████████████████████████■│ + 57. │ 2020-01-27 │ 5566 │ ███████████████████████████████████████████████████████▋ │ + 58. │ 2020-02-03 │ 5801 │ ██████████████████████████████████████████████████████████ │ + 59. │ 2020-02-10 │ 5692 │ ████████████████████████████████████████████████████████▊ │ + 60. │ 2020-02-17 │ 5912 │ ███████████████████████████████████████████████████████████ │ + 61. │ 2020-02-24 │ 6031 │ ████████████████████████████████████████████████████████████▎ │ + 62. │ 2020-03-02 │ 6105 │ █████████████████████████████████████████████████████████████ │ + 63. │ 2020-03-09 │ 5823 │ ██████████████████████████████████████████████████████████■│ + 64. │ 2020-03-16 │ 4659 │ ██████████████████████████████████████████████▌ │ + 65. │ 2020-03-23 │ 3720 │ █████████████████████████████████████■│ + 66. │ 2020-03-30 │ 1720 │ █████████████████■│ + 67. │ 2020-04-06 │ 849 │ ████████■│ + 68. │ 2020-04-13 │ 710 │ ███████ │ + 69. │ 2020-04-20 │ 725 │ ███████■│ + 70. │ 2020-04-27 │ 920 │ █████████■│ + 71. │ 2020-05-04 │ 859 │ ████████▌ │ + 72. │ 2020-05-11 │ 1047 │ ██████████■│ + 73. │ 2020-05-18 │ 1135 │ ███████████▎ │ + 74. │ 2020-05-25 │ 1266 │ ████████████▋ │ + 75. │ 2020-06-01 │ 1793 │ █████████████████▊ │ + 76. │ 2020-06-08 │ 1979 │ ███████████████████▋ │ + 77. │ 2020-06-15 │ 2297 │ ██████████████████████▊ │ + 78. │ 2020-06-22 │ 2788 │ ███████████████████████████▊ │ + 79. │ 2020-06-29 │ 3389 │ █████████████████████████████████▊ │ + 80. │ 2020-07-06 │ 3545 │ ███████████████████████████████████■│ + 81. │ 2020-07-13 │ 3569 │ ███████████████████████████████████▋ │ + 82. │ 2020-07-20 │ 3784 │ █████████████████████████████████████▋ │ + 83. │ 2020-07-27 │ 3960 │ ███████████████████████████████████████▌ │ + 84. │ 2020-08-03 │ 4323 │ ███████████████████████████████████████████■│ + 85. │ 2020-08-10 │ 4581 │ █████████████████████████████████████████████▋ │ + 86. │ 2020-08-17 │ 4791 │ ███████████████████████████████████████████████▊ │ + 87. │ 2020-08-24 │ 4928 │ █████████████████████████████████████████████████▎ │ + 88. │ 2020-08-31 │ 4687 │ ██████████████████████████████████████████████▋ │ + 89. │ 2020-09-07 │ 4643 │ ██████████████████████████████████████████████■│ + 90. │ 2020-09-14 │ 4594 │ █████████████████████████████████████████████▊ │ + 91. │ 2020-09-21 │ 4478 │ ████████████████████████████████████████████▋ │ + 92. │ 2020-09-28 │ 4382 │ ███████████████████████████████████████████▋ │ + 93. │ 2020-10-05 │ 4261 │ ██████████████████████████████████████████▌ │ + 94. │ 2020-10-12 │ 4243 │ ██████████████████████████████████████████■│ + 95. │ 2020-10-19 │ 3941 │ ███████████████████████████████████████■│ + 96. │ 2020-10-26 │ 3616 │ ████████████████████████████████████■│ + 97. │ 2020-11-02 │ 3586 │ ███████████████████████████████████▋ │ + 98. │ 2020-11-09 │ 3403 │ ██████████████████████████████████ │ + 99. │ 2020-11-16 │ 3336 │ █████████████████████████████████▎ │ +100. │ 2020-11-23 │ 3230 │ ████████████████████████████████▎ │ +101. │ 2020-11-30 │ 3183 │ ███████████████████████████████▋ │ +102. │ 2020-12-07 │ 3285 │ ████████████████████████████████▋ │ +103. 
│ 2020-12-14 │ 3367 │ █████████████████████████████████▋ │ +104. │ 2020-12-21 │ 3748 │ █████████████████████████████████████■│ +105. │ 2020-12-28 │ 3986 │ ███████████████████████████████████████▋ │ +106. │ 2021-01-04 │ 3906 │ ███████████████████████████████████████ │ +107. │ 2021-01-11 │ 3425 │ ██████████████████████████████████▎ │ +108. │ 2021-01-18 │ 3144 │ ███████████████████████████████■│ +109. │ 2021-01-25 │ 3115 │ ███████████████████████████████■│ +110. │ 2021-02-01 │ 3285 │ ████████████████████████████████▋ │ +111. │ 2021-02-08 │ 3321 │ █████████████████████████████████■│ +112. │ 2021-02-15 │ 3475 │ ██████████████████████████████████▋ │ +113. │ 2021-02-22 │ 3549 │ ███████████████████████████████████■│ +114. │ 2021-03-01 │ 3755 │ █████████████████████████████████████▌ │ +115. │ 2021-03-08 │ 3080 │ ██████████████████████████████▋ │ +116. │ 2021-03-15 │ 3789 │ █████████████████████████████████████▊ │ +117. │ 2021-03-22 │ 3804 │ ██████████████████████████████████████ │ +118. │ 2021-03-29 │ 4238 │ ██████████████████████████████████████████■│ +119. │ 2021-04-05 │ 4307 │ ███████████████████████████████████████████ │ +120. │ 2021-04-12 │ 4225 │ ██████████████████████████████████████████▎ │ +121. │ 2021-04-19 │ 4391 │ ███████████████████████████████████████████▊ │ +122. │ 2021-04-26 │ 4868 │ ████████████████████████████████████████████████▋ │ +123. │ 2021-05-03 │ 4977 │ █████████████████████████████████████████████████▋ │ +124. │ 2021-05-10 │ 5164 │ ███████████████████████████████████████████████████▋ │ +125. │ 2021-05-17 │ 4986 │ █████████████████████████████████████████████████▋ │ +126. │ 2021-05-24 │ 5024 │ ██████████████████████████████████████████████████■│ +127. │ 2021-05-31 │ 4824 │ ████████████████████████████████████████████████■│ +128. │ 2021-06-07 │ 5652 │ ████████████████████████████████████████████████████████▌ │ +129. │ 2021-06-14 │ 5613 │ ████████████████████████████████████████████████████████■│ +130. │ 2021-06-21 │ 6061 │ ████████████████████████████████████████████████████████████▌ │ +131. │ 2021-06-28 │ 2554 │ █████████████████████████▌ │ + └────────────┴──────┴──────────────────────────────────────────────────────────────────────────────┘ +``` + +### 在线 Playground {#playground} + +ä½ å¯ä»¥ä½¿ç”¨äº¤äº’å¼èµ„æº [Online Playground](https://play.clickhouse.com/play?user=play) æ¥å°è¯•å¯¹æ­¤æ•°æ®é›†çš„其他查询。 例如, [执行这个查询](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=). 
但是,请注意无法在 Playground 中创建临时表。

From 6e100adcb4ca999c86b134d13fb7949068559226 Mon Sep 17 00:00:00 2001
From: jferroal
Date: Tue, 18 Oct 2022 22:57:23 +0800
Subject: [PATCH 150/252] Doc: fix metadata

---
 docs/zh/getting-started/example-datasets/opensky.mdx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/zh/getting-started/example-datasets/opensky.mdx b/docs/zh/getting-started/example-datasets/opensky.mdx
index 41694883d09..92cd104e06e 100644
--- a/docs/zh/getting-started/example-datasets/opensky.mdx
+++ b/docs/zh/getting-started/example-datasets/opensky.mdx
@@ -1,6 +1,6 @@
 ---
-slug:/zh/getting-started/example-datasets/opensky
-sidebar_label:空中交通数据
+slug: /zh/getting-started/example-datasets/opensky
+sidebar_label: 空中交通数据
 description: 该数据集中的数据是从完整的 OpenSky 数据集中衍生而来的,对其中的数据进行了必要的清理,用以展示在 COVID-19 期间空中交通的发展。
 title: "来自 The OpenSky Network 2020 的众包空中交通数据"
 ---

From 123e1aea65ced0250575a2a9e766a51a5c5c5ed1 Mon Sep 17 00:00:00 2001
From: jferroal
Date: Tue, 18 Oct 2022 22:57:45 +0800
Subject: [PATCH 151/252] Doc: fix metadata

---
 docs/zh/getting-started/example-datasets/recipes.mdx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/zh/getting-started/example-datasets/recipes.mdx b/docs/zh/getting-started/example-datasets/recipes.mdx
index 0926b3dde07..b7ed92962c5 100644
--- a/docs/zh/getting-started/example-datasets/recipes.mdx
+++ b/docs/zh/getting-started/example-datasets/recipes.mdx
@@ -1,6 +1,6 @@
 ---
-slug:/zh/getting-started/example-datasets/recipes
-sidebar_label:食谱数据集
+slug: /zh/getting-started/example-datasets/recipes
+sidebar_label: 食谱数据集
 title: "食谱数据集"
 ---

From 7d1ef764c284ba350f38c50400dc975ccdd84f31 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 18 Oct 2022 18:08:57 +0300
Subject: [PATCH 152/252] Update tests/queries/0_stateless/02461_cancel_finish_race.sh

Co-authored-by: Azat Khuzhin
---
 tests/queries/0_stateless/02461_cancel_finish_race.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02461_cancel_finish_race.sh b/tests/queries/0_stateless/02461_cancel_finish_race.sh
index be40843023e..7e775437da1 100755
--- a/tests/queries/0_stateless/02461_cancel_finish_race.sh
+++ b/tests/queries/0_stateless/02461_cancel_finish_race.sh
@@ -16,7 +16,7 @@ function thread_query()
 function thread_cancel()
 {
     while true; do
-        $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE 1 SYNC FORMAT Null";
+        $CLICKHOUSE_CLIENT --query "KILL QUERY WHERE current_database = '$CLICKHOUSE_DATABASE' SYNC FORMAT Null";
     done
 }

From 7893ff0785acf4d0ab29a11d4bb70f182f07a6b5 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Tue, 18 Oct 2022 08:43:22 +0200
Subject: [PATCH 153/252] Update woboq_codebrowser location

Signed-off-by: Azat Khuzhin
---
 docker/test/codebrowser/Dockerfile | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/docker/test/codebrowser/Dockerfile b/docker/test/codebrowser/Dockerfile
index ceed93c3ac7..b76b8234c81 100644
--- a/docker/test/codebrowser/Dockerfile
+++ b/docker/test/codebrowser/Dockerfile
@@ -36,10 +36,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # repo versions doesn't work correctly with C++17
 # also we push reports to s3, so we add index.html to subfolder urls
 # https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
-# TODO: remove branch in a few weeks after merge, e.g. in May or June 2022
-#
-# FIXME: update location of a repo
-RUN git clone https://github.com/azat/woboq_codebrowser --branch llvm-15 \
+RUN git clone https://github.com/ClickHouse/woboq_codebrowser \
     && cd woboq_codebrowser \
     && cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} \
     && ninja \

From c772dbb5d6b525a565490ec5084796a32322d8b6 Mon Sep 17 00:00:00 2001
From: DanRoscigno
Date: Tue, 18 Oct 2022 11:54:29 -0400
Subject: [PATCH 154/252] fix broken links and wrong colon char

---
 docs/zh/getting-started/example-datasets/menus.mdx | 14 +++++++-------
 .../example-datasets/uk-price-paid.mdx             |  8 ++++----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/docs/zh/getting-started/example-datasets/menus.mdx b/docs/zh/getting-started/example-datasets/menus.mdx
index 07452062d0b..10e9f2bd318 100644
--- a/docs/zh/getting-started/example-datasets/menus.mdx
+++ b/docs/zh/getting-started/example-datasets/menus.mdx
@@ -1,7 +1,7 @@
 ---
 slug: /zh/getting-started/example-datasets/menus
-sidebar_label:纽约公共图书馆“菜单上有什么?”数据集
-title: "纽约公共图书馆“菜单上有什么?”数据集"
+sidebar_label: '纽约公共图书馆“菜单上有什么?”数据集'
+title: '纽约公共图书馆“菜单上有什么?”数据集'
 ---

 该数据集由纽约公共图书馆创建。其中含有有关酒店、餐馆和咖啡馆的菜单上的菜肴及其价格的历史数据。
@@ -38,7 +38,7 @@ tar xvf 2021_08_01_07_01_17_data.tgz

 ## 创建表 {#create-tables}

-使用 [Decimal](../../sql-reference/data-types/decimal.md) 数据类型来存储价格。
+使用 [Decimal](/docs/zh/sql-reference/data-types/decimal.md) 数据类型来存储价格。

 ```sql
 CREATE TABLE dish
@@ -114,17 +114,17 @@ clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_defa
 clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --date_time_input_format best_effort --query "INSERT INTO menu_item FORMAT CSVWithNames" < MenuItem.csv

-因为数据由带有标题的 CSV 表示,所以使用 [CSVWithNames](../../interfaces/formats.md#csvwithnames) 格式。
+因为数据由带有标题的 CSV 表示,所以使用 [CSVWithNames](/docs/zh/interfaces/formats.md#csvwithnames) 格式。

 因为只有双引号用于数据字段,单引号可以在值内,所以禁用了 `format_csv_allow_single_quotes` 以避免混淆 CSV 解析器。

-因为数据中没有 [NULL](../../sql-参考/syntax.md#null-literal) 值,所以禁用 [input_format_null_as_default](../../operations/settings/settings.md#settings-input-format-null-as-default)。不然 ClickHouse 将会尝试解析 `\N` 序列,并可能与数据中的 `\` 混淆。
+因为数据中没有 [NULL](/docs/zh/sql-reference/syntax.md#null-literal) 值,所以禁用 [input_format_null_as_default](/docs/zh/operations/settings/settings.md#settings-input-format-null-as-default)。不然 ClickHouse 将会尝试解析 `\N` 序列,并可能与数据中的 `\` 混淆。

-设置 [date_time_input_format best_effort](../../operations/settings/settings.md#settings-date_time_input_format) 以便解析各种格式的 [DateTime](../../sql-reference/data-types/datetime.md)字段。例如,识别像“2000-01-01 01:02”这样没有秒数的 ISO-8601 时间字符串。如果没有此设置,则仅允许使用固定的 DateTime 格式。
+设置 [date_time_input_format best_effort](/docs/zh/operations/settings/settings.md#settings-date_time_input_format) 以便解析各种格式的 [DateTime](/docs/zh/sql-reference/data-types/datetime.md)字段。例如,识别像“2000-01-01 01:02”这样没有秒数的 ISO-8601 时间字符串。如果没有此设置,则仅允许使用固定的 DateTime 格式。

 ## 非规范化数据 {#denormalize-data}

-数据以 [规范化形式] (https://en.wikipedia.org/wiki/Database_normalization#Normal_forms) 在多个表格中呈现。这意味着如果你想进行如查询菜单项中的菜名这类的查询,则必须执行 [JOIN](../../sql-reference/statements/select/join.md#select-join)。在典型的分析任务中,预先处理联接的数据以避免每次都执行“联接”会更有效率。这中操作被称为“非规范化”数据。
+数据以 [规范化形式] (https://en.wikipedia.org/wiki/Database_normalization#Normal_forms) 在多个表格中呈现。这意味着如果你想进行如查询菜单项中的菜名这类的查询,则必须执行 [JOIN](/docs/zh/sql-reference/statements/select/join.md#select-join)。在典型的分析任务中,预先处理联接的数据以避免每次都执行“联接”会更有效率。这中操作被称为“非规范化”数据。

 我们将创建一个表“menu_item_denorm”,其中将包含所有联接在一起的数据:
diff --git a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx
index 3a14a3ce55d..058f0ae421a 100644
--- a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx
+++ b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx
@@ -42,9 +42,9 @@ ORDER BY (postcode1, postcode2, addr1, addr2);

 - 将`postcode` 拆分为两个不同的列 - `postcode1` 和 `postcode2`,因为这更适合存储和查询
 - 将`time` 字段转换为日期为它只包含 00:00 时间
-- 忽略 [UUid](../../sql-reference/data-types/uuid.md) 字段,因为我们不需要它进行分析
-- 使用 [transform](../../sql-reference/functions/other-functions.md#transform) 函数将 `Enum` 字段 `type` 和 `duration` 转换为更易读的 `Enum` 字段
-- 将 `is_new` 字段从单字符串(` Y`/`N`) 到 [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64 -int128-int256) 字段为 0 或 1
+- 忽略 [UUid](/docs/zh/sql-reference/data-types/uuid.md) 字段,因为我们不需要它进行分析
+- 使用 [transform](/docs/zh/sql-reference/functions/other-functions.md#transform) 函数将 `Enum` 字段 `type` 和 `duration` 转换为更易读的 `Enum` 字段
+- 将 `is_new` 字段从单字符串(` Y`/`N`) 到 [UInt8](/docs/zh/sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64 -int128-int256) 字段为 0 或 1
 - 删除最后两列,因为它们都具有相同的值(即 0)

 `url` 函数将来自网络服务器的数据流式传输到 ClickHouse 表中。以下命令将 500 万行插入到 `uk_price_paid` 表中:
@@ -342,7 +342,7 @@ LIMIT 100

 ## 使用 Projection 加速查询 {#speedup-with-projections}

-[Projections](../../sql-reference/statements/alter/projection.md) 允许我们通过存储任意格式的预先聚合的数据来提高查询速度。在此示例中,我们创建了一个按年份、地区和城镇分组的房产的平均价格、总价格和数量的 Projection。在执行时,如果 ClickHouse 认为 Projection 可以提高查询的性能,它将使用 Projection(何时使用由 ClickHouse 决定)。
+[Projections](/docs/zh/sql-reference/statements/alter/projection.mdx) 允许我们通过存储任意格式的预先聚合的数据来提高查询速度。在此示例中,我们创建了一个按年份、地区和城镇分组的房产的平均价格、总价格和数量的 Projection。在执行时,如果 ClickHouse 认为 Projection 可以提高查询的性能,它将使用 Projection(何时使用由 ClickHouse 决定)。

 ### 构建投影{#build-projection}

From 216d671cdf564d70aeba78591f80b0149c05870b Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 18 Oct 2022 17:56:34 +0200
Subject: [PATCH 155/252] Update AlterCommands.cpp

---
 src/Storages/AlterCommands.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp
index d68252679a7..c5e503fbc99 100644
--- a/src/Storages/AlterCommands.cpp
+++ b/src/Storages/AlterCommands.cpp
@@ -757,6 +757,7 @@ bool isMetadataOnlyConversion(const IDataType * from, const IDataType * to)
     const auto * nullable_to = typeid_cast<const DataTypeNullable *>(to);
     if (nullable_to)
     {
+        /// Here we allow a conversion X -> Nullable(X) to me a metadata-only conversion.
         from = nullable_from ? nullable_from->getNestedType().get() : from;
         to = nullable_to->getNestedType().get();
         continue;
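
The comment introduced above relies on a property of Nullable storage: Nullable(X) keeps the same nested column files as X plus a separate null-map file, so converting X to Nullable(X) never rewrites data. Below is a compilable sketch of the peeling loop inside isMetadataOnlyConversion, with hypothetical stand-in types replacing IDataType/DataTypeNullable and type equality reduced to pointer identity:

```cpp
#include <iostream>

// Stand-ins for IDataType / DataTypeNullable (the real classes live in src/DataTypes).
struct Type
{
    virtual ~Type() = default;
};

struct Nullable : Type
{
    const Type * nested;
    explicit Nullable(const Type * nested_) : nested(nested_) {}
};

// Converting X -> Nullable(X) only adds a null map; the nested column is reused as-is,
// so such a conversion can be applied by rewriting table metadata alone.
bool isMetadataOnlyConversion(const Type * from, const Type * to)
{
    while (true)
    {
        if (from == to)  // simplification: pointer identity instead of IDataType::equals()
            return true;

        const auto * nullable_from = dynamic_cast<const Nullable *>(from);
        const auto * nullable_to = dynamic_cast<const Nullable *>(to);
        if (!nullable_to)
            return false;

        // Peel Nullable from both sides (if present) and compare the nested types again.
        from = nullable_from ? nullable_from->nested : from;
        to = nullable_to->nested;
    }
}

int main()
{
    Type x;
    Nullable nullable_x(&x);
    std::cout << isMetadataOnlyConversion(&x, &nullable_x) << '\n';  // 1: X -> Nullable(X) is metadata-only
    std::cout << isMetadataOnlyConversion(&nullable_x, &x) << '\n';  // 0: dropping Nullable needs a data rewrite
}
```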

From 1d9d753167c8769e86963211a49c444f44ce0e84 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Tue, 18 Oct 2022 18:13:18 +0200
Subject: [PATCH 156/252] fix

---
 src/Storages/MergeTree/DataPartStorageOnDisk.cpp | 1 +
 src/Storages/MergeTree/DataPartStorageOnDisk.h   | 4 +++-
 src/Storages/MergeTree/IDataPartStorage.h        | 4 ----
 src/Storages/MergeTree/IMergeTreeDataPart.cpp    | 5 ++++-
 4 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp
index 7b95224d28a..efc7710f640 100644
--- a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp
+++ b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp
@@ -408,6 +408,7 @@ void DataPartStorageOnDisk::clearDirectory(
 
 std::optional<String> DataPartStorageOnDisk::getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached, bool broken) const
 {
+    assert(!broken || detached);
     String res;
 
     auto full_relative_path = fs::path(root_path);
diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.h b/src/Storages/MergeTree/DataPartStorageOnDisk.h
index c457e4a7291..d325049f056 100644
--- a/src/Storages/MergeTree/DataPartStorageOnDisk.h
+++ b/src/Storages/MergeTree/DataPartStorageOnDisk.h
@@ -52,10 +52,12 @@ public:
         MergeTreeDataPartState state,
         Poco::Logger * log) override;
 
+    /// Returns path to place detached part in or nullopt if we don't need to detach part (if it already exists and has the same content)
     std::optional<String> getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached, bool broken) const override;
 
+    /// Returns true if detached part already exists and has the same content (compares checksums.txt and the list of files)
     bool looksLikeBrokenDetachedPartHasTheSameContent(const String & detached_part_path, std::optional<String> & original_checksums_content,
-        std::optional<Strings> & original_files_list) const override;
+        std::optional<Strings> & original_files_list) const;
 
     void setRelativePath(const std::string & path) override;
     void onRename(const std::string & new_root_path, const std::string & new_part_dir) override;
diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h
index b355cda1e19..03627938348 100644
--- a/src/Storages/MergeTree/IDataPartStorage.h
+++ b/src/Storages/MergeTree/IDataPartStorage.h
@@ -131,10 +131,6 @@ public:
     /// TODO: remove it.
     virtual std::optional<String> getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached, bool broken) const = 0;
 
-    virtual bool looksLikeBrokenDetachedPartHasTheSameContent(const String & detached_part_path,
-        std::optional<String> & original_checksums_content,
-        std::optional<Strings> & original_files_list) const = 0;
-
     /// Reset part directory, used for im-memory parts.
     /// TODO: remove it.
     virtual void setRelativePath(const std::string & path) = 0;
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
index 293b0d5c4c4..6331df416bf 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
@@ -1484,6 +1484,7 @@ void IMergeTreeDataPart::remove() const
 
 std::optional<String> IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix, bool detached, bool broken) const
 {
+    assert(!broken || detached);
     String res;
 
     /** If you need to detach a part, and directory into which we want to rename it already exists,
@@ -1512,7 +1513,9 @@ std::optional<String> IMergeTreeDataPart::getRelativePathForDetachedPart(const S
 
 void IMergeTreeDataPart::renameToDetached(const String & prefix, DataPartStorageBuilderPtr builder) const
 {
-    renameTo(*getRelativePathForDetachedPart(prefix, /* broken */ false), true, builder);
+    auto path_to_detach = getRelativePathForDetachedPart(prefix, /* broken */ false);
+    assert(path_to_detach);
+    renameTo(path_to_detach.value(), true, builder);
     part_is_probably_removed_from_disk = true;
 }
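
For context on the assertions added above: getRelativePathForPrefix() has to pick a directory name under detached/ that does not collide with anything already there, and for broken parts it may now return std::nullopt (when an identical copy is already detached), which is why renameToDetached() asserts that a path was actually produced. A simplified, self-contained sketch of the name-probing logic, assuming a plain filesystem layout and omitting the duplicate-content check:

```cpp
#include <filesystem>
#include <iostream>
#include <optional>
#include <string>

namespace fs = std::filesystem;

// Probe "detached/<prefix>_<dir>", "detached/<prefix>_1_<dir>", ... until a free name
// is found. The suffix scheme mirrors the real code; function and parameter names here
// are illustrative.
std::optional<std::string> chooseDetachedName(const fs::path & table_path, const std::string & prefix, const std::string & dir_name)
{
    for (int try_no = 0; try_no < 10; ++try_no)
    {
        std::string res = (prefix.empty() ? std::string() : prefix + "_")
            + (try_no ? std::to_string(try_no) + "_" : std::string()) + dir_name;
        if (!fs::exists(table_path / "detached" / res))
            return res;
    }
    return std::nullopt;  // too many name clashes; give up rather than loop forever
}

int main()
{
    fs::create_directories("data/table/detached/broken_all_1_1_0");
    auto path = chooseDetachedName("data/table", "broken", "all_1_1_0");
    std::cout << path.value_or("<none>") << '\n';  // "broken_1_all_1_1_0": the first free name
}
```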

From 0426c562f67307f597d543ae3f2848a386e199fb Mon Sep 17 00:00:00 2001
From: DanRoscigno
Date: Tue, 18 Oct 2022 14:38:14 -0400
Subject: [PATCH 157/252] add mdx and jsx to list of doc files

---
 tests/ci/pr_info.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py
index dc016a7eed9..ef473d9225e 100644
--- a/tests/ci/pr_info.py
+++ b/tests/ci/pr_info.py
@@ -20,10 +20,12 @@ SKIP_MERGEABLE_CHECK_LABEL = "skip mergeable check"
 DIFF_IN_DOCUMENTATION_EXT = [
     ".html",
     ".md",
+    ".mdx",
     ".yml",
     ".txt",
     ".css",
     ".js",
+    ".jsx",
     ".xml",
     ".ico",
     ".conf",

From e76560619c67a16193439fc8208dcef5e1151390 Mon Sep 17 00:00:00 2001
From: DanRoscigno
Date: Tue, 18 Oct 2022 14:52:28 -0400
Subject: [PATCH 158/252] moved into dev/getting-started

---
 docs/en/development/browse-code.md | 14 --------------
 docs/ru/development/browse-code.md | 14 --------------
 docs/zh/development/browse-code.md | 13 -------------
 3 files changed, 41 deletions(-)
 delete mode 100644 docs/en/development/browse-code.md
 delete mode 100644 docs/ru/development/browse-code.md
 delete mode 100644 docs/zh/development/browse-code.md

diff --git a/docs/en/development/browse-code.md b/docs/en/development/browse-code.md
deleted file mode 100644
index 0d064cc9b0c..00000000000
--- a/docs/en/development/browse-code.md
+++ /dev/null
@@ -1,14 +0,0 @@
----
-slug: /en/development/browse-code
-sidebar_label: Source Code Browser
-sidebar_position: 72
-description: Various ways to browse and edit the source code
----
-
-# Browse ClickHouse Source Code
-
-You can use the **Woboq** online code browser available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). It provides code navigation and semantic highlighting, search and indexing. The code snapshot is updated daily.
-
-Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual.
-
-If you’re interested what IDE to use, we recommend CLion, QT Creator, VS Code and KDevelop (with caveats). You can use any favorite IDE. Vim and Emacs also count.
diff --git a/docs/ru/development/browse-code.md b/docs/ru/development/browse-code.md
deleted file mode 100644
index 640b1ac3693..00000000000
--- a/docs/ru/development/browse-code.md
+++ /dev/null
@@ -1,14 +0,0 @@
----
-slug: /ru/development/browse-code
-sidebar_position: 72
-sidebar_label: "Навигация по коду ClickHouse"
----
-
-
-# Навигация по коду ClickHouse {#navigatsiia-po-kodu-clickhouse}
-
-Для навигации по коду онлайн доступен **Woboq**, он расположен [здесь](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). В нём реализовано удобное перемещение между исходными файлами, семантическая подсветка, подсказки, индексация и поиск. Слепок кода обновляется ежедневно.
-
-Также вы можете просматривать исходники на [GitHub](https://github.com/ClickHouse/ClickHouse).
-
-Если вы интересуетесь, какую среду разработки выбрать для работы с ClickHouse, мы рекомендуем CLion, QT Creator, VSCode или KDevelop (с некоторыми предостережениями). Вы можете использовать свою любимую среду разработки, Vim и Emacs тоже считаются.
diff --git a/docs/zh/development/browse-code.md b/docs/zh/development/browse-code.md
deleted file mode 100644
index 16382a94ed5..00000000000
--- a/docs/zh/development/browse-code.md
+++ /dev/null
@@ -1,13 +0,0 @@
----
-slug: /zh/development/browse-code
-sidebar_position: 63
-sidebar_label: "\u6D4F\u89C8\u6E90\u4EE3\u7801"
----
-
-# 浏览ClickHouse源代码 {#browse-clickhouse-source-code}
-
-您可以使用 **Woboq** 在线代码浏览器 [点击这里](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). 它提供了代码导航和语义突出显示、搜索和索引。 代码快照每天更新。
-
-此外,您还可以像往常一样浏览源代码 [GitHub](https://github.com/ClickHouse/ClickHouse)
-
-如果你希望了解哪种IDE较好,我们推荐使用CLion,QT Creator,VS Code和KDevelop(有注意事项)。 您可以使用任何您喜欢的IDE。 Vim和Emacs也可以。

From 65cd040d1565bb7b2a9ba515041c3a139d31a4f9 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Tue, 18 Oct 2022 21:06:03 +0200
Subject: [PATCH 159/252] Better workaround for emitting .debug_aranges section

Note that this is just a syntactic change that should not make any
difference (well, the only difference is that now it supports gold and
other linkers, since the option is handled by the plugin itself instead
of the linker).

Refs: https://reviews.llvm.org/D133092

Signed-off-by: Azat Khuzhin
---
 cmake/ld.lld.in | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cmake/ld.lld.in b/cmake/ld.lld.in
index 9736dab1bc3..78a264a0089 100755
--- a/cmake/ld.lld.in
+++ b/cmake/ld.lld.in
@@ -3,15 +3,15 @@
 # This is a workaround for bug in llvm/clang,
 # that does not produce .debug_aranges with LTO
 #
-# NOTE: this is a temporary solution, that should be removed once [1] will be
-# resolved.
+# NOTE: this is a temporary solution, that should be removed after upgrading to
+# clang-16/llvm-16.
 #
-# [1]: https://discourse.llvm.org/t/clang-does-not-produce-full-debug-aranges-section-with-thinlto/64898/8
+# Refs: https://reviews.llvm.org/D133092
 
 # NOTE: only -flto=thin is supported.
 # NOTE: it is not possible to check was there -gdwarf-aranges initially or not.
if [[ "$*" =~ -plugin-opt=thinlto ]]; then - exec "@LLD_PATH@" -mllvm -generate-arange-section "$@" + exec "@LLD_PATH@" -plugin-opt=-generate-arange-section "$@" else exec "@LLD_PATH@" "$@" fi From 436018260cf6cb79992e23b3bad24f81c066f606 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Tue, 18 Oct 2022 15:29:05 -0400 Subject: [PATCH 160/252] restore --- tests/ci/pr_info.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index ef473d9225e..dc016a7eed9 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -20,12 +20,10 @@ SKIP_MERGEABLE_CHECK_LABEL = "skip mergeable check" DIFF_IN_DOCUMENTATION_EXT = [ ".html", ".md", - ".mdx", ".yml", ".txt", ".css", ".js", - ".jsx", ".xml", ".ico", ".conf", From 312e835b7e4d8885829cb14767aa6e2c9ee2a2ff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 18 Oct 2022 22:22:39 +0200 Subject: [PATCH 161/252] Fix flaky test --- ..._long_sessions_in_http_interface.reference | 29 +++- .../00463_long_sessions_in_http_interface.sh | 151 +++++++----------- 2 files changed, 90 insertions(+), 90 deletions(-) diff --git a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.reference b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.reference index 53cdf1e9393..8d97a12a7f1 100644 --- a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.reference +++ b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.reference @@ -1 +1,28 @@ -PASSED +Using non-existent session with the 'session_check' flag will throw exception: +1 +Using non-existent session without the 'session_check' flag will create a new session: +1 +1 +The 'session_timeout' parameter is checked for validity and for the maximum value: +1 +1 +1 +Valid cases are accepted: +1 +1 +1 +Sessions are local per user: +1 +Hello +World +And cannot be accessed for a non-existent user: +1 +The temporary tables created in a session are not accessible without entering this session: +0 +A session successfully expire after a timeout: +111 +A session successfully expire after a timeout and the session's temporary table shadows the permanent table: +HelloWorld +A session cannot be used by concurrent connections: +1 +1 diff --git a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh index e9f486fbb73..9d9199b5f68 100755 --- a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh +++ b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh @@ -5,109 +5,82 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -request() { - local url="$1" - local select="$2" - ${CLICKHOUSE_CURL} --silent "$url" --data "$select" -} +echo "Using non-existent session with the 'session_check' flag will throw exception:" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=nonexistent&session_check=1" --data-binary "SELECT 1" | grep -c -F 'Session not found' -create_temporary_table() { - local url="$1" - request "$url" "CREATE TEMPORARY TABLE temp (x String)" - request "$url" "INSERT INTO temp VALUES ('Hello'), ('World')" -} +echo "Using non-existent session without the 'session_check' flag will create a new session:" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_1" --data-binary "SELECT 1" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_1&session_check=0" --data-binary "SELECT 1" +echo "The 'session_timeout' parameter is checked for validity and for the maximum value:" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_2&session_timeout=string" --data-binary "SELECT 1" | grep -c -F 'Invalid session timeout' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_2&session_timeout=3601" --data-binary "SELECT 1" | grep -c -F 'Maximum session timeout' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_2&session_timeout=-1" --data-binary "SELECT 1" | grep -c -F 'Invalid session timeout' -check() { - local url="$1" - local select="$2" - local output="$3" - local expected_result="$4" - local message="$5" - result=$(request "$url" "$select" | grep --count "$output") - if [ "$result" -ne "$expected_result" ]; then - echo "FAILED: $message" - exit 1 - fi -} +echo "Valid cases are accepted:" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_3&session_timeout=0" --data-binary "SELECT 1" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_4&session_timeout=3600" --data-binary "SELECT 1" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_5&session_timeout=60" --data-binary "SELECT 1" +echo "Sessions are local per user:" +${CLICKHOUSE_CLIENT} --multiquery --query "DROP USER IF EXISTS test_00463; CREATE USER test_00463; GRANT ALL ON *.* TO test_00463;" -address=${CLICKHOUSE_HOST} -port=${CLICKHOUSE_PORT_HTTP} -url="${CLICKHOUSE_PORT_HTTP_PROTO}://$address:$port/" -session="?session_id=test_$$" # use PID for session ID -select="SELECT * FROM system.settings WHERE name = 'max_rows_to_read'" -select_from_temporary_table="SELECT * FROM temp ORDER BY x" -select_from_non_existent_table="SELECT * FROM no_such_table ORDER BY x" +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_6&session_timeout=600" --data-binary "CREATE TEMPORARY TABLE t (s String)" +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_6" --data-binary "INSERT INTO t VALUES ('Hello')" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=test_00463&session_id=${CLICKHOUSE_DATABASE}_6&session_check=1" --data-binary "SELECT 1" | grep -c -F 'Session not found' +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=test_00463&session_id=${CLICKHOUSE_DATABASE}_6&session_timeout=600" --data-binary "CREATE TEMPORARY TABLE t (s String)" +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=test_00463&session_id=${CLICKHOUSE_DATABASE}_6" --data-binary "INSERT INTO t VALUES ('World')" -check "$url?session_id=no_such_session_$$&session_check=1" "$select" "Exception.*Session not 
found" 1 "session_check=1 does not work." -check "$url$session&session_check=0" "$select" "Exception" 0 "session_check=0 does not work." +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_6" --data-binary "SELECT * FROM t" +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=test_00463&session_id=${CLICKHOUSE_DATABASE}_6" --data-binary "SELECT * FROM t" -request "$url""$session" "SET max_rows_to_read=7777777" +${CLICKHOUSE_CLIENT} --multiquery --query "DROP USER test_00463"; -check "$url$session&session_timeout=string" "$select" "Exception.*Invalid session timeout" 1 "Non-numeric value accepted as a timeout." -check "$url$session&session_timeout=3601" "$select" "Exception.*Maximum session timeout*" 1 "More then 3600 seconds accepted as a timeout." -check "$url$session&session_timeout=-1" "$select" "Exception.*Invalid session timeout" 1 "Negative timeout accepted." -check "$url$session&session_timeout=0" "$select" "Exception" 0 "Zero timeout not accepted." -check "$url$session&session_timeout=3600" "$select" "Exception" 0 "3600 second timeout not accepted." -check "$url$session&session_timeout=60" "$select" "Exception" 0 "60 second timeout not accepted." +echo "And cannot be accessed for a non-existent user:" +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=test_00463&session_id=${CLICKHOUSE_DATABASE}_6" --data-binary "SELECT * FROM t" | grep -c -F 'Exception' -check "$url""$session" "$select" "7777777" 1 "Failed to reuse session." -# Workaround here -# TODO: move the test to integration test or add readonly user to test environment -if [[ -z $(request "$url?user=readonly" "SELECT ''") ]]; then - # We have readonly user - check "$url$session&user=readonly&session_check=1" "$select" "Exception.*Session not found" 1 "Session is accessable for another user." -else - check "$url$session&user=readonly&session_check=1" "$select" "Exception.*Unknown user*" 1 "Session is accessable for unknown user." -fi +echo "The temporary tables created in a session are not accessible without entering this session:" +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}" --data-binary "SELECT * FROM t" | grep -c -F 'Exception' -create_temporary_table "$url""$session" -check "$url""$session" "$select_from_temporary_table" "Hello" 1 "Failed to reuse a temporary table for session." - -check "$url?session_id=another_session_$$" "$select_from_temporary_table" "Exception.*Table .* doesn't exist." 1 "Temporary table is visible for another table." - - -( ( -cat </dev/null 2>/dev/null) & -sleep 1 -check "$url""$session" "$select" "Exception.*Session is locked" 1 "Double access to the same session." - - -session="?session_id=test_timeout_$$" - -create_temporary_table "$url$session&session_timeout=1" -check "$url$session&session_timeout=1" "$select_from_temporary_table" "Hello" 1 "Failed to reuse a temporary table for session." -sleep 3 -check "$url$session&session_check=1" "$select" "Exception.*Session not found" 1 "Session did not expire on time." - -create_temporary_table "$url$session&session_timeout=2" -for _ in $(seq 1 3); do - check "$url$session&session_timeout=2" "$select_from_temporary_table" "Hello" 1 "Session expired too early." - sleep 1 +echo "A session successfully expire after a timeout:" +# An infinite loop is required to make the test reliable. 
We will check that the timeout corresponds to the observed time at least once +while true +do + ( + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_7&session_timeout=1" --data-binary "SELECT 1" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_7&session_check=1" --data-binary "SELECT 1" + sleep 3 + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_7&session_check=1" --data-binary "SELECT 1" | grep -c -F 'Session not found' + ) | tr -d '\n' | grep -F '111' && break || sleep 1 done -sleep 3 -check "$url$session&session_check=1" "$select" "Exception.*Session not found" 1 "Session did not expire on time." -create_temporary_table "$url$session&session_timeout=2" -for _ in $(seq 1 5); do - check "$url$session&session_timeout=2" "$select_from_non_existent_table" "Exception.*Table .* doesn't exist." 1 "Session expired too early." - sleep 1 +echo "A session successfully expire after a timeout and the session's temporary table shadows the permanent table:" +# An infinite loop is required to make the test reliable. We will check that the timeout corresponds to the observed time at least once +${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE t (s String) ENGINE = Memory; INSERT INTO t VALUES ('World');" +while true +do + ( + ${CLICKHOUSE_CURL} -X POST -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_8&session_timeout=1" --data-binary "CREATE TEMPORARY TABLE t (s String)" + ${CLICKHOUSE_CURL} -X POST -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_8" --data-binary "INSERT INTO t VALUES ('Hello')" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_8" --data-binary "SELECT * FROM t" + sleep 3 + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_8" --data-binary "SELECT * FROM t" + ) | tr -d '\n' | grep -F 'HelloWorld' && break || sleep 1 done -check "$url$session&session_timeout=2" "$select_from_temporary_table" "Hello" 1 "Session expired too early. Failed to update timeout in case of exceptions." -sleep 4 -check "$url$session&session_check=1" "$select" "Exception.*Session not found" 1 "Session did not expire on time." +${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE t" +echo "A session cannot be used by concurrent connections:" -echo "PASSED" +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_9&query_id=${CLICKHOUSE_DATABASE}_9" --data-binary "SELECT count() FROM system.numbers" >/dev/null & + +# An infinite loop is required to make the test reliable. 
We will ensure that at least once the query on the line above has started before this check
+while true
+do
+    ${CLICKHOUSE_CLIENT} --query "SELECT count() > 0 FROM system.processes WHERE query_id = '${CLICKHOUSE_DATABASE}_9'" | grep -c -F '1' && break || sleep 1
+done
+
+${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_9" --data-binary "SELECT 1" | grep -c -F 'Session is locked'
+${CLICKHOUSE_CLIENT} --multiquery --query "KILL QUERY WHERE query_id = '${CLICKHOUSE_DATABASE}_9' SYNC FORMAT Null";
+wait

From 46917c023cd793bb2a479e7dd5655b6da7f3e649 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 19 Oct 2022 00:25:21 +0300
Subject: [PATCH 162/252] Update AlterCommands.cpp

---
 src/Storages/AlterCommands.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp
index c5e503fbc99..c1e7cefd19e 100644
--- a/src/Storages/AlterCommands.cpp
+++ b/src/Storages/AlterCommands.cpp
@@ -757,7 +757,7 @@ bool isMetadataOnlyConversion(const IDataType * from, const IDataType * to)
     const auto * nullable_to = typeid_cast<const DataTypeNullable *>(to);
     if (nullable_to)
     {
-        /// Here we allow a conversion X -> Nullable(X) to me a metadata-only conversion.
+        /// Here we allow a conversion X -> Nullable(X) to make a metadata-only conversion.
         from = nullable_from ? nullable_from->getNestedType().get() : from;
         to = nullable_to->getNestedType().get();
         continue;
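
BackupFactory keeps a registry from engine name ("File", "Disk", and now "S3") to a creation callback, and registerBackupEngineS3() adds the "S3" entry at startup. A reduced sketch of this registration pattern follows, with simplified types (the real CreateFn receives a CreateParams struct carrying the parsed engine arguments):

```cpp
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>

struct Backup {};  // stand-in for the real backup object
using CreateFn = std::function<std::unique_ptr<Backup>()>;

class BackupFactoryModel
{
public:
    void registerBackupEngine(const std::string & engine_name, CreateFn create_fn)
    {
        if (!engines.emplace(engine_name, std::move(create_fn)).second)
            throw std::runtime_error("backup engine already registered: " + engine_name);
    }

    std::unique_ptr<Backup> createBackup(const std::string & engine_name) const
    {
        auto it = engines.find(engine_name);
        if (it == engines.end())
            throw std::runtime_error("unknown backup engine: " + engine_name);
        return it->second();  // invoke the registered callback
    }

private:
    std::map<std::string, CreateFn> engines;
};

int main()
{
    BackupFactoryModel factory;
    factory.registerBackupEngine("S3", [] { return std::make_unique<Backup>(); });
    std::cout << (factory.createBackup("S3") != nullptr) << '\n';  // 1
}
```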

From 1365105bc4f65c4d83f225a49fd6fc2c986e429c Mon Sep 17 00:00:00 2001
From: Vitaly Baranov
Date: Wed, 12 Oct 2022 16:58:13 +0200
Subject: [PATCH 163/252] Implement backup to S3

---
 src/Backups/BackupFactory.cpp                 |   2 +
 src/Backups/BackupIO_S3.cpp                   | 375 ++++++++++++++++++
 src/Backups/BackupIO_S3.h                     |  92 +++++
 src/Backups/BackupImpl.cpp                    |   1 +
 src/Backups/registerBackupEngineS3.cpp        | 129 ++++++
 src/Disks/DiskDecorator.cpp                   |   5 +
 src/Disks/DiskDecorator.h                     |   1 +
 src/Disks/IDisk.h                             |   8 +
 .../ObjectStorages/DiskObjectStorage.cpp      |   5 +
 src/Disks/ObjectStorages/DiskObjectStorage.h  |   2 +
 .../configs/disk_s3.xml                       |  47 +++
 .../configs/named_collection_s3_backups.xml   |   9 +
 .../configs/storage_conf.xml                  |  42 --
 .../test_backup_restore_s3/test.py            | 127 ++++--
 14 files changed, 763 insertions(+), 82 deletions(-)
 create mode 100644 src/Backups/BackupIO_S3.cpp
 create mode 100644 src/Backups/BackupIO_S3.h
 create mode 100644 src/Backups/registerBackupEngineS3.cpp
 create mode 100644 tests/integration/test_backup_restore_s3/configs/disk_s3.xml
 create mode 100644 tests/integration/test_backup_restore_s3/configs/named_collection_s3_backups.xml
 delete mode 100644 tests/integration/test_backup_restore_s3/configs/storage_conf.xml

diff --git a/src/Backups/BackupFactory.cpp b/src/Backups/BackupFactory.cpp
index a23cc70658b..7c870737b1d 100644
--- a/src/Backups/BackupFactory.cpp
+++ b/src/Backups/BackupFactory.cpp
@@ -32,10 +32,12 @@ void BackupFactory::registerBackupEngine(const String & engine_name, const Creat
 }
 
 void registerBackupEnginesFileAndDisk(BackupFactory &);
+void registerBackupEngineS3(BackupFactory &);
 
 void registerBackupEngines(BackupFactory & factory)
 {
     registerBackupEnginesFileAndDisk(factory);
+    registerBackupEngineS3(factory);
 }
 
 BackupFactory::BackupFactory()
diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp
new file mode 100644
index 00000000000..be509824261
--- /dev/null
+++ b/src/Backups/BackupIO_S3.cpp
@@ -0,0 +1,375 @@
+#include <Backups/BackupIO_S3.h>
+
+#if USE_AWS_S3
+#include <Common/quoteString.h>
+#include <Interpreters/threadPoolCallbackRunner.h>
+#include <Interpreters/Context.h>
+#include <IO/IOThreadPool.h>
+#include <IO/ReadBufferFromS3.h>
+#include <IO/WriteBufferFromS3.h>
+#include <aws/core/auth/AWSCredentials.h>
+#include <aws/s3/S3Client.h>
+#include <aws/s3/model/ListObjectsRequest.h>
+#include <aws/s3/model/DeleteObjectsRequest.h>
+#include <aws/s3/model/UploadPartCopyRequest.h>
+#include <aws/s3/model/AbortMultipartUploadRequest.h>
+
+#include <filesystem>
+
+
+namespace fs = std::filesystem;
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int S3_ERROR;
+    extern const int LOGICAL_ERROR;
+}
+
+namespace
+{
+    std::shared_ptr<Aws::S3::S3Client>
+    makeS3Client(const S3::URI & s3_uri, const String & access_key_id, const String & secret_access_key, const ContextPtr & context)
+    {
+        auto settings = context->getStorageS3Settings().getSettings(s3_uri.uri.toString());
+
+        Aws::Auth::AWSCredentials credentials(access_key_id, secret_access_key);
+        HeaderCollection headers;
+        if (access_key_id.empty())
+        {
+            credentials = Aws::Auth::AWSCredentials(settings.auth_settings.access_key_id, settings.auth_settings.secret_access_key);
+            headers = settings.auth_settings.headers;
+        }
+
+        S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
+            settings.auth_settings.region,
+            context->getRemoteHostFilter(),
+            context->getGlobalContext()->getSettingsRef().s3_max_redirects,
+            context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging,
+            /* for_disk_s3 = */ false);
+
+        client_configuration.endpointOverride = s3_uri.endpoint;
+        client_configuration.maxConnections = context->getSettingsRef().s3_max_connections;
+        /// Increase connect timeout
+        client_configuration.connectTimeoutMs = 10 * 1000;
+        /// Requests in backups can be extremely long, set to one hour
+        client_configuration.requestTimeoutMs = 60 * 60 * 1000;
+
+        return S3::ClientFactory::instance().create(
+            client_configuration,
+            s3_uri.is_virtual_hosted_style,
+            credentials.GetAWSAccessKeyId(),
+            credentials.GetAWSSecretKey(),
+            settings.auth_settings.server_side_encryption_customer_key_base64,
+            std::move(headers),
+            settings.auth_settings.use_environment_credentials.value_or(
+                context->getConfigRef().getBool("s3.use_environment_credentials", false)),
+            settings.auth_settings.use_insecure_imds_request.value_or(
+                context->getConfigRef().getBool("s3.use_insecure_imds_request", false)));
+    }
+
+    Aws::Vector<Aws::S3::Model::Object> listObjects(Aws::S3::S3Client & client, const S3::URI & s3_uri, const String & file_name)
+    {
+        Aws::S3::Model::ListObjectsRequest request;
+        request.SetBucket(s3_uri.bucket);
+        request.SetPrefix(fs::path{s3_uri.key} / file_name);
+        request.SetMaxKeys(1);
+        auto outcome = client.ListObjects(request);
+        if (!outcome.IsSuccess())
+            throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
+        return outcome.GetResult().GetContents();
+    }
+}
+
+
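+/// Backup entries are stored as plain S3 objects under the backup's key prefix;
+/// existence and size checks go through the ListObjects helper above (MaxKeys=1
+/// keeps the probe cheap, since only the first match is needed).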
+BackupReaderS3::BackupReaderS3(
+    const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_)
+    : s3_uri(s3_uri_)
+    , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_))
+    , max_single_read_retries(context_->getSettingsRef().s3_max_single_read_retries)
+    , read_settings(context_->getReadSettings())
+{
+}
+
+DataSourceDescription BackupReaderS3::getDataSourceDescription() const
+{
+    return DataSourceDescription{DataSourceType::S3, s3_uri.endpoint, false, false};
+}
+
+
+BackupReaderS3::~BackupReaderS3() = default;
+
+bool BackupReaderS3::fileExists(const String & file_name)
+{
+    return !listObjects(*client, s3_uri, file_name).empty();
+}
+
+UInt64 BackupReaderS3::getFileSize(const String & file_name)
+{
+    auto objects = listObjects(*client, s3_uri, file_name);
+    if (objects.empty())
+        throw Exception(ErrorCodes::S3_ERROR, "Object {} must exist", file_name);
+    return objects[0].GetSize();
+}
+
+std::unique_ptr<SeekableReadBuffer> BackupReaderS3::readFile(const String & file_name)
+{
+    return std::make_unique<ReadBufferFromS3>(
+        client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, max_single_read_retries, read_settings);
+}
+
+
+BackupWriterS3::BackupWriterS3(
+    const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_)
+    : s3_uri(s3_uri_)
+    , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_))
+    , max_single_read_retries(context_->getSettingsRef().s3_max_single_read_retries)
+    , read_settings(context_->getReadSettings())
+    , rw_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).rw_settings)
+{
+    rw_settings.updateFromSettingsIfEmpty(context_->getSettingsRef());
+}
+
+DataSourceDescription BackupWriterS3::getDataSourceDescription() const
+{
+    return DataSourceDescription{DataSourceType::S3, s3_uri.endpoint, false, false};
+}
+
+bool BackupWriterS3::supportNativeCopy(DataSourceDescription data_source_description) const
+{
+    return getDataSourceDescription() == data_source_description;
+}
+
+
+void BackupWriterS3::copyObjectImpl(
+    const String & src_bucket,
+    const String & src_key,
+    const String & dst_bucket,
+    const String & dst_key,
+    std::optional<Aws::S3::Model::HeadObjectResult> head,
+    std::optional<ObjectAttributes> metadata) const
+{
+    Aws::S3::Model::CopyObjectRequest request;
+    request.SetCopySource(src_bucket + "/" + src_key);
+    request.SetBucket(dst_bucket);
+    request.SetKey(dst_key);
+    if (metadata)
+    {
+        request.SetMetadata(*metadata);
+        request.SetMetadataDirective(Aws::S3::Model::MetadataDirective::REPLACE);
+    }
+
+    auto outcome = client->CopyObject(request);
+
+    if (!outcome.IsSuccess() && outcome.GetError().GetExceptionName() == "EntityTooLarge")
+    { // Can't come here with MinIO, MinIO allows single part upload for large objects.
+        copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head, metadata);
+        return;
+    }
+
+    if (!outcome.IsSuccess())
+        throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
+
+}
+
+Aws::S3::Model::HeadObjectOutcome BackupWriterS3::requestObjectHeadData(const std::string & bucket_from, const std::string & key) const
+{
+    Aws::S3::Model::HeadObjectRequest request;
+    request.SetBucket(bucket_from);
+    request.SetKey(key);
+
+    return client->HeadObject(request);
+}
+
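+/// Server-side multipart copy: a single CopyObject call is rejected with
+/// EntityTooLarge for objects over 5 GiB, so the source is copied in chunks of
+/// min_upload_part_size via UploadPartCopy, and the multipart upload is aborted
+/// if any part copy fails.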
+void BackupWriterS3::copyObjectMultipartImpl(
+    const String & src_bucket,
+    const String & src_key,
+    const String & dst_bucket,
+    const String & dst_key,
+    std::optional<Aws::S3::Model::HeadObjectResult> head,
+    std::optional<ObjectAttributes> metadata) const
+{
+    if (!head)
+        head = requestObjectHeadData(src_bucket, src_key).GetResult();
+
+    size_t size = head->GetContentLength();
+
+    String multipart_upload_id;
+
+    {
+        Aws::S3::Model::CreateMultipartUploadRequest request;
+        request.SetBucket(dst_bucket);
+        request.SetKey(dst_key);
+        if (metadata)
+            request.SetMetadata(*metadata);
+
+        auto outcome = client->CreateMultipartUpload(request);
+
+        if (!outcome.IsSuccess())
+            throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
+
+        multipart_upload_id = outcome.GetResult().GetUploadId();
+    }
+
+    std::vector<String> part_tags;
+
+    size_t upload_part_size = rw_settings.min_upload_part_size;
+    for (size_t position = 0, part_number = 1; position < size; ++part_number, position += upload_part_size)
+    {
+        Aws::S3::Model::UploadPartCopyRequest part_request;
+        part_request.SetCopySource(src_bucket + "/" + src_key);
+        part_request.SetBucket(dst_bucket);
+        part_request.SetKey(dst_key);
+        part_request.SetUploadId(multipart_upload_id);
+        part_request.SetPartNumber(part_number);
+        part_request.SetCopySourceRange(fmt::format("bytes={}-{}", position, std::min(size, position + upload_part_size) - 1));
+
+        auto outcome = client->UploadPartCopy(part_request);
+        if (!outcome.IsSuccess())
+        {
+            Aws::S3::Model::AbortMultipartUploadRequest abort_request;
+            abort_request.SetBucket(dst_bucket);
+            abort_request.SetKey(dst_key);
+            abort_request.SetUploadId(multipart_upload_id);
+            client->AbortMultipartUpload(abort_request);
+            // In error case we throw exception later with first error from UploadPartCopy
+        }
+        if (!outcome.IsSuccess())
+            throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
+
+        auto etag = outcome.GetResult().GetCopyPartResult().GetETag();
+        part_tags.push_back(etag);
+    }
+
+    {
+        Aws::S3::Model::CompleteMultipartUploadRequest req;
+        req.SetBucket(dst_bucket);
+        req.SetKey(dst_key);
+        req.SetUploadId(multipart_upload_id);
+
+        Aws::S3::Model::CompletedMultipartUpload multipart_upload;
+        for (size_t i = 0; i < part_tags.size(); ++i)
+        {
+            Aws::S3::Model::CompletedPart part;
+            multipart_upload.AddParts(part.WithETag(part_tags[i]).WithPartNumber(i + 1));
+        }
+
+        req.SetMultipartUpload(multipart_upload);
+
+        auto outcome = client->CompleteMultipartUpload(req);
+
+        if (!outcome.IsSuccess())
+            throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
+    }
+}
+
+void BackupWriterS3::copyFileNative(DiskPtr from_disk, const String & file_name_from, const String & file_name_to)
+{
+    if (!from_disk)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot natively copy data to disk without source disk");
+
+    auto objects = from_disk->getStorageObjects(file_name_from);
+    if (objects.size() > 1)
+    {
+        copyFileThroughBuffer(from_disk->readFile(file_name_from), file_name_to);
+    }
+    else
+    {
+        auto object_storage = from_disk->getObjectStorage();
+        std::string source_bucket = object_storage->getObjectsNamespace();
+        auto file_path = fs::path(s3_uri.key) / file_name_to;
+
+        auto head = requestObjectHeadData(source_bucket, objects[0].absolute_path).GetResult();
+        static constexpr int64_t multipart_upload_threshold = 5UL * 1024 * 1024 * 1024;
+        if (head.GetContentLength() >= multipart_upload_threshold)
+        {
+            copyObjectMultipartImpl(
+                source_bucket, objects[0].absolute_path, s3_uri.bucket, file_path, head);
+        }
+        else
+        {
+            copyObjectImpl(
+                source_bucket, objects[0].absolute_path, s3_uri.bucket, file_path, head);
+        }
+    }
+}
+
+
+BackupWriterS3::~BackupWriterS3() = default;
+
+bool BackupWriterS3::fileExists(const String & file_name)
+{
+    return !listObjects(*client, s3_uri, file_name).empty();
+}
+
+UInt64 BackupWriterS3::getFileSize(const String & file_name)
+{
+    auto objects = listObjects(*client, s3_uri, file_name);
+    if (objects.empty())
+        throw Exception(ErrorCodes::S3_ERROR, "Object {} must exist", file_name);
+    return objects[0].GetSize();
+}
+
+bool BackupWriterS3::fileContentsEqual(const String & file_name, const String & expected_file_contents)
+{
+    if (listObjects(*client, s3_uri, file_name).empty())
+        return false;
+
+    try
+    {
+        auto in = std::make_unique<ReadBufferFromS3>(
+            client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, max_single_read_retries, read_settings);
+        String actual_file_contents(expected_file_contents.size(), ' ');
+        return (in->read(actual_file_contents.data(), actual_file_contents.size()) == actual_file_contents.size())
+            && (actual_file_contents == expected_file_contents) && in->eof();
+    }
+    catch (...)
+    {
+        tryLogCurrentException(__PRETTY_FUNCTION__);
+        return false;
+    }
+}
+
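+/// Writes go through WriteBufferFromS3, which performs a (possibly multipart)
+/// upload whose parts are scheduled on the shared IO thread pool via
+/// threadPoolCallbackRunner.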
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + return false; + } +} + +std::unique_ptr BackupWriterS3::writeFile(const String & file_name) +{ + return std::make_unique( + client, + s3_uri.bucket, + fs::path(s3_uri.key) / file_name, + rw_settings, + std::nullopt, + DBMS_DEFAULT_BUFFER_SIZE, + threadPoolCallbackRunner(IOThreadPool::get(), "BackupWriterS3")); +} + +void BackupWriterS3::removeFiles(const Strings & file_names) +{ + /// One call of DeleteObjects() cannot remove more than 1000 keys. + size_t chunk_size_limit = 1000; + + size_t current_position = 0; + while (current_position < file_names.size()) + { + std::vector current_chunk; + for (; current_position < file_names.size() && current_chunk.size() < chunk_size_limit; ++current_position) + { + Aws::S3::Model::ObjectIdentifier obj; + obj.SetKey(fs::path(s3_uri.key) / file_names[current_position]); + current_chunk.push_back(obj); + } + + Aws::S3::Model::Delete delkeys; + delkeys.SetObjects(current_chunk); + Aws::S3::Model::DeleteObjectsRequest request; + request.SetBucket(s3_uri.bucket); + request.SetDelete(delkeys); + + auto outcome = client->DeleteObjects(request); + if (!outcome.IsSuccess()) + throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); + } +} + +} + +#endif diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h new file mode 100644 index 00000000000..471ddcc06e6 --- /dev/null +++ b/src/Backups/BackupIO_S3.h @@ -0,0 +1,92 @@ +#pragma once + +#include "config.h" + +#if USE_AWS_S3 +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +/// Represents a backup stored to AWS S3. +class BackupReaderS3 : public IBackupReader +{ +public: + BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_); + ~BackupReaderS3() override; + + bool fileExists(const String & file_name) override; + UInt64 getFileSize(const String & file_name) override; + std::unique_ptr readFile(const String & file_name) override; + DataSourceDescription getDataSourceDescription() const override; + +private: + S3::URI s3_uri; + std::shared_ptr client; + UInt64 max_single_read_retries; + ReadSettings read_settings; +}; + + +class BackupWriterS3 : public IBackupWriter +{ +public: + BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_); + ~BackupWriterS3() override; + + bool fileExists(const String & file_name) override; + UInt64 getFileSize(const String & file_name) override; + bool fileContentsEqual(const String & file_name, const String & expected_file_contents) override; + std::unique_ptr writeFile(const String & file_name) override; + void removeFiles(const Strings & file_names) override; + + DataSourceDescription getDataSourceDescription() const override; + bool supportNativeCopy(DataSourceDescription data_source_description) const override; + void copyFileNative(DiskPtr from_disk, const String & file_name_from, const String & file_name_to) override; + +private: + + Aws::S3::Model::HeadObjectOutcome requestObjectHeadData(const std::string & bucket_from, const std::string & key) const; + + void copyObjectImpl( + const String & src_bucket, + const String & src_key, + const String & dst_bucket, + const String & dst_key, + std::optional head = std::nullopt, + std::optional metadata = std::nullopt) const; + + void copyObjectMultipartImpl( + const 
String & src_bucket, + const String & src_key, + const String & dst_bucket, + const String & dst_key, + std::optional head = std::nullopt, + std::optional metadata = std::nullopt) const; + + S3::URI s3_uri; + std::shared_ptr client; + UInt64 max_single_read_retries; + ReadSettings read_settings; + S3Settings::ReadWriteSettings rw_settings; +}; + +} + +#endif diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 8b648af44ec..2d58e993364 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -455,6 +455,7 @@ void BackupImpl::createLockFile() assert(uuid); auto out = writer->writeFile(lock_file_name); writeUUIDText(*uuid, *out); + out->finalize(); } bool BackupImpl::checkLockFile(bool throw_if_failed) const diff --git a/src/Backups/registerBackupEngineS3.cpp b/src/Backups/registerBackupEngineS3.cpp new file mode 100644 index 00000000000..4d628e57b5c --- /dev/null +++ b/src/Backups/registerBackupEngineS3.cpp @@ -0,0 +1,129 @@ +#include "config.h" + +#include +#include + +#if USE_AWS_S3 +#include +#include +#include +#include +#include +#include +#endif + + +namespace DB +{ +namespace fs = std::filesystem; + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int SUPPORT_IS_DISABLED; +} + +#if USE_AWS_S3 +namespace +{ + String removeFileNameFromURL(String & url) + { + Poco::URI url2{url}; + String path = url2.getPath(); + size_t slash_pos = path.find_last_of('/'); + String file_name = path.substr(slash_pos + 1); + path.resize(slash_pos + 1); + url2.setPath(path); + url = url2.toString(); + return file_name; + } +} +#endif + + +void registerBackupEngineS3(BackupFactory & factory) +{ + auto creator_fn = []([[maybe_unused]] const BackupFactory::CreateParams & params) -> std::unique_ptr + { +#if USE_AWS_S3 + String backup_name = params.backup_info.toString(); + const String & id_arg = params.backup_info.id_arg; + const auto & args = params.backup_info.args; + + String s3_uri, access_key_id, secret_access_key; + + if (!id_arg.empty()) + { + const auto & config = params.context->getConfigRef(); + auto config_prefix = "named_collections." 
+ id_arg; + + if (!config.has(config_prefix)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", id_arg); + + s3_uri = config.getString(config_prefix + ".url"); + access_key_id = config.getString(config_prefix + ".access_key_id", ""); + secret_access_key = config.getString(config_prefix + ".secret_access_key", ""); + + if (config.has(config_prefix + ".filename")) + s3_uri = fs::path(s3_uri) / config.getString(config_prefix + ".filename"); + + if (args.size() > 1) + throw Exception( + "Backup S3 requires 1 or 2 arguments: named_collection, [filename]", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (args.size() == 1) + s3_uri = fs::path(s3_uri) / args[0].safeGet(); + } + else + { + if ((args.size() != 1) && (args.size() != 3)) + throw Exception( + "Backup S3 requires 1 or 3 arguments: url, [access_key_id, secret_access_key]", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + s3_uri = args[0].safeGet(); + if (args.size() >= 3) + { + access_key_id = args[1].safeGet(); + secret_access_key = args[2].safeGet(); + } + } + + BackupImpl::ArchiveParams archive_params; + if (hasRegisteredArchiveFileExtension(s3_uri)) + { + if (params.is_internal_backup) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled"); + + archive_params.archive_name = removeFileNameFromURL(s3_uri); + archive_params.compression_method = params.compression_method; + archive_params.compression_level = params.compression_level; + archive_params.password = params.password; + } + else + { + if (!params.password.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Password is not applicable, backup cannot be encrypted"); + } + + if (params.open_mode == IBackup::OpenMode::READ) + { + auto reader = std::make_shared(S3::URI{Poco::URI{s3_uri}}, access_key_id, secret_access_key, params.context); + return std::make_unique(backup_name, archive_params, params.base_backup_info, reader, params.context); + } + else + { + auto writer = std::make_shared(S3::URI{Poco::URI{s3_uri}}, access_key_id, secret_access_key, params.context); + return std::make_unique(backup_name, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid); + } +#else + throw Exception("S3 support is disabled", ErrorCodes::SUPPORT_IS_DISABLED); +#endif + }; + + factory.registerBackupEngine("S3", creator_fn); +} + +} diff --git a/src/Disks/DiskDecorator.cpp b/src/Disks/DiskDecorator.cpp index 73540aaa0ab..af17289c8af 100644 --- a/src/Disks/DiskDecorator.cpp +++ b/src/Disks/DiskDecorator.cpp @@ -241,6 +241,11 @@ DiskObjectStoragePtr DiskDecorator::createDiskObjectStorage() return delegate->createDiskObjectStorage(); } +ObjectStoragePtr DiskDecorator::getObjectStorage() +{ + return delegate->getObjectStorage(); +} + DiskPtr DiskDecorator::getNestedDisk() const { if (const auto * decorator = dynamic_cast(delegate.get())) diff --git a/src/Disks/DiskDecorator.h b/src/Disks/DiskDecorator.h index dcd12ab4bbf..25278f905ba 100644 --- a/src/Disks/DiskDecorator.h +++ b/src/Disks/DiskDecorator.h @@ -89,6 +89,7 @@ public: void getRemotePathsRecursive(const String & path, std::vector & paths_map) override { return delegate->getRemotePathsRecursive(path, paths_map); } DiskObjectStoragePtr createDiskObjectStorage() override; + ObjectStoragePtr getObjectStorage() override; NameSet getCacheLayersNames() const override { return delegate->getCacheLayersNames(); } MetadataStoragePtr getMetadataStorage() override { 
return delegate->getMetadataStorage(); } diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index ba843235345..4a7be740ccf 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -366,6 +366,14 @@ public: /// Return current disk revision. virtual UInt64 getRevision() const { return 0; } + virtual ObjectStoragePtr getObjectStorage() + { + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Method getObjectStorage() is not implemented for disk type: {}", + getDataSourceDescription().type); + } + /// Create disk object storage according to disk type. /// For example for DiskLocal create DiskObjectStorage(LocalObjectStorage), /// for DiskObjectStorage create just a copy. diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index fb13ed7eec8..8814d12d6eb 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -82,6 +82,11 @@ DiskTransactionPtr DiskObjectStorage::createTransaction() return std::make_shared(*this); } +ObjectStoragePtr DiskObjectStorage::getObjectStorage() +{ + return object_storage; +} + DiskTransactionPtr DiskObjectStorage::createObjectStorageTransaction() { return std::make_shared( diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 14fb84d7a15..333fcb258e4 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -166,6 +166,8 @@ public: UInt64 getRevision() const override; + ObjectStoragePtr getObjectStorage() override; + DiskObjectStoragePtr createDiskObjectStorage() override; bool supportsCache() const override; diff --git a/tests/integration/test_backup_restore_s3/configs/disk_s3.xml b/tests/integration/test_backup_restore_s3/configs/disk_s3.xml new file mode 100644 index 00000000000..c1fd059bc67 --- /dev/null +++ b/tests/integration/test_backup_restore_s3/configs/disk_s3.xml @@ -0,0 +1,47 @@ + + + + + + s3 + http://minio1:9001/root/data/disks/disk_s3/ + minio + minio123 + + + s3 + http://minio1:9001/root2/data/disks/disk_s3/ + minio + minio123 + + + s3_plain + http://minio1:9001/root/data/disks/disk_s3_plain/ + minio + minio123 + 33554432 + + + + + +
+                        <disk>disk_s3</disk>
+                    </main>
+                </volumes>
+            </policy_s3>
+            <policy_s3_other_bucket>
+                <volumes>
+                    <main>
+                        <disk>disk_s3_other_bucket</disk>
+                    </main>
+                </volumes>
+            </policy_s3_other_bucket>
+        </policies>
+    </storage_configuration>
+
+    <backups>
+        <allowed_disk>default</allowed_disk>
+        <allowed_disk>disk_s3</allowed_disk>
+        <allowed_disk>disk_s3_plain</allowed_disk>
+    </backups>
+</clickhouse>
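
To make the part-range arithmetic in BackupWriterS3::copyObjectMultipartImpl above concrete: each UploadPartCopy request copies an inclusive byte range, so the last part has to be clamped to the object size. Below is a minimal standalone sketch (not part of the patch); the 40-byte object size and 16-byte part size are hypothetical stand-ins for the real values.

// Minimal sketch, assuming a 40-byte object and a 16-byte part size (both
// hypothetical): prints the inclusive "bytes=first-last" ranges that the
// multipart copy loop above passes to SetCopySourceRange().
#include <algorithm>
#include <cstddef>
#include <cstdio>

int main()
{
    const size_t size = 40;              // stands in for head->GetContentLength()
    const size_t upload_part_size = 16;  // stands in for rw_settings.min_upload_part_size

    for (size_t position = 0, part_number = 1; position < size; ++part_number, position += upload_part_size)
    {
        const size_t last = std::min(size, position + upload_part_size) - 1;
        std::printf("part %zu: bytes=%zu-%zu\n", part_number, position, last);
    }
    return 0;
}

Running it prints "bytes=0-15", "bytes=16-31", "bytes=32-39": three parts whose ranges cover the object exactly once, which is what S3 requires before CompleteMultipartUpload will succeed.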
diff --git a/tests/integration/test_backup_restore_s3/configs/named_collection_s3_backups.xml b/tests/integration/test_backup_restore_s3/configs/named_collection_s3_backups.xml new file mode 100644 index 00000000000..7a9d5effede --- /dev/null +++ b/tests/integration/test_backup_restore_s3/configs/named_collection_s3_backups.xml @@ -0,0 +1,9 @@ + + + + http://minio1:9001/root/data/backups + minio + minio123 + + + \ No newline at end of file diff --git a/tests/integration/test_backup_restore_s3/configs/storage_conf.xml b/tests/integration/test_backup_restore_s3/configs/storage_conf.xml deleted file mode 100644 index 0402be720c4..00000000000 --- a/tests/integration/test_backup_restore_s3/configs/storage_conf.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - s3 - http://minio1:9001/root/data/ - minio - minio123 - 33554432 - - - s3_plain - http://minio1:9001/root/data/ - minio - minio123 - 33554432 - - - local - / - - - - - -
-                        <disk>s3</disk>
-                    </main>
-                </volumes>
-            </s3>
-        </policies>
-    </storage_configuration>
-
-    <backups>
-        <allowed_disk>default</allowed_disk>
-        <allowed_disk>s3</allowed_disk>
-        <allowed_disk>s3_plain</allowed_disk>
-        <allowed_path>/backups/</allowed_path>
-    </backups>
-</clickhouse>
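
A related pattern from earlier in this commit: BackupWriterS3::removeFiles() splits its key list into chunks because one DeleteObjects() call cannot remove more than 1000 keys. A minimal standalone sketch of just that batching logic follows (the key names and count are made up; only the chunking pattern mirrors the patch):

// Minimal sketch of batching keys into chunks of at most 1000, the S3
// DeleteObjects limit that removeFiles() works around. Keys are hypothetical.
#include <cstdio>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> file_names;
    for (int i = 0; i < 2500; ++i)
        file_names.push_back("backup/file_" + std::to_string(i));

    const size_t chunk_size_limit = 1000;
    size_t current_position = 0;
    while (current_position < file_names.size())
    {
        std::vector<std::string> current_chunk;
        for (; current_position < file_names.size() && current_chunk.size() < chunk_size_limit; ++current_position)
            current_chunk.push_back(file_names[current_position]);

        // A real implementation would issue one DeleteObjects request per chunk here.
        std::printf("chunk of %zu keys\n", current_chunk.size());
    }
    return 0;
}

For 2500 keys this produces chunks of 1000, 1000 and 500, so every key is deleted and no single request exceeds the API limit.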
diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index e18b3800fc0..617c14d6736 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -1,65 +1,40 @@ -#!/usr/bin/env python3 -# pylint: disable=unused-argument - import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node = cluster.add_instance( "node", - main_configs=["configs/storage_conf.xml"], + main_configs=["configs/disk_s3.xml", "configs/named_collection_s3_backups.xml"], with_minio=True, ) -@pytest.fixture(scope="module") +@pytest.fixture(scope="module", autouse=True) def start_cluster(): try: cluster.start() - yield cluster + yield finally: cluster.shutdown() -@pytest.mark.parametrize( - "storage_policy,to_disk", - [ - pytest.param( - "default", - "default", - id="from_local_to_local", - ), - pytest.param( - "s3", - "default", - id="from_s3_to_local", - ), - pytest.param( - "default", - "s3", - id="from_local_to_s3", - ), - pytest.param( - "s3", - "s3_plain", - id="from_s3_to_s3_plain", - ), - pytest.param( - "default", - "s3_plain", - id="from_local_to_s3_plain", - ), - ], -) -def test_backup_restore(start_cluster, storage_policy, to_disk): - backup_name = storage_policy + "_" + to_disk +backup_id_counter = 0 + + +def new_backup_name(): + global backup_id_counter + backup_id_counter += 1 + return f"backup{backup_id_counter}" + + +def check_backup_and_restore(storage_policy, backup_destination): node.query( f""" DROP TABLE IF EXISTS data NO DELAY; CREATE TABLE data (key Int, value String, array Array(String)) Engine=MergeTree() ORDER BY tuple() SETTINGS storage_policy='{storage_policy}'; INSERT INTO data SELECT * FROM generateRandom('key Int, value String, array Array(String)') LIMIT 1000; - BACKUP TABLE data TO Disk('{to_disk}', '{backup_name}'); - RESTORE TABLE data AS data_restored FROM Disk('{to_disk}', '{backup_name}'); + BACKUP TABLE data TO {backup_destination}; + RESTORE TABLE data AS data_restored FROM {backup_destination}; SELECT throwIf( (SELECT groupArray(tuple(*)) FROM data) != (SELECT groupArray(tuple(*)) FROM data_restored), @@ -69,3 +44,75 @@ def test_backup_restore(start_cluster, storage_policy, to_disk): DROP TABLE data_restored NO DELAY; """ ) + + +@pytest.mark.parametrize( + "storage_policy, to_disk", + [ + pytest.param( + "default", + "default", + id="from_local_to_local", + ), + pytest.param( + "policy_s3", + "default", + id="from_s3_to_local", + ), + pytest.param( + "default", + "disk_s3", + id="from_local_to_s3", + ), + pytest.param( + "policy_s3", + "disk_s3_plain", + id="from_s3_to_s3_plain", + ), + pytest.param( + "default", + "disk_s3_plain", + id="from_local_to_s3_plain", + ), + ], +) +def test_backup_to_disk(storage_policy, to_disk): + backup_name = new_backup_name() + backup_destination = f"Disk('{to_disk}', '{backup_name}')" + check_backup_and_restore(storage_policy, backup_destination) + + +def test_backup_to_s3(): + storage_policy = "default" + backup_name = new_backup_name() + backup_destination = ( + f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" + ) + check_backup_and_restore(storage_policy, backup_destination) + + +def test_backup_to_s3_named_collection(): + storage_policy = "default" + backup_name = new_backup_name() + backup_destination = f"S3(named_collection_s3_backups, '{backup_name}')" + check_backup_and_restore(storage_policy, backup_destination) + + +def 
test_backup_to_s3_native_copy(): + storage_policy = "policy_s3" + backup_name = new_backup_name() + backup_destination = ( + f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" + ) + check_backup_and_restore(storage_policy, backup_destination) + assert node.contains_in_log("using native copy") + + +def test_backup_to_s3_other_bucket_native_copy(): + storage_policy = "policy_s3_other_bucket" + backup_name = new_backup_name() + backup_destination = ( + f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" + ) + check_backup_and_restore(storage_policy, backup_destination) + assert node.contains_in_log("using native copy") From e45c800cdf51827b834b32062cbcd30bd9c39f57 Mon Sep 17 00:00:00 2001 From: Boris Kuschel Date: Tue, 18 Oct 2022 17:08:53 -0500 Subject: [PATCH 164/252] Fix power8+ support add power9+ support --- cmake/cpu_features.cmake | 8 +++++++- contrib/rocksdb-cmake/CMakeLists.txt | 22 ++++++---------------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 6707d703372..7cba7c7548d 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -61,8 +61,14 @@ elseif (ARCH_AARCH64) endif () elseif (ARCH_PPC64LE) + # By Default, build for power8 and up, allow building for power9 and up # Note that gcc and clang have support for x86 SSE2 intrinsics when building for PowerPC - set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -mcpu=power8 -D__SSE2__=1 -DNO_WARN_X86_INTRINSICS") + option (POWER9 "Build for Power 9 CPU and above" 0) + if(POWER9) + set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -mcpu=power9 -D__SSE2__=1 -DNO_WARN_X86_INTRINSICS") + else () + set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -mcpu=power8 -D__SSE2__=1 -DNO_WARN_X86_INTRINSICS") + endif () elseif (ARCH_AMD64) option (ENABLE_SSSE3 "Use SSSE3 instructions on x86_64" 1) diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index b9dd2558348..466adf6aff0 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -78,23 +78,13 @@ endif() include(CheckCCompilerFlag) if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") - CHECK_C_COMPILER_FLAG("-mcpu=power9" HAS_POWER9) - if(HAS_POWER9) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=power9 -mtune=power9") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=power9 -mtune=power9") + if(POWER9) + set(HAS_POWER9 1) + set(HAS_ALTIVEC 1) else() - CHECK_C_COMPILER_FLAG("-mcpu=power8" HAS_POWER8) - if(HAS_POWER8) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=power8 -mtune=power8") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=power8 -mtune=power8") - endif(HAS_POWER8) - endif(HAS_POWER9) - CHECK_C_COMPILER_FLAG("-maltivec" HAS_ALTIVEC) - if(HAS_ALTIVEC) - message(STATUS " HAS_ALTIVEC yes") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maltivec") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec") - endif(HAS_ALTIVEC) + set(HAS_POWER8 1) + set(HAS_ALTIVEC 1) + endif(POWER9) endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64") if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64") From 5777999504580bf11a41ba13edf71299adb098e8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Oct 2022 02:20:22 +0300 Subject: [PATCH 165/252] Update 00463_long_sessions_in_http_interface.sh --- .../queries/0_stateless/00463_long_sessions_in_http_interface.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh 
b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh index 9d9199b5f68..9bf8a0c297a 100755 --- a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh +++ b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash # Tags: long, no-parallel +# shellcheck disable=SC2015 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 2e3a111c2981204eb518f065cc9638c1a415f647 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Oct 2022 03:26:22 +0200 Subject: [PATCH 166/252] Fix UBSan in Modified Julian Day --- src/Functions/GregorianDate.h | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h index ef2b9e6eede..3a8f0c52dc3 100644 --- a/src/Functions/GregorianDate.h +++ b/src/Functions/GregorianDate.h @@ -38,7 +38,7 @@ namespace DB * integral type which should be at least 32 bits wide, and * should preferably signed. */ - explicit GregorianDate(is_integer auto mjd); + explicit GregorianDate(is_integer auto modified_julian_day); /** Convert to Modified Julian Day. The type T is an integral type * which should be at least 32 bits wide, and should preferably @@ -89,7 +89,7 @@ namespace DB * integral type which should be at least 32 bits wide, and * should preferably signed. */ - explicit OrdinalDate(is_integer auto mjd); + explicit OrdinalDate(is_integer auto modified_julian_day); /** Convert to Modified Julian Day. The type T is an integral * type which should be at least 32 bits wide, and should @@ -257,9 +257,9 @@ namespace DB } template - GregorianDate::GregorianDate(is_integer auto mjd) + GregorianDate::GregorianDate(is_integer auto modified_julian_day) { - const OrdinalDate ord(mjd); + const OrdinalDate ord(modified_julian_day); const MonthDay md(gd::is_leap_year(ord.year()), ord.dayOfYear()); year_ = ord.year(); month_ = md.month(); @@ -329,9 +329,17 @@ namespace DB } template - OrdinalDate::OrdinalDate(is_integer auto mjd) + OrdinalDate::OrdinalDate(is_integer auto modified_julian_day) { - const auto a = mjd + 678575; + /// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively). 
+ + if (modified_julian_day < -678941) + modified_julian_day = -678941; + + if (modified_julian_day > 2973119) + modified_julian_day = 2973119; + + const auto a = modified_julian_day + 678575; const auto quad_cent = gd::div(a, 146097); const auto b = gd::mod(a, 146097); const auto cent = gd::min(gd::div(b, 36524), 3); @@ -339,8 +347,9 @@ namespace DB const auto quad = gd::div(c, 1461); const auto d = gd::mod(c, 1461); const auto y = gd::min(gd::div(d, 365), 3); + day_of_year_ = d - y * 365 + 1; - year_ = quad_cent * 400 + cent * 100 + quad * 4 + y + 1; + year_ = quad_cent * 400 + cent * 100 + quad * 4 + y + 1; } template From e5d1a1b293ed9616e11e82e6689816b04610c6b3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Oct 2022 03:34:16 +0200 Subject: [PATCH 167/252] Add a test --- src/Functions/GregorianDate.h | 10 ++++++---- .../0_stateless/02463_julian_day_ubsan.reference | 1 + tests/queries/0_stateless/02463_julian_day_ubsan.sql | 1 + 3 files changed, 8 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02463_julian_day_ubsan.reference create mode 100644 tests/queries/0_stateless/02463_julian_day_ubsan.sql diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h index 3a8f0c52dc3..da6a24268f7 100644 --- a/src/Functions/GregorianDate.h +++ b/src/Functions/GregorianDate.h @@ -333,11 +333,13 @@ namespace DB { /// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively). - if (modified_julian_day < -678941) - modified_julian_day = -678941; + if constexpr (is_signed_v && std::numeric_limits::lowest() < -678941) + if (modified_julian_day < -678941) + modified_julian_day = -678941; - if (modified_julian_day > 2973119) - modified_julian_day = 2973119; + if constexpr (std::numeric_limits::max() > 2973119) + if (modified_julian_day > 2973119) + modified_julian_day = 2973119; const auto a = modified_julian_day + 678575; const auto quad_cent = gd::div(a, 146097); diff --git a/tests/queries/0_stateless/02463_julian_day_ubsan.reference b/tests/queries/0_stateless/02463_julian_day_ubsan.reference new file mode 100644 index 00000000000..8be8aff668e --- /dev/null +++ b/tests/queries/0_stateless/02463_julian_day_ubsan.reference @@ -0,0 +1 @@ +9999-01-01 diff --git a/tests/queries/0_stateless/02463_julian_day_ubsan.sql b/tests/queries/0_stateless/02463_julian_day_ubsan.sql new file mode 100644 index 00000000000..60b51538bf3 --- /dev/null +++ b/tests/queries/0_stateless/02463_julian_day_ubsan.sql @@ -0,0 +1 @@ +SELECT fromModifiedJulianDay(9223372036854775807 :: Int64); From 148275ed640a7e7e523879e298c54bf265dcdd91 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Oct 2022 04:15:26 +0200 Subject: [PATCH 168/252] Fix buffer overflow in Decimal scale --- src/Common/intExp.h | 26 +++++++++++++++++-- ...64_decimal_scale_buffer_overflow.reference | 0 .../02464_decimal_scale_buffer_overflow.sql | 5 ++++ 3 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02464_decimal_scale_buffer_overflow.reference create mode 100644 tests/queries/0_stateless/02464_decimal_scale_buffer_overflow.sql diff --git a/src/Common/intExp.h b/src/Common/intExp.h index 3529990ef3b..04c163ff224 100644 --- a/src/Common/intExp.h +++ b/src/Common/intExp.h @@ -47,6 +47,11 @@ namespace common constexpr inline int exp10_i32(int x) { + if (x < 0) + return 0; + if (x > 9) + return std::numeric_limits::max(); + constexpr int values[] = { 1, @@ -65,6 +70,11 @@ constexpr inline int 
exp10_i32(int x) constexpr inline int64_t exp10_i64(int x) { + if (x < 0) + return 0; + if (x > 18) + return std::numeric_limits::max(); + constexpr int64_t values[] = { 1LL, @@ -92,6 +102,11 @@ constexpr inline int64_t exp10_i64(int x) constexpr inline Int128 exp10_i128(int x) { + if (x < 0) + return 0; + if (x > 38) + return std::numeric_limits::max(); + constexpr Int128 values[] = { static_cast(1LL), @@ -140,6 +155,11 @@ constexpr inline Int128 exp10_i128(int x) inline Int256 exp10_i256(int x) { + if (x < 0) + return 0; + if (x > 76) + return std::numeric_limits::max(); + using Int256 = Int256; static constexpr Int256 i10e18{1000000000000000000ll}; static const Int256 values[] = { @@ -231,8 +251,10 @@ inline Int256 exp10_i256(int x) template constexpr inline T intExp10OfSize(int x) { - if constexpr (sizeof(T) <= 8) - return intExp10(x); + if constexpr (sizeof(T) <= 4) + return common::exp10_i32(x); + else if constexpr (sizeof(T) <= 8) + return common::exp10_i64(x); else if constexpr (sizeof(T) <= 16) return common::exp10_i128(x); else diff --git a/tests/queries/0_stateless/02464_decimal_scale_buffer_overflow.reference b/tests/queries/0_stateless/02464_decimal_scale_buffer_overflow.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02464_decimal_scale_buffer_overflow.sql b/tests/queries/0_stateless/02464_decimal_scale_buffer_overflow.sql new file mode 100644 index 00000000000..355d9012f1f --- /dev/null +++ b/tests/queries/0_stateless/02464_decimal_scale_buffer_overflow.sql @@ -0,0 +1,5 @@ +DROP TABLE IF EXISTS series__fuzz_35; +CREATE TABLE series__fuzz_35 (`i` UInt8, `x_value` Decimal(18, 14), `y_value` DateTime) ENGINE = Memory; +INSERT INTO series__fuzz_35(i, x_value, y_value) VALUES (1, 5.6,-4.4),(2, -9.6,3),(3, -1.3,-4),(4, 5.3,9.7),(5, 4.4,0.037),(6, -8.6,-7.8),(7, 5.1,9.3),(8, 7.9,-3.6),(9, -8.2,0.62),(10, -3,7.3); +SELECT skewSamp(x_value) FROM (SELECT x_value as x_value FROM series__fuzz_35 LIMIT 2) FORMAT Null; +DROP TABLE series__fuzz_35; From ae53ac63c4c206ddcd1c726f4cc2a5b700e78ad3 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Wed, 19 Oct 2022 06:53:44 +0000 Subject: [PATCH 169/252] fix bypass_cache_threshold --- src/Interpreters/Cache/FileCache.cpp | 6 +++--- src/Interpreters/Cache/FileCache.h | 4 ++-- src/Interpreters/Cache/FileCacheSettings.cpp | 9 +++++++-- src/Interpreters/Cache/FileCacheSettings.h | 4 ++-- src/Interpreters/Cache/FileCache_fwd.h | 2 +- 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index f31e525dcca..72fa1b3c324 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -32,8 +32,8 @@ FileCache::FileCache( , allow_persistent_files(cache_settings_.do_not_evict_index_and_mark_files) , enable_cache_hits_threshold(cache_settings_.enable_cache_hits_threshold) , enable_filesystem_query_cache_limit(cache_settings_.enable_filesystem_query_cache_limit) - , enable_limit_download_cache_size(cache_settings_.enable_limit_download_cache_size) - , max_enable_download_cache_size(cache_settings_.max_enable_download_cache_size) + , enable_bypass_cache_with_threashold(cache_settings_.enable_bypass_cache_with_threashold) + , bypass_cache_threashold(cache_settings_.bypass_cache_threashold) , log(&Poco::Logger::get("FileCache")) , main_priority(std::make_unique()) , stash_priority(std::make_unique()) @@ -189,7 +189,7 @@ FileSegments FileCache::getImpl( FileSegments result; - 
if (enable_limit_download_cache_size && (range.size() > max_enable_download_cache_size)) + if (enable_bypass_cache_with_threashold && (range.size() > bypass_cache_threashold)) { auto file_segment = std::make_shared( range.left, range.size(), key, this, FileSegment::State::SKIP_CACHE, CreateFileSegmentSettings{}); diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index fb31459d062..706762b6915 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -140,8 +140,8 @@ private: const size_t enable_cache_hits_threshold; const bool enable_filesystem_query_cache_limit; - const bool enable_limit_download_cache_size; - const size_t max_enable_download_cache_size; + const bool enable_bypass_cache_with_threashold; + const size_t bypass_cache_threashold; mutable std::mutex mutex; Poco::Logger * log; diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp index 8dcdbf40698..b13cdd2ed04 100644 --- a/src/Interpreters/Cache/FileCacheSettings.cpp +++ b/src/Interpreters/Cache/FileCacheSettings.cpp @@ -34,8 +34,13 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & cache_on_write_operations = config.getUInt64(config_prefix + ".cache_on_write_operations", false); enable_filesystem_query_cache_limit = config.getUInt64(config_prefix + ".enable_filesystem_query_cache_limit", false); enable_cache_hits_threshold = config.getUInt64(config_prefix + ".enable_cache_hits_threshold", REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD); - enable_limit_download_cache_size = config.getUInt64(config_prefix + ".enable_limit_download_cache_size", false); - max_enable_download_cache_size = config.getUInt64(config_prefix + ".max_enable_download_cache_size", REMOTE_FS_OBJECTS_CACHE_MAX_ENABLE_DOWNLOAD_SIZE); + + enable_bypass_cache_with_threashold = config.getUInt64(config_prefix + ".enable_bypass_cache_with_threashold", false); + + if (config.has(config_prefix + ".bypass_cache_threashold")) + bypass_cache_threashold = parseWithSizeSuffix(config.getString(config_prefix + ".bypass_cache_threashold")); + else + bypass_cache_threashold = REMOTE_FS_OBJECTS_CACHE_BYPASS_THRESHOLD; do_not_evict_index_and_mark_files = config.getUInt64(config_prefix + ".do_not_evict_index_and_mark_files", false); } diff --git a/src/Interpreters/Cache/FileCacheSettings.h b/src/Interpreters/Cache/FileCacheSettings.h index 429480e955d..80f7b5fa93f 100644 --- a/src/Interpreters/Cache/FileCacheSettings.h +++ b/src/Interpreters/Cache/FileCacheSettings.h @@ -20,8 +20,8 @@ struct FileCacheSettings bool do_not_evict_index_and_mark_files = true; - bool enable_limit_download_cache_size = false; - size_t max_enable_download_cache_size = REMOTE_FS_OBJECTS_CACHE_MAX_ENABLE_DOWNLOAD_SIZE; + bool enable_bypass_cache_with_threashold = false; + size_t bypass_cache_threashold = REMOTE_FS_OBJECTS_CACHE_BYPASS_THRESHOLD; void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); }; diff --git a/src/Interpreters/Cache/FileCache_fwd.h b/src/Interpreters/Cache/FileCache_fwd.h index de8dde91991..72dc1144fb9 100644 --- a/src/Interpreters/Cache/FileCache_fwd.h +++ b/src/Interpreters/Cache/FileCache_fwd.h @@ -7,7 +7,7 @@ namespace DB static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 100 * 1024 * 1024; static constexpr int REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS = 1024 * 1024; static constexpr int REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD = 0; -static 
constexpr size_t REMOTE_FS_OBJECTS_CACHE_MAX_ENABLE_DOWNLOAD_SIZE = 256 * 1024 * 1024;; +static constexpr size_t REMOTE_FS_OBJECTS_CACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024;; class FileCache; using FileCachePtr = std::shared_ptr; From 32cf2c762300a14f9b49c3b99add39f68a6569ac Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Wed, 19 Oct 2022 06:54:09 +0000 Subject: [PATCH 170/252] add test for bypass_cache_threshold --- tests/config/config.d/storage_conf.xml | 16 ++++++++++++++++ ...tem_cache_bypass_cache_threshold.reference | 16 ++++++++++++++++ ...ilesystem_cache_bypass_cache_threshold.sql | 19 +++++++++++++++++++ 3 files changed, 51 insertions(+) create mode 100644 tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference create mode 100644 tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index a2a7f5cc750..8226d801cef 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -93,6 +93,15 @@ 22548578304 0 + + cache + s3_disk_6 + s3_cache_6/ + 22548578304 + 0 + 1 + 100 + cache s3_disk_6 @@ -183,6 +192,13 @@ + + +
+                        <disk>s3_cache_6</disk>
+                    </main>
+                </volumes>
+            </s3_cache_6>
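
The bypass settings exercised by the s3_cache_6 disk above change how FileCache::getImpl() classifies a read: ranges larger than the threshold skip the cache entirely (SKIP_CACHE) instead of creating file segments. A minimal standalone sketch of that decision follows; the Range type is a simplified stand-in for FileSegment::Range, and 256 MiB mirrors REMOTE_FS_OBJECTS_CACHE_BYPASS_THRESHOLD.

// Minimal sketch of the bypass decision added to FileCache::getImpl():
// reads larger than the threshold bypass the cache, smaller ones go
// through the usual file-segment lookup. Types are simplified.
#include <cstddef>
#include <cstdio>

struct Range
{
    size_t left;
    size_t right;
    size_t size() const { return right - left + 1; }  // inclusive range
};

bool shouldBypassCache(const Range & range, bool enable_bypass, size_t threshold)
{
    return enable_bypass && range.size() > threshold;
}

int main()
{
    const size_t threshold = 256 * 1024 * 1024;
    std::printf("%d\n", shouldBypassCache({0, 100}, true, threshold));                // 0: small read, cached
    std::printf("%d\n", shouldBypassCache({0, 512 * 1024 * 1024}, true, threshold));  // 1: large read, bypassed
    return 0;
}

This is why the integration test sets a 100-byte threshold: every read of the test table is "large", so system.filesystem_cache stays almost empty while the data is still readable.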
diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference new file mode 100644 index 00000000000..de9ac10f641 --- /dev/null +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.reference @@ -0,0 +1,16 @@ +-- { echo } + +SYSTEM DROP FILESYSTEM CACHE; +SET enable_filesystem_cache_on_write_operations=0; +DROP TABLE IF EXISTS test; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_6', min_bytes_for_wide_part = 10485760; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; +0 79 80 +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; diff --git a/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql new file mode 100644 index 00000000000..d3b3d3d7f4c --- /dev/null +++ b/tests/queries/0_stateless/02240_filesystem_cache_bypass_cache_threshold.sql @@ -0,0 +1,19 @@ +-- Tags: no-parallel, no-fasttest, no-s3-storage, no-random-settings + +-- { echo } + +SYSTEM DROP FILESYSTEM CACHE; +SET enable_filesystem_cache_on_write_operations=0; + +DROP TABLE IF EXISTS test; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_6', min_bytes_for_wide_part = 10485760; +INSERT INTO test SELECT number, toString(number) FROM numbers(100); + +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SELECT * FROM test FORMAT Null; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; +SYSTEM DROP FILESYSTEM CACHE; +SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; From 8322b83ad20330b22275ea2c2d35292500a231c0 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Wed, 19 Oct 2022 07:20:00 +0000 Subject: [PATCH 171/252] rename filesystem_query_cache --- ...ery_cache.reference => 02240_filesystem_query_cache.reference} | 0 ...ilesystem_query_cache.sql => 02240_filesystem_query_cache.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{02240_system_remote_filesystem_query_cache.reference => 02240_filesystem_query_cache.reference} (100%) rename tests/queries/0_stateless/{02240_system_remote_filesystem_query_cache.sql => 02240_filesystem_query_cache.sql} (100%) diff --git a/tests/queries/0_stateless/02240_system_remote_filesystem_query_cache.reference b/tests/queries/0_stateless/02240_filesystem_query_cache.reference similarity index 100% rename from tests/queries/0_stateless/02240_system_remote_filesystem_query_cache.reference rename to tests/queries/0_stateless/02240_filesystem_query_cache.reference diff --git 
a/tests/queries/0_stateless/02240_system_remote_filesystem_query_cache.sql b/tests/queries/0_stateless/02240_filesystem_query_cache.sql similarity index 100% rename from tests/queries/0_stateless/02240_system_remote_filesystem_query_cache.sql rename to tests/queries/0_stateless/02240_filesystem_query_cache.sql From 98c34159ee8605e2237696ce4324f62424ef8a57 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 19 Oct 2022 07:37:56 +0000 Subject: [PATCH 172/252] Use correct type for UInt128 --- src/AggregateFunctions/AggregateFunctionQuantile.cpp | 2 +- .../AggregateFunctionQuantileDeterministic.cpp | 2 +- src/AggregateFunctions/AggregateFunctionQuantileExact.cpp | 2 +- .../AggregateFunctionQuantileExactWeighted.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.cpp b/src/AggregateFunctions/AggregateFunctionQuantile.cpp index 38b3c91be69..60e759b45a3 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantile.cpp +++ b/src/AggregateFunctions/AggregateFunctionQuantile.cpp @@ -46,7 +46,7 @@ AggregateFunctionPtr createAggregateFunctionQuantile( if (which.idx == TypeIndex::DateTime64) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::Int128) return std::make_shared>(argument_types, params); - if (which.idx == TypeIndex::UInt128) return std::make_shared>(argument_types, params); + if (which.idx == TypeIndex::UInt128) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::Int256) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::UInt256) return std::make_shared>(argument_types, params); diff --git a/src/AggregateFunctions/AggregateFunctionQuantileDeterministic.cpp b/src/AggregateFunctions/AggregateFunctionQuantileDeterministic.cpp index a9486da25fa..1605056e5d9 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantileDeterministic.cpp +++ b/src/AggregateFunctions/AggregateFunctionQuantileDeterministic.cpp @@ -40,7 +40,7 @@ AggregateFunctionPtr createAggregateFunctionQuantile( if (which.idx == TypeIndex::DateTime) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::Int128) return std::make_shared>(argument_types, params); - if (which.idx == TypeIndex::UInt128) return std::make_shared>(argument_types, params); + if (which.idx == TypeIndex::UInt128) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::Int256) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::UInt256) return std::make_shared>(argument_types, params); diff --git a/src/AggregateFunctions/AggregateFunctionQuantileExact.cpp b/src/AggregateFunctions/AggregateFunctionQuantileExact.cpp index 39de9d0eeaf..e9a3edf1e05 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantileExact.cpp +++ b/src/AggregateFunctions/AggregateFunctionQuantileExact.cpp @@ -47,7 +47,7 @@ AggregateFunctionPtr createAggregateFunctionQuantile( if (which.idx == TypeIndex::DateTime64) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::Int128) return std::make_shared>(argument_types, params); - if (which.idx == TypeIndex::UInt128) return std::make_shared>(argument_types, params); + if (which.idx == TypeIndex::UInt128) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::Int256) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::UInt256) return std::make_shared>(argument_types, params); diff --git 
a/src/AggregateFunctions/AggregateFunctionQuantileExactWeighted.cpp b/src/AggregateFunctions/AggregateFunctionQuantileExactWeighted.cpp index 63e4d3df24b..e9b6012dcdb 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantileExactWeighted.cpp +++ b/src/AggregateFunctions/AggregateFunctionQuantileExactWeighted.cpp @@ -46,7 +46,7 @@ AggregateFunctionPtr createAggregateFunctionQuantile( if (which.idx == TypeIndex::DateTime64) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::Int128) return std::make_shared>(argument_types, params); - if (which.idx == TypeIndex::UInt128) return std::make_shared>(argument_types, params); + if (which.idx == TypeIndex::UInt128) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::Int256) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::UInt256) return std::make_shared>(argument_types, params); From c0f18f29bb7a13ad38268988c79e2b03d7dac1ca Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 19 Oct 2022 13:23:21 +0000 Subject: [PATCH 173/252] Remove toExtendedReplated; Add template argument is_extended_result --- src/Functions/DateTimeTransforms.h | 129 +++++++++++---- src/Functions/dateDiff.cpp | 211 ++----------------------- src/Functions/toRelativeDayNum.cpp | 2 +- src/Functions/toRelativeHourNum.cpp | 2 +- src/Functions/toRelativeMinuteNum.cpp | 2 +- src/Functions/toRelativeMonthNum.cpp | 2 +- src/Functions/toRelativeQuarterNum.cpp | 2 +- src/Functions/toRelativeSecondNum.cpp | 2 +- src/Functions/toRelativeWeekNum.cpp | 2 +- src/Functions/toRelativeYearNum.cpp | 2 +- 10 files changed, 113 insertions(+), 243 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 217f158cc8e..c4ade5facd5 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -1034,21 +1034,33 @@ struct ToISOWeekImpl using FactorTransform = ToISOYearImpl; }; +/// Unsigned results (is_extended_result = false) potentially lead to overflows when returning values. +/// This mode is used by SQL functions "toRelative*Num()" which cannot easily be changed due to backward compatibility. +/// According to documentation, these functions merely need to compute the time difference to a deterministic, fixed point in the past. +/// As a future TODO, we should fix their behavior in a backwards-compatible way. +/// See https://github.com/ClickHouse/ClickHouse/issues/41977#issuecomment-1267536814. 
+template struct ToRelativeYearNumImpl { static constexpr auto name = "toRelativeYearNum"; - static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) + static inline auto execute(Int64 t, const DateLUTImpl & time_zone) { - return time_zone.toYear(t); + if constexpr (is_extended_result) + return static_cast(time_zone.toYear(t)); + else + return static_cast(time_zone.toYear(t)); } static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toYear(static_cast(t)); } - static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + static inline auto execute(Int32 d, const DateLUTImpl & time_zone) { - return time_zone.toYear(ExtendedDayNum(d)); + if constexpr (is_extended_result) + return static_cast(time_zone.toYear(ExtendedDayNum(d))); + else + return static_cast(time_zone.toYear(ExtendedDayNum(d))); } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { @@ -1058,21 +1070,28 @@ struct ToRelativeYearNumImpl using FactorTransform = ZeroTransform; }; +template struct ToRelativeQuarterNumImpl { static constexpr auto name = "toRelativeQuarterNum"; - static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) + static inline auto execute(Int64 t, const DateLUTImpl & time_zone) { - return time_zone.toRelativeQuarterNum(t); + if constexpr (is_extended_result) + return static_cast(time_zone.toRelativeQuarterNum(t)); + else + return static_cast(time_zone.toRelativeQuarterNum(t)); } static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toRelativeQuarterNum(static_cast(t)); } - static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + static inline auto execute(Int32 d, const DateLUTImpl & time_zone) { - return time_zone.toRelativeQuarterNum(ExtendedDayNum(d)); + if constexpr (is_extended_result) + return static_cast(time_zone.toRelativeQuarterNum(ExtendedDayNum(d))); + else + return static_cast(time_zone.toRelativeQuarterNum(ExtendedDayNum(d))); } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { @@ -1082,21 +1101,28 @@ struct ToRelativeQuarterNumImpl using FactorTransform = ZeroTransform; }; +template struct ToRelativeMonthNumImpl { static constexpr auto name = "toRelativeMonthNum"; - static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) + static inline auto execute(Int64 t, const DateLUTImpl & time_zone) { - return time_zone.toRelativeMonthNum(t); + if constexpr (is_extended_result) + return static_cast(time_zone.toRelativeMonthNum(t)); + else + return static_cast(time_zone.toRelativeMonthNum(t)); } static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toRelativeMonthNum(static_cast(t)); } - static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + static inline auto execute(Int32 d, const DateLUTImpl & time_zone) { - return time_zone.toRelativeMonthNum(ExtendedDayNum(d)); + if constexpr (is_extended_result) + return static_cast(time_zone.toRelativeMonthNum(ExtendedDayNum(d))); + else + return static_cast(time_zone.toRelativeMonthNum(ExtendedDayNum(d))); } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { @@ -1106,21 +1132,28 @@ struct ToRelativeMonthNumImpl using FactorTransform = ZeroTransform; }; +template struct ToRelativeWeekNumImpl { static constexpr auto name = "toRelativeWeekNum"; - static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) + static inline auto execute(Int64 t, const DateLUTImpl & time_zone) { - return time_zone.toRelativeWeekNum(t); + if 
constexpr (is_extended_result) + return static_cast(time_zone.toRelativeWeekNum(t)); + else + return static_cast(time_zone.toRelativeWeekNum(t)); } static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toRelativeWeekNum(static_cast(t)); } - static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone) + static inline auto execute(Int32 d, const DateLUTImpl & time_zone) { - return time_zone.toRelativeWeekNum(ExtendedDayNum(d)); + if constexpr (is_extended_result) + return static_cast(time_zone.toRelativeWeekNum(ExtendedDayNum(d))); + else + return static_cast(time_zone.toRelativeWeekNum(ExtendedDayNum(d))); } static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) { @@ -1130,21 +1163,28 @@ struct ToRelativeWeekNumImpl using FactorTransform = ZeroTransform; }; +template struct ToRelativeDayNumImpl { static constexpr auto name = "toRelativeDayNum"; - static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone) + static inline auto execute(Int64 t, const DateLUTImpl & time_zone) { - return time_zone.toDayNum(t); + if constexpr (is_extended_result) + return static_cast(time_zone.toDayNum(t)); + else + return static_cast(time_zone.toDayNum(t)); } static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toDayNum(static_cast(t)); } - static inline UInt16 execute(Int32 d, const DateLUTImpl &) + static inline auto execute(Int32 d, const DateLUTImpl &) { - return static_cast(d); + if constexpr (is_extended_result) + return static_cast(static_cast(d)); + else + return static_cast(static_cast(d)); } static inline UInt16 execute(UInt16 d, const DateLUTImpl &) { @@ -1154,46 +1194,65 @@ struct ToRelativeDayNumImpl using FactorTransform = ZeroTransform; }; - +template struct ToRelativeHourNumImpl { static constexpr auto name = "toRelativeHourNum"; - static inline UInt32 execute(Int64 t, const DateLUTImpl & time_zone) + static inline auto execute(Int64 t, const DateLUTImpl & time_zone) { - return time_zone.toRelativeHourNum(t); + if constexpr (is_extended_result) + return static_cast(time_zone.toStableRelativeHourNum(t)); + else + return static_cast(time_zone.toRelativeHourNum(t)); } static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) { - return time_zone.toRelativeHourNum(static_cast(t)); + if constexpr (is_extended_result) + return time_zone.toStableRelativeHourNum(static_cast(t)); + else + return time_zone.toRelativeHourNum(static_cast(t)); } - static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + static inline auto execute(Int32 d, const DateLUTImpl & time_zone) { - return time_zone.toRelativeHourNum(ExtendedDayNum(d)); + if constexpr (is_extended_result) + return static_cast(time_zone.toStableRelativeHourNum(ExtendedDayNum(d))); + else + return static_cast(time_zone.toRelativeHourNum(ExtendedDayNum(d))); } static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toRelativeHourNum(DayNum(d)); + if constexpr (is_extended_result) + return time_zone.toStableRelativeHourNum(DayNum(d)); + else + return time_zone.toRelativeHourNum(DayNum(d)); } using FactorTransform = ZeroTransform; }; +template struct ToRelativeMinuteNumImpl { static constexpr auto name = "toRelativeMinuteNum"; - static inline UInt32 execute(Int64 t, const DateLUTImpl & time_zone) + static inline auto execute(Int64 t, const DateLUTImpl & time_zone) { - return time_zone.toRelativeMinuteNum(t); + if constexpr (is_extended_result) + return 
static_cast(time_zone.toRelativeMinuteNum(t)); + else + return static_cast(time_zone.toRelativeMinuteNum(t)); } static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toRelativeMinuteNum(static_cast(t)); } - static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + static inline auto execute(Int32 d, const DateLUTImpl & time_zone) { - return time_zone.toRelativeMinuteNum(ExtendedDayNum(d)); + if constexpr (is_extended_result) + return static_cast(time_zone.toRelativeMinuteNum(ExtendedDayNum(d))); + else + return static_cast(time_zone.toRelativeMinuteNum(ExtendedDayNum(d))); } static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { @@ -1203,6 +1262,7 @@ struct ToRelativeMinuteNumImpl using FactorTransform = ZeroTransform; }; +template struct ToRelativeSecondNumImpl { static constexpr auto name = "toRelativeSecondNum"; @@ -1215,9 +1275,12 @@ struct ToRelativeSecondNumImpl { return t; } - static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + static inline auto execute(Int32 d, const DateLUTImpl & time_zone) { - return time_zone.fromDayNum(ExtendedDayNum(d)); + if constexpr (is_extended_result) + return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); + else + return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); } static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 18178b68165..479966ac9fd 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -33,199 +33,6 @@ namespace ErrorCodes namespace { -struct ToExtendedRelativeYearNumImpl -{ - static constexpr auto name = "toExtendedRelativeYearNum"; - - static inline Int16 execute(Int64 t, const DateLUTImpl & time_zone) - { - return time_zone.toYear(t); - } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) - { - return time_zone.toYear(static_cast(t)); - } - static inline Int16 execute(Int32 d, const DateLUTImpl & time_zone) - { - return time_zone.toYear(ExtendedDayNum(d)); - } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) - { - return time_zone.toYear(DayNum(d)); - } - - using FactorTransform = ZeroTransform; -}; - -struct ToExtendedRelativeQuarterNumImpl -{ - static constexpr auto name = "toExtendedRelativeQuarterNum"; - - static inline Int32 execute(Int64 t, const DateLUTImpl & time_zone) - { - return time_zone.toRelativeQuarterNum(t); - } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) - { - return time_zone.toRelativeQuarterNum(static_cast(t)); - } - static inline Int32 execute(Int32 d, const DateLUTImpl & time_zone) - { - return time_zone.toRelativeQuarterNum(ExtendedDayNum(d)); - } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) - { - return time_zone.toRelativeQuarterNum(DayNum(d)); - } - - using FactorTransform = ZeroTransform; -}; - -struct ToExtendedRelativeMonthNumImpl -{ - static constexpr auto name = "toExtendedRelativeMonthNum"; - - static inline Int32 execute(Int64 t, const DateLUTImpl & time_zone) - { - return time_zone.toRelativeMonthNum(t); - } - static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) - { - return time_zone.toRelativeMonthNum(static_cast(t)); - } - static inline Int32 execute(Int32 d, const DateLUTImpl & time_zone) - { - return time_zone.toRelativeMonthNum(ExtendedDayNum(d)); - } - static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone) - { - return time_zone.toRelativeMonthNum(DayNum(d)); 
-    }
-
-    using FactorTransform = ZeroTransform;
-};
-
-struct ToExtendedRelativeWeekNumImpl
-{
-    static constexpr auto name = "toExtendedRelativeWeekNum";
-
-    static inline Int32 execute(Int64 t, const DateLUTImpl & time_zone)
-    {
-        return time_zone.toRelativeWeekNum(t);
-    }
-    static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone)
-    {
-        return time_zone.toRelativeWeekNum(static_cast<time_t>(t));
-    }
-    static inline Int32 execute(Int32 d, const DateLUTImpl & time_zone)
-    {
-        return time_zone.toRelativeWeekNum(ExtendedDayNum(d));
-    }
-    static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone)
-    {
-        return time_zone.toRelativeWeekNum(DayNum(d));
-    }
-
-    using FactorTransform = ZeroTransform;
-};
-
-struct ToExtendedRelativeDayNumImpl
-{
-    static constexpr auto name = "toExtendedRelativeDayNum";
-
-    static inline Int64 execute(Int64 t, const DateLUTImpl & time_zone)
-    {
-        return time_zone.toDayNum(t);
-    }
-    static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone)
-    {
-        return time_zone.toDayNum(static_cast<time_t>(t));
-    }
-    static inline Int32 execute(Int32 d, const DateLUTImpl &)
-    {
-        return static_cast<Int32>(d);
-    }
-    static inline UInt16 execute(UInt16 d, const DateLUTImpl &)
-    {
-        return static_cast<UInt16>(d);
-    }
-
-    using FactorTransform = ZeroTransform;
-};
-
-struct ToExtendedRelativeHourNumImpl
-{
-    static constexpr auto name = "toExtendedRelativeHourNum";
-
-    static inline Int64 execute(Int64 t, const DateLUTImpl & time_zone)
-    {
-        return time_zone.toStableRelativeHourNum(t);
-    }
-    static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone)
-    {
-        return time_zone.toStableRelativeHourNum(static_cast<time_t>(t));
-    }
-    static inline Int64 execute(Int32 d, const DateLUTImpl & time_zone)
-    {
-        return time_zone.toStableRelativeHourNum(ExtendedDayNum(d));
-    }
-    static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
-    {
-        return time_zone.toStableRelativeHourNum(DayNum(d));
-    }
-
-    using FactorTransform = ZeroTransform;
-};
-
-struct ToExtendedRelativeMinuteNumImpl
-{
-    static constexpr auto name = "toExtendedRelativeMinuteNum";
-
-    static inline Int64 execute(Int64 t, const DateLUTImpl & time_zone)
-    {
-        return time_zone.toRelativeMinuteNum(t);
-    }
-    static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone)
-    {
-        return time_zone.toRelativeMinuteNum(static_cast<time_t>(t));
-    }
-    static inline Int64 execute(Int32 d, const DateLUTImpl & time_zone)
-    {
-        return time_zone.toRelativeMinuteNum(ExtendedDayNum(d));
-    }
-    static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
-    {
-        return time_zone.toRelativeMinuteNum(DayNum(d));
-    }
-
-    using FactorTransform = ZeroTransform;
-};
-
-struct ToExtendedRelativeSecondNumImpl
-{
-    static constexpr auto name = "toExtendedRelativeSecondNum";
-
-    static inline Int64 execute(Int64 t, const DateLUTImpl &)
-    {
-        return t;
-    }
-    static inline UInt32 execute(UInt32 t, const DateLUTImpl &)
-    {
-        return t;
-    }
-    static inline Int64 execute(Int32 d, const DateLUTImpl & time_zone)
-    {
-        return time_zone.fromDayNum(ExtendedDayNum(d));
-    }
-    static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
-    {
-        return time_zone.fromDayNum(DayNum(d));
-    }
-
-    using FactorTransform = ZeroTransform;
-};
-
 /** dateDiff('unit', t1, t2, [timezone])
   * t1 and t2 can be Date or DateTime
   *
@@ -256,7 +63,7 @@ public:
         if (arguments.size() != 3 && arguments.size() != 4)
             throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                 "Number of arguments for function {} doesn't match: passed {}, should be 3 or 4",
-                getName(), toString(arguments.size()));
+                getName(), arguments.size());
 
         if (!isString(arguments[0]))
             throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
@@ -305,21 +112,21 @@ public:
         const auto & timezone_y = extractTimeZoneFromFunctionArguments(arguments, 3, 2);
 
         if (unit == "year" || unit == "yy" || unit == "yyyy")
-            dispatchForColumns<ToExtendedRelativeYearNumImpl>(x, y, timezone_x, timezone_y, res->getData());
+            dispatchForColumns<ToRelativeYearNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
         else if (unit == "quarter" || unit == "qq" || unit == "q")
-            dispatchForColumns<ToExtendedRelativeQuarterNumImpl>(x, y, timezone_x, timezone_y, res->getData());
+            dispatchForColumns<ToRelativeQuarterNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
        else if (unit == "month" || unit == "mm" || unit == "m")
-            dispatchForColumns<ToExtendedRelativeMonthNumImpl>(x, y, timezone_x, timezone_y, res->getData());
+            dispatchForColumns<ToRelativeMonthNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
         else if (unit == "week" || unit == "wk" || unit == "ww")
-            dispatchForColumns<ToExtendedRelativeWeekNumImpl>(x, y, timezone_x, timezone_y, res->getData());
+            dispatchForColumns<ToRelativeWeekNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
         else if (unit == "day" || unit == "dd" || unit == "d")
-            dispatchForColumns<ToExtendedRelativeDayNumImpl>(x, y, timezone_x, timezone_y, res->getData());
+            dispatchForColumns<ToRelativeDayNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
         else if (unit == "hour" || unit == "hh" || unit == "h")
-            dispatchForColumns<ToExtendedRelativeHourNumImpl>(x, y, timezone_x, timezone_y, res->getData());
+            dispatchForColumns<ToRelativeHourNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
         else if (unit == "minute" || unit == "mi" || unit == "n")
-            dispatchForColumns<ToExtendedRelativeMinuteNumImpl>(x, y, timezone_x, timezone_y, res->getData());
+            dispatchForColumns<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
         else if (unit == "second" || unit == "ss" || unit == "s")
-            dispatchForColumns<ToExtendedRelativeSecondNumImpl>(x, y, timezone_x, timezone_y, res->getData());
+            dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
         else
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} does not support '{}' unit", getName(), unit);
 
diff --git a/src/Functions/toRelativeDayNum.cpp b/src/Functions/toRelativeDayNum.cpp
index 241104493cd..e2dee0e305c 100644
--- a/src/Functions/toRelativeDayNum.cpp
+++ b/src/Functions/toRelativeDayNum.cpp
@@ -7,7 +7,7 @@
 namespace DB
 {
 
-using FunctionToRelativeDayNum = FunctionDateOrDateTimeToSomething<DataTypeUInt16, ToRelativeDayNumImpl>;
+using FunctionToRelativeDayNum = FunctionDateOrDateTimeToSomething<DataTypeUInt16, ToRelativeDayNumImpl<ResultPrecision::Standard>>;
 
 REGISTER_FUNCTION(ToRelativeDayNum)
 {
diff --git a/src/Functions/toRelativeHourNum.cpp b/src/Functions/toRelativeHourNum.cpp
index 2404d73c450..67e61735134 100644
--- a/src/Functions/toRelativeHourNum.cpp
+++ b/src/Functions/toRelativeHourNum.cpp
@@ -7,7 +7,7 @@
 namespace DB
 {
 
-using FunctionToRelativeHourNum = FunctionDateOrDateTimeToSomething<DataTypeUInt32, ToRelativeHourNumImpl>;
+using FunctionToRelativeHourNum = FunctionDateOrDateTimeToSomething<DataTypeUInt32, ToRelativeHourNumImpl<ResultPrecision::Standard>>;
 
 REGISTER_FUNCTION(ToRelativeHourNum)
 {
diff --git a/src/Functions/toRelativeMinuteNum.cpp b/src/Functions/toRelativeMinuteNum.cpp
index a5ecada1e92..5ead860131f 100644
--- a/src/Functions/toRelativeMinuteNum.cpp
+++ b/src/Functions/toRelativeMinuteNum.cpp
@@ -7,7 +7,7 @@
 namespace DB
 {
 
-using FunctionToRelativeMinuteNum = FunctionDateOrDateTimeToSomething<DataTypeUInt32, ToRelativeMinuteNumImpl>;
+using FunctionToRelativeMinuteNum = FunctionDateOrDateTimeToSomething<DataTypeUInt32, ToRelativeMinuteNumImpl<ResultPrecision::Standard>>;
 
 REGISTER_FUNCTION(ToRelativeMinuteNum)
 {
diff --git a/src/Functions/toRelativeMonthNum.cpp b/src/Functions/toRelativeMonthNum.cpp
index 8f46e04e483..f845d036ff5 100644
--- a/src/Functions/toRelativeMonthNum.cpp
+++ b/src/Functions/toRelativeMonthNum.cpp
@@ -7,7 +7,7 @@
 namespace DB
 {
 
-using FunctionToRelativeMonthNum = FunctionDateOrDateTimeToSomething<DataTypeUInt16, ToRelativeMonthNumImpl>;
+using FunctionToRelativeMonthNum = FunctionDateOrDateTimeToSomething<DataTypeUInt16, ToRelativeMonthNumImpl<ResultPrecision::Standard>>;
 
 REGISTER_FUNCTION(ToRelativeMonthNum)
 {
diff --git a/src/Functions/toRelativeQuarterNum.cpp b/src/Functions/toRelativeQuarterNum.cpp
index 8ea0c42ef09..2862a0aeff7 100644
--- a/src/Functions/toRelativeQuarterNum.cpp
+++ b/src/Functions/toRelativeQuarterNum.cpp
@@ -7,7 +7,7 @@
 namespace DB
 {
 
-using FunctionToRelativeQuarterNum = FunctionDateOrDateTimeToSomething<DataTypeUInt16, ToRelativeQuarterNumImpl>;
+using FunctionToRelativeQuarterNum = FunctionDateOrDateTimeToSomething<DataTypeUInt16, ToRelativeQuarterNumImpl<ResultPrecision::Standard>>;
 
 REGISTER_FUNCTION(ToRelativeQuarterNum)
 {
diff --git a/src/Functions/toRelativeSecondNum.cpp b/src/Functions/toRelativeSecondNum.cpp
index 7af41ab8334..c7552d550b8 100644
--- a/src/Functions/toRelativeSecondNum.cpp
+++ b/src/Functions/toRelativeSecondNum.cpp
@@ -7,7 +7,7 @@
 namespace DB
 {
 
-using FunctionToRelativeSecondNum = FunctionDateOrDateTimeToSomething<DataTypeUInt32, ToRelativeSecondNumImpl>;
+using FunctionToRelativeSecondNum = FunctionDateOrDateTimeToSomething<DataTypeUInt32, ToRelativeSecondNumImpl<ResultPrecision::Standard>>;
 
 REGISTER_FUNCTION(ToRelativeSecondNum)
 {
diff --git a/src/Functions/toRelativeWeekNum.cpp b/src/Functions/toRelativeWeekNum.cpp
index fe7aec3fd9a..c35eb18edaf 100644
--- a/src/Functions/toRelativeWeekNum.cpp
+++ b/src/Functions/toRelativeWeekNum.cpp
@@ -7,7 +7,7 @@
 namespace DB
 {
 
-using FunctionToRelativeWeekNum = FunctionDateOrDateTimeToSomething<DataTypeUInt16, ToRelativeWeekNumImpl>;
+using FunctionToRelativeWeekNum = FunctionDateOrDateTimeToSomething<DataTypeUInt16, ToRelativeWeekNumImpl<ResultPrecision::Standard>>;
 
 REGISTER_FUNCTION(ToRelativeWeekNum)
 {
diff --git a/src/Functions/toRelativeYearNum.cpp b/src/Functions/toRelativeYearNum.cpp
index 4574d8513e0..fb80957062b 100644
--- a/src/Functions/toRelativeYearNum.cpp
+++ b/src/Functions/toRelativeYearNum.cpp
@@ -7,7 +7,7 @@
 namespace DB
 {
 
-using FunctionToRelativeYearNum = FunctionDateOrDateTimeToSomething<DataTypeUInt16, ToRelativeYearNumImpl>;
+using FunctionToRelativeYearNum = FunctionDateOrDateTimeToSomething<DataTypeUInt16, ToRelativeYearNumImpl<ResultPrecision::Standard>>;
 
 REGISTER_FUNCTION(ToRelativeYearNum)
 {

From 42f680849cd72559b51e7e5279c203d0f09e955f Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Wed, 19 Oct 2022 12:35:47 +0000
Subject: [PATCH 174/252] Fix bad_cast in Annoy index

- Problem originally found by data type fuzzer
  https://s3.amazonaws.com/clickhouse-test-reports/42180/2f83d8790581dce0ffeec56c137b1d13160cfa7b/fuzzer_astfuzzermsan//report.html
- This commit restricts which data types are allowed for Annoy indexes
  (similar things are done for other index types).
---
 .../mergetree-family/mergetree.md             |  2 +
 .../MergeTree/MergeTreeIndexAnnoy.cpp         | 82 +++++++++++++------
 src/Storages/MergeTree/MergeTreeIndexAnnoy.h  | 16 ++--
 tests/queries/0_stateless/02354_annoy.sql    | 68 +++++++++++++++
 4 files changed, 130 insertions(+), 38 deletions(-)

diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index 9dc7e300d45..486baac2310 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -419,6 +419,8 @@ Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `St
 
 For `Map` data type client can specify if index should be created for keys or values using [mapKeys](../../../sql-reference/functions/tuple-map-functions.md#mapkeys) or [mapValues](../../../sql-reference/functions/tuple-map-functions.md#mapvalues) function.
 
+There are also special-purpose and experimental indexes to support approximate nearest neighbor (ANN) queries. See [here](annindexes.md) for details.
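For orientation, the shape of index that this new docs paragraph refers to is the one exercised by `tests/queries/0_stateless/02354_annoy.sql` later in this patch. The following sketch is illustrative only; the table and column names are placeholders, not part of the patch:

```sql
-- Illustrative sketch; mirrors the accepted case in 02354_annoy.sql.
CREATE TABLE ann_demo
(
    id Int32,
    embedding Array(Float32),  -- only Array(Float32) and Tuple(Float32) pass validation
    INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY id;

-- A nearest-neighbor query of the kind the index can serve:
SELECT id
FROM ann_demo
ORDER BY L2Distance(embedding, [0.0, 0.0])
LIMIT 3;
```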
+
 The following functions can use the filter: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions), [notIn](../../../sql-reference/functions/in-functions), [has](../../../sql-reference/functions/array-functions#hasarr-elem), [hasAny](../../../sql-reference/functions/array-functions#hasany), [hasAll](../../../sql-reference/functions/array-functions#hasall).
 
 Example of index creation for `Map` data type
diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
index 3b16998337e..595e790ea3b 100644
--- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 
 
 namespace DB
@@ -64,9 +65,11 @@ uint64_t AnnoyIndex::getNumOfDimensions() const
 
 namespace ErrorCodes
 {
-    extern const int LOGICAL_ERROR;
-    extern const int INCORRECT_QUERY;
+    extern const int ILLEGAL_COLUMN;
     extern const int INCORRECT_DATA;
+    extern const int INCORRECT_NUMBER_OF_COLUMNS;
+    extern const int INCORRECT_QUERY;
+    extern const int LOGICAL_ERROR;
 }
 
 MergeTreeIndexGranuleAnnoy::MergeTreeIndexGranuleAnnoy(const String & index_name_, const Block & index_sample_block_)
@@ -132,9 +135,7 @@ void MergeTreeIndexAggregatorAnnoy::update(const Block & block, size_t * pos, si
         return;
 
     if (index_sample_block.columns() > 1)
-    {
         throw Exception("Only one column is supported", ErrorCodes::LOGICAL_ERROR);
-    }
 
     auto index_column_name = index_sample_block.getByPosition(0).name;
     const auto & column_cut = block.getByName(index_column_name).column->cut(*pos, rows_read);
@@ -144,27 +145,22 @@ void MergeTreeIndexAggregatorAnnoy::update(const Block & block, size_t * pos, si
         const auto & data = column_array->getData();
         const auto & array = typeid_cast<const ColumnFloat32 &>(data).getData();
         if (array.empty())
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Array have 0 rows, but {} expected", rows_read);
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Array has 0 rows, {} rows expected", rows_read);
 
         const auto & offsets = column_array->getOffsets();
         size_t num_rows = offsets.size();
 
-        /// All sizes are the same
+        /// Check all sizes are the same
         size_t size = offsets[0];
         for (size_t i = 0; i < num_rows - 1; ++i)
-        {
             if (offsets[i + 1] - offsets[i] != size)
-            {
                 throw Exception(ErrorCodes::INCORRECT_DATA, "Arrays should have same length");
-            }
-        }
+
         index = std::make_shared<AnnoyIndex>(size);
 
         index->add_item(index->get_n_items(), array.data());
         /// add all rows from 1 to num_rows - 1 (this is the same as the beginning of the last element)
         for (size_t current_row = 1; current_row < num_rows; ++current_row)
-        {
             index->add_item(index->get_n_items(), &array[offsets[current_row - 1]]);
-        }
     }
     else
     {
@@ -181,19 +177,13 @@ void MergeTreeIndexAggregatorAnnoy::update(const Block & block, size_t * pos, si
         {
             const auto& pod_array = typeid_cast<const ColumnFloat32 *>(column.get())->getData();
             for (size_t i = 0; i < pod_array.size(); ++i)
-            {
                 data[i].push_back(pod_array[i]);
-            }
         }
         assert(!data.empty());
         if (!index)
-        {
             index = std::make_shared<AnnoyIndex>(data[0].size());
-        }
         for (const auto& item : data)
-        {
             index->add_item(index->get_n_items(), item.data());
-        }
     }
 
     *pos += rows_read;
@@ -222,7 +212,7 @@ std::vector<size_t> MergeTreeIndexConditionAnnoy::getUsefulRanges(MergeTreeIndex
 {
     UInt64 limit = condition.getLimit();
     UInt64 index_granularity = condition.getIndexGranularity();
-    std::optional<float> comp_dist = condition.getQueryType() == ANN::ANNQueryInformation::Type::Where ?
+    std::optional<float> comp_dist = condition.getQueryType() == ApproximateNearestNeighbour::ANNQueryInformation::Type::Where ?
         std::optional<float>(condition.getComparisonDistanceForWhereQuery()) : std::nullopt;
 
     if (comp_dist && comp_dist.value() < 0)
@@ -232,16 +222,13 @@ std::vector<size_t> MergeTreeIndexConditionAnnoy::getUsefulRanges(MergeTreeIndex
 
     auto granule = std::dynamic_pointer_cast<MergeTreeIndexGranuleAnnoy>(idx_granule);
     if (granule == nullptr)
-    {
         throw Exception("Granule has the wrong type", ErrorCodes::LOGICAL_ERROR);
-    }
+
     auto annoy = granule->index;
 
     if (condition.getNumOfDimensions() != annoy->getNumOfDimensions())
-    {
         throw Exception("The dimension of the space in the request (" + toString(condition.getNumOfDimensions()) + ") "
             + "does not match with the dimension in the index (" + toString(annoy->getNumOfDimensions()) + ")", ErrorCodes::INCORRECT_QUERY);
-    }
 
     /// neighbors contain indexes of dots which were closest to target vector
     std::vector<UInt64> neighbors;
@@ -268,23 +255,25 @@ std::vector<size_t> MergeTreeIndexConditionAnnoy::getUsefulRanges(MergeTreeIndex
     for (size_t i = 0; i < neighbors.size(); ++i)
     {
         if (comp_dist && distances[i] > comp_dist)
-        {
             continue;
-        }
+
         granule_numbers.insert(neighbors[i] / index_granularity);
     }
 
     std::vector<size_t> result_vector;
     result_vector.reserve(granule_numbers.size());
     for (auto granule_number : granule_numbers)
-    {
         result_vector.push_back(granule_number);
-    }
 
     return result_vector;
 }
 
+MergeTreeIndexAnnoy::MergeTreeIndexAnnoy(const IndexDescription & index_, uint64_t number_of_trees_)
+    : IMergeTreeIndex(index_)
+    , number_of_trees(number_of_trees_)
+{
+}
+
 MergeTreeIndexGranulePtr MergeTreeIndexAnnoy::createIndexGranule() const
 {
     return std::make_shared<MergeTreeIndexGranuleAnnoy>(index.name, index.sample_block);
@@ -307,6 +296,40 @@ MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index)
     return std::make_shared<MergeTreeIndexAnnoy>(index, param);
 }
 
+static void assertIndexColumnsType(const Block & header)
+{
+    DataTypePtr column_data_type_ptr = header.getDataTypes()[0];
+
+    if (const auto * array_type = typeid_cast<const DataTypeArray *>(column_data_type_ptr.get()))
+    {
+        TypeIndex nested_type_index = array_type->getNestedType()->getTypeId();
+        if (!WhichDataType(nested_type_index).isFloat32())
+            throw Exception(
+                ErrorCodes::ILLEGAL_COLUMN,
+                "Unexpected type {} of Annoy index. Only Array(Float32) and Tuple(Float32) are supported.",
+                column_data_type_ptr->getName());
+    }
+    else if (const auto * tuple_type = typeid_cast<const DataTypeTuple *>(column_data_type_ptr.get()))
+    {
+        const DataTypes & nested_types = tuple_type->getElements();
+        for (const auto & type : nested_types)
+        {
+            TypeIndex nested_type_index = type->getTypeId();
+            if (!WhichDataType(nested_type_index).isFloat32())
+                throw Exception(
+                    ErrorCodes::ILLEGAL_COLUMN,
+                    "Unexpected type {} of Annoy index. Only Array(Float32) and Tuple(Float32) are supported.",
+                    column_data_type_ptr->getName());
+        }
+    }
+    else
+        throw Exception(
+            ErrorCodes::ILLEGAL_COLUMN,
+            "Unexpected type {} of Annoy index. Only Array(Float32) and Tuple(Float32) are supported.",
+            column_data_type_ptr->getName());
+
+}
+
 void annoyIndexValidator(const IndexDescription & index, bool /* attach */)
 {
     if (index.arguments.size() != 1)
@@ -317,6 +340,11 @@ void annoyIndexValidator(const IndexDescription & index, bool /* attach */)
     {
         throw Exception("Annoy index argument must be UInt64.", ErrorCodes::INCORRECT_QUERY);
     }
+
+    if (index.column_names.size() != 1 || index.data_types.size() != 1)
+        throw Exception("Annoy indexes must be created on a single column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS);
+
+    assertIndexColumnsType(index.sample_block);
 }
 
 }
diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h
index 85bbb0a1bd2..6a844947bd2 100644
--- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h
+++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h
@@ -10,8 +10,6 @@
 namespace DB
 {
 
-namespace ANN = ApproximateNearestNeighbour;
-
 // auxiliary namespace for working with spotify-annoy library
 // mainly for serialization and deserialization of the index
 namespace ApproximateNearestNeighbour
@@ -33,7 +31,7 @@ namespace ApproximateNearestNeighbour
 
 struct MergeTreeIndexGranuleAnnoy final : public IMergeTreeIndexGranule
 {
-    using AnnoyIndex = ANN::AnnoyIndex<>;
+    using AnnoyIndex = ApproximateNearestNeighbour::AnnoyIndex<>;
     using AnnoyIndexPtr = std::shared_ptr<AnnoyIndex>;
 
     MergeTreeIndexGranuleAnnoy(const String & index_name_, const Block & index_sample_block_);
@@ -57,7 +55,7 @@ struct MergeTreeIndexGranuleAnnoy final : public IMergeTreeIndexGranule
 
 struct MergeTreeIndexAggregatorAnnoy final : IMergeTreeIndexAggregator
 {
-    using AnnoyIndex = ANN::AnnoyIndex<>;
+    using AnnoyIndex = ApproximateNearestNeighbour::AnnoyIndex<>;
     using AnnoyIndexPtr = std::shared_ptr<AnnoyIndex>;
 
     MergeTreeIndexAggregatorAnnoy(const String & index_name_, const Block & index_sample_block, uint64_t number_of_trees);
@@ -74,7 +72,7 @@ struct MergeTreeIndexAggregatorAnnoy final : IMergeTreeIndexAggregator
 };
 
 
-class MergeTreeIndexConditionAnnoy final : public ANN::IMergeTreeIndexConditionAnn
+class MergeTreeIndexConditionAnnoy final : public ApproximateNearestNeighbour::IMergeTreeIndexConditionAnn
 {
 public:
     MergeTreeIndexConditionAnnoy(
@@ -91,18 +89,14 @@ public:
     ~MergeTreeIndexConditionAnnoy() override = default;
 
 private:
-    ANN::ANNCondition condition;
+    ApproximateNearestNeighbour::ANNCondition condition;
 };
 
 
 class MergeTreeIndexAnnoy : public IMergeTreeIndex
 {
 public:
-    MergeTreeIndexAnnoy(const IndexDescription & index_, uint64_t number_of_trees_)
-        : IMergeTreeIndex(index_)
-        , number_of_trees(number_of_trees_)
-    {}
-
+    MergeTreeIndexAnnoy(const IndexDescription & index_, uint64_t number_of_trees_);
     ~MergeTreeIndexAnnoy() override = default;
 
     MergeTreeIndexGranulePtr createIndexGranule() const override;
diff --git a/tests/queries/0_stateless/02354_annoy.sql b/tests/queries/0_stateless/02354_annoy.sql
index 8a8d023a104..654a4b545ea 100644
--- a/tests/queries/0_stateless/02354_annoy.sql
+++ b/tests/queries/0_stateless/02354_annoy.sql
@@ -44,3 +44,71 @@ ORDER BY L2Distance(embedding, [0.0, 0.0])
 LIMIT 3; -- { serverError 80 }
 
 DROP TABLE IF EXISTS 02354_annoy;
+
+-- ------------------------------------
+-- Check that weird base columns are rejected
+
+-- Index spans >1 column
+
+CREATE TABLE 02354_annoy
+(
+    id Int32,
+    embedding Array(Float32),
+    INDEX annoy_index (embedding, id) TYPE annoy(100) GRANULARITY 1
+)
+ENGINE = MergeTree
+ORDER BY id
+SETTINGS index_granularity=5; -- {serverError 7 }
+
+-- Index must be created on 
Array(Float32) or Tuple(Float32) + +CREATE TABLE 02354_annoy +( + id Int32, + embedding Float32, + INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; -- {serverError 44 } + + +CREATE TABLE 02354_annoy +( + id Int32, + embedding Array(Float64), + INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; -- {serverError 44 } + +CREATE TABLE 02354_annoy +( + id Int32, + embedding Tuple(Float32, Float64), + INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; -- {serverError 44 } + +CREATE TABLE 02354_annoy +( + id Int32, + embedding Array(LowCardinality(Float32)), + INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; -- {serverError 44 } + +CREATE TABLE 02354_annoy +( + id Int32, + embedding Array(Nullable(Float32)), + INDEX annoy_index embedding TYPE annoy(100) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity=5; -- {serverError 44 } From 5cd9ce1b20c47de84d29403f54ce97e190fa4e07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Rodr=C3=ADguez=20Hern=C3=A1ndez?= Date: Wed, 19 Oct 2022 15:41:49 +0200 Subject: [PATCH 175/252] Mark 22.8 as LTS and unify format (#42481) Co-authored-by: Nikita Mikhaylov --- CHANGELOG.md | 51 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 56d117d05dd..83c1cbf1eb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ### Table of Contents **[ClickHouse release v22.9, 2022-09-22](#229)**
-**[ClickHouse release v22.8, 2022-08-18](#228)**<br/>
+**[ClickHouse release v22.8-lts, 2022-08-18](#228)**<br/>
 **[ClickHouse release v22.7, 2022-07-21](#227)**<br/>
 **[ClickHouse release v22.6, 2022-06-16](#226)**<br/>
 **[ClickHouse release v22.5, 2022-05-19](#225)**<br/>
@@ -10,10 +10,10 @@ **[ClickHouse release v22.1, 2022-01-18](#221)**
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**
- ### ClickHouse release 22.9, 2022-09-22 #### Backward Incompatible Change + * Upgrade from 20.3 and older to 22.9 and newer should be done through an intermediate version if there are any `ReplicatedMergeTree` tables, otherwise server with the new version will not start. [#40641](https://github.com/ClickHouse/ClickHouse/pull/40641) ([Alexander Tokmakov](https://github.com/tavplubix)). * Remove the functions `accurate_Cast` and `accurate_CastOrNull` (they are different to `accurateCast` and `accurateCastOrNull` by underscore in the name and they are not affected by the value of `cast_keep_nullable` setting). These functions were undocumented, untested, unused, and unneeded. They appeared to be alive due to code generalization. [#40682](https://github.com/ClickHouse/ClickHouse/pull/40682) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Add a test to ensure that every new table function will be documented. See [#40649](https://github.com/ClickHouse/ClickHouse/issues/40649). Rename table function `MeiliSearch` to `meilisearch`. [#40709](https://github.com/ClickHouse/ClickHouse/pull/40709) ([Alexey Milovidov](https://github.com/alexey-milovidov)). @@ -21,6 +21,7 @@ * Make interpretation of YAML configs to be more conventional. [#41044](https://github.com/ClickHouse/ClickHouse/pull/41044) ([Vitaly Baranov](https://github.com/vitlibar)). #### New Feature + * Support `insert_quorum = 'auto'` to use majority number. [#39970](https://github.com/ClickHouse/ClickHouse/pull/39970) ([Sachin](https://github.com/SachinSetiya)). * Add embedded dashboards to ClickHouse server. This is a demo project about how to achieve 90% results with 1% effort using ClickHouse features. [#40461](https://github.com/ClickHouse/ClickHouse/pull/40461) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Added new settings constraint writability kind `changeable_in_readonly`. [#40631](https://github.com/ClickHouse/ClickHouse/pull/40631) ([Sergei Trifonov](https://github.com/serxa)). @@ -38,6 +39,7 @@ * Improvement for in-memory data parts: remove completely processed WAL files. [#40592](https://github.com/ClickHouse/ClickHouse/pull/40592) ([Azat Khuzhin](https://github.com/azat)). #### Performance Improvement + * Implement compression of marks and primary key. Close [#34437](https://github.com/ClickHouse/ClickHouse/issues/34437). [#37693](https://github.com/ClickHouse/ClickHouse/pull/37693) ([zhongyuankai](https://github.com/zhongyuankai)). * Allow to load marks with threadpool in advance. Regulated by setting `load_marks_asynchronously` (default: 0). [#40821](https://github.com/ClickHouse/ClickHouse/pull/40821) ([Kseniia Sumarokova](https://github.com/kssenii)). * Virtual filesystem over s3 will use random object names split into multiple path prefixes for better performance on AWS. [#40968](https://github.com/ClickHouse/ClickHouse/pull/40968) ([Alexey Milovidov](https://github.com/alexey-milovidov)). @@ -58,6 +60,7 @@ * Parallel hash JOIN for Float data types might be suboptimal. Make it better. [#41183](https://github.com/ClickHouse/ClickHouse/pull/41183) ([Alexey Milovidov](https://github.com/alexey-milovidov)). #### Improvement + * During startup and ATTACH call, `ReplicatedMergeTree` tables will be readonly until the ZooKeeper connection is made and the setup is finished. [#40148](https://github.com/ClickHouse/ClickHouse/pull/40148) ([Antonio Andelic](https://github.com/antonio2368)). 
* Add `enable_extended_results_for_datetime_functions` option to return results of type Date32 for functions toStartOfYear, toStartOfISOYear, toStartOfQuarter, toStartOfMonth, toStartOfWeek, toMonday and toLastDayOfMonth when argument is Date32 or DateTime64, otherwise results of Date type are returned. For compatibility reasons default value is ‘0’. [#41214](https://github.com/ClickHouse/ClickHouse/pull/41214) ([Roman Vasin](https://github.com/rvasin)). * For security and stability reasons, CatBoost models are no longer evaluated within the ClickHouse server. Instead, the evaluation is now done in the clickhouse-library-bridge, a separate process that loads the catboost library and communicates with the server process via HTTP. [#40897](https://github.com/ClickHouse/ClickHouse/pull/40897) ([Robert Schulze](https://github.com/rschu1ze)). [#39629](https://github.com/ClickHouse/ClickHouse/pull/39629) ([Robert Schulze](https://github.com/rschu1ze)). @@ -108,6 +111,7 @@ * Add `has_lightweight_delete` to system.parts. [#41564](https://github.com/ClickHouse/ClickHouse/pull/41564) ([Kseniia Sumarokova](https://github.com/kssenii)). #### Build/Testing/Packaging Improvement + * Enforce documentation for every setting. [#40644](https://github.com/ClickHouse/ClickHouse/pull/40644) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Enforce documentation for every current metric. [#40645](https://github.com/ClickHouse/ClickHouse/pull/40645) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Enforce documentation for every profile event counter. Write the documentation where it was missing. [#40646](https://github.com/ClickHouse/ClickHouse/pull/40646) ([Alexey Milovidov](https://github.com/alexey-milovidov)). @@ -217,15 +221,16 @@ * Fix read bytes/rows in X-ClickHouse-Summary with materialized views. [#41586](https://github.com/ClickHouse/ClickHouse/pull/41586) ([Raúl Marín](https://github.com/Algunenano)). * Fix possible `pipeline stuck` exception for queries with `OFFSET`. The error was found with `enable_optimize_predicate_expression = 0` and always false condition in `WHERE`. Fixes [#41383](https://github.com/ClickHouse/ClickHouse/issues/41383). [#41588](https://github.com/ClickHouse/ClickHouse/pull/41588) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). - -### ClickHouse release 22.8, 2022-08-18 +### ClickHouse release 22.8-lts, 2022-08-18 #### Backward Incompatible Change + * Extended range of `Date32` and `DateTime64` to support dates from the year 1900 to 2299. In previous versions, the supported interval was only from the year 1925 to 2283. The implementation is using the proleptic Gregorian calendar (which is conformant with [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601):2004 (clause 3.2.1 The Gregorian calendar)) instead of accounting for historical transitions from the Julian to the Gregorian calendar. This change affects implementation-specific behavior for out-of-range arguments. E.g. if in previous versions the value of `1899-01-01` was clamped to `1925-01-01`, in the new version it will be clamped to `1900-01-01`. It changes the behavior of rounding with `toStartOfInterval` if you pass `INTERVAL 3 QUARTER` up to one quarter because the intervals are counted from an implementation-specific point of time. Closes [#28216](https://github.com/ClickHouse/ClickHouse/issues/28216), improves [#38393](https://github.com/ClickHouse/ClickHouse/issues/38393). [#39425](https://github.com/ClickHouse/ClickHouse/pull/39425) ([Roman Vasin](https://github.com/rvasin)). 
* Now, all relevant dictionary sources respect `remote_url_allow_hosts` setting. It was already done for HTTP, Cassandra, Redis. Added ClickHouse, MongoDB, MySQL, PostgreSQL. Host is checked only for dictionaries created from DDL. [#39184](https://github.com/ClickHouse/ClickHouse/pull/39184) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Make the remote filesystem cache composable, allow not to evict certain files (regarding idx, mrk, ..), delete old cache version. Now it is possible to configure cache over Azure blob storage disk, over Local disk, over StaticWeb disk, etc. This PR is marked backward incompatible because cache configuration changes and in order for cache to work need to update the config file. Old cache will still be used with new configuration. The server will startup fine with the old cache configuration. Closes https://github.com/ClickHouse/ClickHouse/issues/36140. Closes https://github.com/ClickHouse/ClickHouse/issues/37889. ([Kseniia Sumarokova](https://github.com/kssenii)). [#36171](https://github.com/ClickHouse/ClickHouse/pull/36171)) #### New Feature + * Query parameters can be set in interactive mode as `SET param_abc = 'def'` and transferred via the native protocol as settings. [#39906](https://github.com/ClickHouse/ClickHouse/pull/39906) ([Nikita Taranov](https://github.com/nickitat)). * Quota key can be set in the native protocol ([Yakov Olkhovsky](https://github.com/ClickHouse/ClickHouse/pull/39874)). * Added a setting `exact_rows_before_limit` (0/1). When enabled, ClickHouse will provide exact value for `rows_before_limit_at_least` statistic, but with the cost that the data before limit will have to be read completely. This closes [#6613](https://github.com/ClickHouse/ClickHouse/issues/6613). [#25333](https://github.com/ClickHouse/ClickHouse/pull/25333) ([kevin wan](https://github.com/MaxWk)). @@ -240,12 +245,14 @@ * Add new setting schema_inference_hints that allows to specify structure hints in schema inference for specific columns. Closes [#39569](https://github.com/ClickHouse/ClickHouse/issues/39569). [#40068](https://github.com/ClickHouse/ClickHouse/pull/40068) ([Kruglov Pavel](https://github.com/Avogar)). #### Experimental Feature + * Support SQL standard DELETE FROM syntax on merge tree tables and lightweight delete implementation for merge tree families. [#37893](https://github.com/ClickHouse/ClickHouse/pull/37893) ([Jianmei Zhang](https://github.com/zhangjmruc)) ([Alexander Gololobov](https://github.com/davenger)). Note: this new feature does not make ClickHouse an HTAP DBMS. #### Performance Improvement + * Improved memory usage during memory efficient merging of aggregation results. [#39429](https://github.com/ClickHouse/ClickHouse/pull/39429) ([Nikita Taranov](https://github.com/nickitat)). * Added concurrency control logic to limit total number of concurrent threads created by queries. [#37558](https://github.com/ClickHouse/ClickHouse/pull/37558) ([Sergei Trifonov](https://github.com/serxa)). Add `concurrent_threads_soft_limit parameter` to increase performance in case of high QPS by means of limiting total number of threads for all queries. [#37285](https://github.com/ClickHouse/ClickHouse/pull/37285) ([Roman Vasin](https://github.com/rvasin)). -* Add `SLRU` cache policy for uncompressed cache and marks cache. ([Kseniia Sumarokova](https://github.com/kssenii)). [#34651](https://github.com/ClickHouse/ClickHouse/pull/34651) ([alexX512](https://github.com/alexX512)). 
Decoupling local cache function and cache algorithm [#38048](https://github.com/ClickHouse/ClickHouse/pull/38048) ([Han Shukai](https://github.com/KinderRiven)). +* Add `SLRU` cache policy for uncompressed cache and marks cache. ([Kseniia Sumarokova](https://github.com/kssenii)). [#34651](https://github.com/ClickHouse/ClickHouse/pull/34651) ([alexX512](https://github.com/alexX512)). Decoupling local cache function and cache algorithm [#38048](https://github.com/ClickHouse/ClickHouse/pull/38048) ([Han Shukai](https://github.com/KinderRiven)). * Intel® In-Memory Analytics Accelerator (Intel® IAA) is a hardware accelerator available in the upcoming generation of Intel® Xeon® Scalable processors ("Sapphire Rapids"). Its goal is to speed up common operations in analytics like data (de)compression and filtering. ClickHouse gained the new "DeflateQpl" compression codec which utilizes the Intel® IAA offloading technology to provide a high-performance DEFLATE implementation. The codec uses the [Intel® Query Processing Library (QPL)](https://github.com/intel/qpl) which abstracts access to the hardware accelerator, respectively to a software fallback in case the hardware accelerator is not available. DEFLATE provides in general higher compression rates than ClickHouse's LZ4 default codec, and as a result, offers less disk I/O and lower main memory consumption. [#36654](https://github.com/ClickHouse/ClickHouse/pull/36654) ([jasperzhu](https://github.com/jinjunzh)). [#39494](https://github.com/ClickHouse/ClickHouse/pull/39494) ([Robert Schulze](https://github.com/rschu1ze)). * `DISTINCT` in order with `ORDER BY`: Deduce way to sort based on input stream sort description. Skip sorting if input stream is already sorted. [#38719](https://github.com/ClickHouse/ClickHouse/pull/38719) ([Igor Nikonov](https://github.com/devcrafter)). Improve memory usage (significantly) and query execution time + use `DistinctSortedChunkTransform` for final distinct when `DISTINCT` columns match `ORDER BY` columns, but rename to `DistinctSortedStreamTransform` in `EXPLAIN PIPELINE` → this improves memory usage significantly + remove unnecessary allocations in hot loop in `DistinctSortedChunkTransform`. [#39432](https://github.com/ClickHouse/ClickHouse/pull/39432) ([Igor Nikonov](https://github.com/devcrafter)). Use `DistinctSortedTransform` only when sort description is applicable to DISTINCT columns, otherwise fall back to ordinary DISTINCT implementation + it allows making less checks during `DistinctSortedTransform` execution. [#39528](https://github.com/ClickHouse/ClickHouse/pull/39528) ([Igor Nikonov](https://github.com/devcrafter)). Fix: `DistinctSortedTransform` didn't take advantage of sorting. It never cleared HashSet since clearing_columns were detected incorrectly (always empty). So, it basically worked as ordinary `DISTINCT` (`DistinctTransform`). The fix reduces memory usage significantly. [#39538](https://github.com/ClickHouse/ClickHouse/pull/39538) ([Igor Nikonov](https://github.com/devcrafter)). * Use local node as first priority to get structure of remote table when executing `cluster` and similar table functions. [#39440](https://github.com/ClickHouse/ClickHouse/pull/39440) ([Mingliang Pan](https://github.com/liangliangpan)). @@ -256,6 +263,7 @@ * Improve bytes to bits mask transform for SSE/AVX/AVX512. [#39586](https://github.com/ClickHouse/ClickHouse/pull/39586) ([Guo Wangyang](https://github.com/guowangy)). 
#### Improvement + * Normalize `AggregateFunction` types and state representations because optimizations like [#35788](https://github.com/ClickHouse/ClickHouse/pull/35788) will treat `count(not null columns)` as `count()`, which might confuses distributed interpreters with the following error : `Conversion from AggregateFunction(count) to AggregateFunction(count, Int64) is not supported`. [#39420](https://github.com/ClickHouse/ClickHouse/pull/39420) ([Amos Bird](https://github.com/amosbird)). The functions with identical states can be used in materialized views interchangeably. * Rework and simplify the `system.backups` table, remove the `internal` column, allow user to set the ID of operation, add columns `num_files`, `uncompressed_size`, `compressed_size`, `start_time`, `end_time`. [#39503](https://github.com/ClickHouse/ClickHouse/pull/39503) ([Vitaly Baranov](https://github.com/vitlibar)). * Improved structure of DDL query result table for `Replicated` database (separate columns with shard and replica name, more clear status) - `CREATE TABLE ... ON CLUSTER` queries can be normalized on initiator first if `distributed_ddl_entry_format_version` is set to 3 (default value). It means that `ON CLUSTER` queries may not work if initiator does not belong to the cluster that specified in query. Fixes [#37318](https://github.com/ClickHouse/ClickHouse/issues/37318), [#39500](https://github.com/ClickHouse/ClickHouse/issues/39500) - Ignore `ON CLUSTER` clause if database is `Replicated` and cluster name equals to database name. Related to [#35570](https://github.com/ClickHouse/ClickHouse/issues/35570) - Miscellaneous minor fixes for `Replicated` database engine - Check metadata consistency when starting up `Replicated` database, start replica recovery in case of mismatch of local metadata and metadata in Keeper. Resolves [#24880](https://github.com/ClickHouse/ClickHouse/issues/24880). [#37198](https://github.com/ClickHouse/ClickHouse/pull/37198) ([Alexander Tokmakov](https://github.com/tavplubix)). @@ -294,6 +302,7 @@ * Add support for LARGE_BINARY/LARGE_STRING with Arrow (Closes [#32401](https://github.com/ClickHouse/ClickHouse/issues/32401)). [#40293](https://github.com/ClickHouse/ClickHouse/pull/40293) ([Josh Taylor](https://github.com/joshuataylor)). #### Build/Testing/Packaging Improvement + * [ClickFiddle](https://fiddle.clickhouse.com/): A new tool for testing ClickHouse versions in read/write mode (**Igor Baliuk**). * ClickHouse binary is made self-extracting [#35775](https://github.com/ClickHouse/ClickHouse/pull/35775) ([Yakov Olkhovskiy, Arthur Filatenkov](https://github.com/yakov-olkhovskiy)). * Update tzdata to 2022b to support the new timezone changes. See https://github.com/google/cctz/pull/226. Chile's 2022 DST start is delayed from September 4 to September 11. Iran plans to stop observing DST permanently, after it falls back on 2022-09-21. There are corrections of the historical time zone of Asia/Tehran in the year 1977: Iran adopted standard time in 1935, not 1946. In 1977 it observed DST from 03-21 23:00 to 10-20 24:00; its 1978 transitions were on 03-24 and 08-05, not 03-20 and 10-20; and its spring 1979 transition was on 05-27, not 03-21 (https://data.iana.org/time-zones/tzdb/NEWS). ([Alexey Milovidov](https://github.com/alexey-milovidov)). @@ -308,6 +317,7 @@ * Docker: Now entrypoint.sh in docker image creates and executes chown for all folders it found in config for multidisk setup [#17717](https://github.com/ClickHouse/ClickHouse/issues/17717). 
[#39121](https://github.com/ClickHouse/ClickHouse/pull/39121) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). #### Bug Fix + * Fix possible segfault in `CapnProto` input format. This bug was found and send through ClickHouse bug-bounty [program](https://github.com/ClickHouse/ClickHouse/issues/38986) by *kiojj*. [#40241](https://github.com/ClickHouse/ClickHouse/pull/40241) ([Kruglov Pavel](https://github.com/Avogar)). * Fix a very rare case of incorrect behavior of array subscript operator. This closes [#28720](https://github.com/ClickHouse/ClickHouse/issues/28720). [#40185](https://github.com/ClickHouse/ClickHouse/pull/40185) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Fix insufficient argument check for encryption functions (found by query fuzzer). This closes [#39987](https://github.com/ClickHouse/ClickHouse/issues/39987). [#40194](https://github.com/ClickHouse/ClickHouse/pull/40194) ([Alexey Milovidov](https://github.com/alexey-milovidov)). @@ -358,16 +368,17 @@ * A fix for reverse DNS resolution. [#40134](https://github.com/ClickHouse/ClickHouse/pull/40134) ([Arthur Passos](https://github.com/arthurpassos)). * Fix unexpected result `arrayDifference` of `Array(UInt32). [#40211](https://github.com/ClickHouse/ClickHouse/pull/40211) ([Duc Canh Le](https://github.com/canhld94)). - ### ClickHouse release 22.7, 2022-07-21 #### Upgrade Notes + * Enable setting `enable_positional_arguments` by default. It allows queries like `SELECT ... ORDER BY 1, 2` where 1, 2 are the references to the select clause. If you need to return the old behavior, disable this setting. [#38204](https://github.com/ClickHouse/ClickHouse/pull/38204) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Disable `format_csv_allow_single_quotes` by default. See [#37096](https://github.com/ClickHouse/ClickHouse/issues/37096). ([Kruglov Pavel](https://github.com/Avogar)). * `Ordinary` database engine and old storage definition syntax for `*MergeTree` tables are deprecated. By default it's not possible to create new databases with `Ordinary` engine. If `system` database has `Ordinary` engine it will be automatically converted to `Atomic` on server startup. There are settings to keep old behavior (`allow_deprecated_database_ordinary` and `allow_deprecated_syntax_for_merge_tree`), but these settings may be removed in future releases. [#38335](https://github.com/ClickHouse/ClickHouse/pull/38335) ([Alexander Tokmakov](https://github.com/tavplubix)). * Force rewriting comma join to inner by default (set default value `cross_to_inner_join_rewrite = 2`). To have old behavior set `cross_to_inner_join_rewrite = 1`. [#39326](https://github.com/ClickHouse/ClickHouse/pull/39326) ([Vladimir C](https://github.com/vdimir)). If you will face any incompatibilities, you can turn this setting back. #### New Feature + * Support expressions with window functions. Closes [#19857](https://github.com/ClickHouse/ClickHouse/issues/19857). [#37848](https://github.com/ClickHouse/ClickHouse/pull/37848) ([Dmitry Novik](https://github.com/novikd)). * Add new `direct` join algorithm for `EmbeddedRocksDB` tables, see [#33582](https://github.com/ClickHouse/ClickHouse/issues/33582). [#35363](https://github.com/ClickHouse/ClickHouse/pull/35363) ([Vladimir C](https://github.com/vdimir)). * Added full sorting merge join algorithm. [#35796](https://github.com/ClickHouse/ClickHouse/pull/35796) ([Vladimir C](https://github.com/vdimir)). @@ -395,9 +406,11 @@ * Add `clickhouse-diagnostics` binary to the packages. 
[#38647](https://github.com/ClickHouse/ClickHouse/pull/38647) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). #### Experimental Feature + * Adds new setting `implicit_transaction` to run standalone queries inside a transaction. It handles both creation and closing (via COMMIT if the query succeeded or ROLLBACK if it didn't) of the transaction automatically. [#38344](https://github.com/ClickHouse/ClickHouse/pull/38344) ([Raúl Marín](https://github.com/Algunenano)). #### Performance Improvement + * Distinct optimization for sorted columns. Use specialized distinct transformation in case input stream is sorted by column(s) in distinct. Optimization can be applied to pre-distinct, final distinct, or both. Initial implementation by @dimarub2000. [#37803](https://github.com/ClickHouse/ClickHouse/pull/37803) ([Igor Nikonov](https://github.com/devcrafter)). * Improve performance of `ORDER BY`, `MergeTree` merges, window functions using batch version of `BinaryHeap`. [#38022](https://github.com/ClickHouse/ClickHouse/pull/38022) ([Maksim Kita](https://github.com/kitaisreal)). * More parallel execution for queries with `FINAL` [#36396](https://github.com/ClickHouse/ClickHouse/pull/36396) ([Nikita Taranov](https://github.com/nickitat)). @@ -407,7 +420,7 @@ * Improve performance of insertion to columns of type `JSON`. [#38320](https://github.com/ClickHouse/ClickHouse/pull/38320) ([Anton Popov](https://github.com/CurtizJ)). * Optimized insertion and lookups in the HashTable. [#38413](https://github.com/ClickHouse/ClickHouse/pull/38413) ([Nikita Taranov](https://github.com/nickitat)). * Fix performance degradation from [#32493](https://github.com/ClickHouse/ClickHouse/issues/32493). [#38417](https://github.com/ClickHouse/ClickHouse/pull/38417) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Improve performance of joining with numeric columns using SIMD instructions. [#37235](https://github.com/ClickHouse/ClickHouse/pull/37235) ([zzachimed](https://github.com/zzachimed)). [#38565](https://github.com/ClickHouse/ClickHouse/pull/38565) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve performance of joining with numeric columns using SIMD instructions. [#37235](https://github.com/ClickHouse/ClickHouse/pull/37235) ([zzachimed](https://github.com/zzachimed)). [#38565](https://github.com/ClickHouse/ClickHouse/pull/38565) ([Maksim Kita](https://github.com/kitaisreal)). * Norm and Distance functions for arrays speed up 1.2-2 times. [#38740](https://github.com/ClickHouse/ClickHouse/pull/38740) ([Alexander Gololobov](https://github.com/davenger)). * Add AVX-512 VBMI optimized `copyOverlap32Shuffle` for LZ4 decompression. In other words, LZ4 decompression performance is improved. [#37891](https://github.com/ClickHouse/ClickHouse/pull/37891) ([Guo Wangyang](https://github.com/guowangy)). * `ORDER BY (a, b)` will use all the same benefits as `ORDER BY a, b`. [#38873](https://github.com/ClickHouse/ClickHouse/pull/38873) ([Igor Nikonov](https://github.com/devcrafter)). @@ -419,6 +432,7 @@ * The table `system.asynchronous_metric_log` is further optimized for storage space. This closes [#38134](https://github.com/ClickHouse/ClickHouse/issues/38134). See the [YouTube video](https://www.youtube.com/watch?v=0fSp9SF8N8A). [#38428](https://github.com/ClickHouse/ClickHouse/pull/38428) ([Alexey Milovidov](https://github.com/alexey-milovidov)). #### Improvement + * Support SQL standard CREATE INDEX and DROP INDEX syntax. 
[#35166](https://github.com/ClickHouse/ClickHouse/pull/35166) ([Jianmei Zhang](https://github.com/zhangjmruc)). * Send profile events for INSERT queries (previously only SELECT was supported). [#37391](https://github.com/ClickHouse/ClickHouse/pull/37391) ([Azat Khuzhin](https://github.com/azat)). * Implement in order aggregation (`optimize_aggregation_in_order`) for fully materialized projections. [#37469](https://github.com/ClickHouse/ClickHouse/pull/37469) ([Azat Khuzhin](https://github.com/azat)). @@ -464,6 +478,7 @@ * Allow to declare `RabbitMQ` queue without default arguments `x-max-length` and `x-overflow`. [#39259](https://github.com/ClickHouse/ClickHouse/pull/39259) ([rnbondarenko](https://github.com/rnbondarenko)). #### Build/Testing/Packaging Improvement + * Apply Clang Thread Safety Analysis (TSA) annotations to ClickHouse. [#38068](https://github.com/ClickHouse/ClickHouse/pull/38068) ([Robert Schulze](https://github.com/rschu1ze)). * Adapt universal installation script for FreeBSD. [#39302](https://github.com/ClickHouse/ClickHouse/pull/39302) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Preparation for building on `s390x` platform. [#39193](https://github.com/ClickHouse/ClickHouse/pull/39193) ([Harry Lee](https://github.com/HarryLeeIBM)). @@ -473,6 +488,7 @@ * Change `all|noarch` packages to architecture-dependent - Fix some documentation for it - Push aarch64|arm64 packages to artifactory and release assets - Fixes [#36443](https://github.com/ClickHouse/ClickHouse/issues/36443). [#38580](https://github.com/ClickHouse/ClickHouse/pull/38580) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). #### Bug Fix (user-visible misbehavior in official stable or prestable release) + * Fix rounding for `Decimal128/Decimal256` with more than 19-digits long scale. [#38027](https://github.com/ClickHouse/ClickHouse/pull/38027) ([Igor Nikonov](https://github.com/devcrafter)). * Fixed crash caused by data race in storage `Hive` (integration table engine). [#38887](https://github.com/ClickHouse/ClickHouse/pull/38887) ([lgbo](https://github.com/lgbo-ustc)). * Fix crash when executing GRANT ALL ON *.* with ON CLUSTER. It was broken in https://github.com/ClickHouse/ClickHouse/pull/35767. This closes [#38618](https://github.com/ClickHouse/ClickHouse/issues/38618). [#38674](https://github.com/ClickHouse/ClickHouse/pull/38674) ([Vitaly Baranov](https://github.com/vitlibar)). @@ -529,6 +545,7 @@ ### ClickHouse release 22.6, 2022-06-16 #### Backward Incompatible Change + * Remove support for octal number literals in SQL. In previous versions they were parsed as Float64. [#37765](https://github.com/ClickHouse/ClickHouse/pull/37765) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). * Changes how settings using `seconds` as type are parsed to support floating point values (for example: `max_execution_time=0.5`). Infinity or NaN values will throw an exception. [#37187](https://github.com/ClickHouse/ClickHouse/pull/37187) ([Raúl Marín](https://github.com/Algunenano)). * Changed format of binary serialization of columns of experimental type `Object`. New format is more convenient to implement by third-party clients. [#37482](https://github.com/ClickHouse/ClickHouse/pull/37482) ([Anton Popov](https://github.com/CurtizJ)). 
@@ -537,6 +554,7 @@ * If you run different ClickHouse versions on a cluster with AArch64 CPU or mix AArch64 and amd64 on a cluster, and use distributed queries with GROUP BY multiple keys of fixed-size type that fit in 256 bits but don't fit in 64 bits, and the size of the result is huge, the data will not be fully aggregated in the result of these queries during upgrade. Workaround: upgrade with downtime instead of a rolling upgrade. #### New Feature + * Add `GROUPING` function. It allows to disambiguate the records in the queries with `ROLLUP`, `CUBE` or `GROUPING SETS`. Closes [#19426](https://github.com/ClickHouse/ClickHouse/issues/19426). [#37163](https://github.com/ClickHouse/ClickHouse/pull/37163) ([Dmitry Novik](https://github.com/novikd)). * A new codec [FPC](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf) algorithm for floating point data compression. [#37553](https://github.com/ClickHouse/ClickHouse/pull/37553) ([Mikhail Guzov](https://github.com/koloshmet)). * Add new columnar JSON formats: `JSONColumns`, `JSONCompactColumns`, `JSONColumnsWithMetadata`. Closes [#36338](https://github.com/ClickHouse/ClickHouse/issues/36338) Closes [#34509](https://github.com/ClickHouse/ClickHouse/issues/34509). [#36975](https://github.com/ClickHouse/ClickHouse/pull/36975) ([Kruglov Pavel](https://github.com/Avogar)). @@ -557,11 +575,13 @@ * Added `SYSTEM UNFREEZE` query that deletes the whole backup regardless if the corresponding table is deleted or not. [#36424](https://github.com/ClickHouse/ClickHouse/pull/36424) ([Vadim Volodin](https://github.com/PolyProgrammist)). #### Experimental Feature + * Enables `POPULATE` for `WINDOW VIEW`. [#36945](https://github.com/ClickHouse/ClickHouse/pull/36945) ([vxider](https://github.com/Vxider)). * `ALTER TABLE ... MODIFY QUERY` support for `WINDOW VIEW`. [#37188](https://github.com/ClickHouse/ClickHouse/pull/37188) ([vxider](https://github.com/Vxider)). * This PR changes the behavior of the `ENGINE` syntax in `WINDOW VIEW`, to make it like in `MATERIALIZED VIEW`. [#37214](https://github.com/ClickHouse/ClickHouse/pull/37214) ([vxider](https://github.com/Vxider)). #### Performance Improvement + * Added numerous optimizations for ARM NEON [#38093](https://github.com/ClickHouse/ClickHouse/pull/38093)([Daniel Kutenin](https://github.com/danlark1)), ([Alexandra Pilipyuk](https://github.com/chalice19)) Note: if you run different ClickHouse versions on a cluster with ARM CPU and use distributed queries with GROUP BY multiple keys of fixed-size type that fit in 256 bits but don't fit in 64 bits, the result of the aggregation query will be wrong during upgrade. Workaround: upgrade with downtime instead of a rolling upgrade. * Improve performance and memory usage for select of subset of columns for formats Native, Protobuf, CapnProto, JSONEachRow, TSKV, all formats with suffixes WithNames/WithNamesAndTypes. Previously while selecting only subset of columns from files in these formats all columns were read and stored in memory. Now only required columns are read. This PR enables setting `input_format_skip_unknown_fields` by default, because otherwise in case of select of subset of columns exception will be thrown. [#37192](https://github.com/ClickHouse/ClickHouse/pull/37192) ([Kruglov Pavel](https://github.com/Avogar)). * Now more filters can be pushed down for join. [#37472](https://github.com/ClickHouse/ClickHouse/pull/37472) ([Amos Bird](https://github.com/amosbird)). 
@@ -592,6 +612,7 @@ * In function: CompressedWriteBuffer::nextImpl(), there is an unnecessary write-copy step that would happen frequently during inserting data. Below shows the differentiation with this patch: - Before: 1. Compress "working_buffer" into "compressed_buffer" 2. write-copy into "out" - After: Directly Compress "working_buffer" into "out". [#37242](https://github.com/ClickHouse/ClickHouse/pull/37242) ([jasperzhu](https://github.com/jinjunzh)). #### Improvement + * Support types with non-standard defaults in ROLLUP, CUBE, GROUPING SETS. Closes [#37360](https://github.com/ClickHouse/ClickHouse/issues/37360). [#37667](https://github.com/ClickHouse/ClickHouse/pull/37667) ([Dmitry Novik](https://github.com/novikd)). * Fix stack traces collection on ARM. Closes [#37044](https://github.com/ClickHouse/ClickHouse/issues/37044). Closes [#15638](https://github.com/ClickHouse/ClickHouse/issues/15638). [#37797](https://github.com/ClickHouse/ClickHouse/pull/37797) ([Maksim Kita](https://github.com/kitaisreal)). * Client will try every IP address returned by DNS resolution until successful connection. [#37273](https://github.com/ClickHouse/ClickHouse/pull/37273) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). @@ -633,6 +654,7 @@ * Add implicit grants with grant option too. For example `GRANT CREATE TABLE ON test.* TO A WITH GRANT OPTION` now allows `A` to execute `GRANT CREATE VIEW ON test.* TO B`. [#38017](https://github.com/ClickHouse/ClickHouse/pull/38017) ([Vitaly Baranov](https://github.com/vitlibar)). #### Build/Testing/Packaging Improvement + * Use `clang-14` and LLVM infrastructure version 14 for builds. This closes [#34681](https://github.com/ClickHouse/ClickHouse/issues/34681). [#34754](https://github.com/ClickHouse/ClickHouse/pull/34754) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Note: `clang-14` has [a bug](https://github.com/google/sanitizers/issues/1540) in ThreadSanitizer that makes our CI work worse. * Allow to drop privileges at startup. This simplifies Docker images. Closes [#36293](https://github.com/ClickHouse/ClickHouse/issues/36293). [#36341](https://github.com/ClickHouse/ClickHouse/pull/36341) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Add docs spellcheck to CI. [#37790](https://github.com/ClickHouse/ClickHouse/pull/37790) ([Vladimir C](https://github.com/vdimir)). @@ -690,7 +712,6 @@ * Fix possible heap-use-after-free error when reading system.projection_parts and system.projection_parts_columns . This fixes [#37184](https://github.com/ClickHouse/ClickHouse/issues/37184). [#37185](https://github.com/ClickHouse/ClickHouse/pull/37185) ([Amos Bird](https://github.com/amosbird)). * Fixed `DateTime64` fractional seconds behavior prior to Unix epoch. [#37697](https://github.com/ClickHouse/ClickHouse/pull/37697) ([Andrey Zvonov](https://github.com/zvonand)). [#37039](https://github.com/ClickHouse/ClickHouse/pull/37039) ([æŽæ‰¬](https://github.com/taiyang-li)). - ### ClickHouse release 22.5, 2022-05-19 #### Upgrade Notes @@ -743,7 +764,7 @@ * Implement partial GROUP BY key for optimize_aggregation_in_order. [#35111](https://github.com/ClickHouse/ClickHouse/pull/35111) ([Azat Khuzhin](https://github.com/azat)). #### Improvement - + * Show names of erroneous files in case of parsing errors while executing table functions `file`, `s3` and `url`. [#36314](https://github.com/ClickHouse/ClickHouse/pull/36314) ([Anton Popov](https://github.com/CurtizJ)). 
* Allowed to increase the number of threads for executing background operations (merges, mutations, moves and fetches) at runtime if they are specified at top level config. [#36425](https://github.com/ClickHouse/ClickHouse/pull/36425) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Now date time conversion functions that generates time before 1970-01-01 00:00:00 with partial hours/minutes timezones will be saturated to zero instead of overflow. This is the continuation of https://github.com/ClickHouse/ClickHouse/pull/29953 which addresses https://github.com/ClickHouse/ClickHouse/pull/29953#discussion_r800550280 . Mark as improvement because it's implementation defined behavior (and very rare case) and we are allowed to break it. [#36656](https://github.com/ClickHouse/ClickHouse/pull/36656) ([Amos Bird](https://github.com/amosbird)). @@ -852,7 +873,6 @@ * Fix ALTER DROP COLUMN of nested column with compact parts (i.e. `ALTER TABLE x DROP COLUMN n`, when there is column `n.d`). [#35797](https://github.com/ClickHouse/ClickHouse/pull/35797) ([Azat Khuzhin](https://github.com/azat)). * Fix substring function range error length when `offset` and `length` is negative constant and `s` is not constant. [#33861](https://github.com/ClickHouse/ClickHouse/pull/33861) ([RogerYK](https://github.com/RogerYK)). - ### ClickHouse release 22.4, 2022-04-19 #### Backward Incompatible Change @@ -1004,8 +1024,7 @@ * Fix mutations in tables with enabled sparse columns. [#35284](https://github.com/ClickHouse/ClickHouse/pull/35284) ([Anton Popov](https://github.com/CurtizJ)). * Do not delay final part writing by default (fixes possible `Memory limit exceeded` during `INSERT` by adding `max_insert_delayed_streams_for_parallel_write` with default to 1000 for writes to s3 and disabled as before otherwise). [#34780](https://github.com/ClickHouse/ClickHouse/pull/34780) ([Azat Khuzhin](https://github.com/azat)). - -## ClickHouse release v22.3-lts, 2022-03-17 +### ClickHouse release v22.3-lts, 2022-03-17 #### Backward Incompatible Change @@ -1132,7 +1151,6 @@ * Fix incorrect result of trivial count query when part movement feature is used [#34089](https://github.com/ClickHouse/ClickHouse/issues/34089). [#34385](https://github.com/ClickHouse/ClickHouse/pull/34385) ([nvartolomei](https://github.com/nvartolomei)). * Fix inconsistency of `max_query_size` limitation in distributed subqueries. [#34078](https://github.com/ClickHouse/ClickHouse/pull/34078) ([Chao Ma](https://github.com/godliness)). - ### ClickHouse release v22.2, 2022-02-17 #### Upgrade Notes @@ -1308,7 +1326,6 @@ * Fix issue [#18206](https://github.com/ClickHouse/ClickHouse/issues/18206). [#33977](https://github.com/ClickHouse/ClickHouse/pull/33977) ([Vitaly Baranov](https://github.com/vitlibar)). * This PR allows using multiple LDAP storages in the same list of user directories. It worked earlier but was broken because LDAP tests are disabled (they are part of the testflows tests). [#33574](https://github.com/ClickHouse/ClickHouse/pull/33574) ([Vitaly Baranov](https://github.com/vitlibar)). - ### ClickHouse release v22.1, 2022-01-18 #### Upgrade Notes @@ -1335,7 +1352,6 @@ * Add function `decodeURLFormComponent` slightly different to `decodeURLComponent`. Close [#10298](https://github.com/ClickHouse/ClickHouse/issues/10298). [#33451](https://github.com/ClickHouse/ClickHouse/pull/33451) ([SuperDJY](https://github.com/cmsxbc)). * Allow to split `GraphiteMergeTree` rollup rules for plain/tagged metrics (optional rule_type field). 
[#33494](https://github.com/ClickHouse/ClickHouse/pull/33494) ([Michail Safronov](https://github.com/msaf1980)). - #### Performance Improvement * Support moving conditions to `PREWHERE` (setting `optimize_move_to_prewhere`) for tables of `Merge` engine if its all underlying tables supports `PREWHERE`. [#33300](https://github.com/ClickHouse/ClickHouse/pull/33300) ([Anton Popov](https://github.com/CurtizJ)). @@ -1351,7 +1367,6 @@ * Optimize selecting of MergeTree parts that can be moved between volumes. [#33225](https://github.com/ClickHouse/ClickHouse/pull/33225) ([OnePiece](https://github.com/zhongyuankai)). * Fix `sparse_hashed` dict performance with sequential keys (wrong hash function). [#32536](https://github.com/ClickHouse/ClickHouse/pull/32536) ([Azat Khuzhin](https://github.com/azat)). - #### Experimental Feature * Parallel reading from multiple replicas within a shard during distributed query without using sample key. To enable this, set `allow_experimental_parallel_reading_from_replicas = 1` and `max_parallel_replicas` to any number. This closes [#26748](https://github.com/ClickHouse/ClickHouse/issues/26748). [#29279](https://github.com/ClickHouse/ClickHouse/pull/29279) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). @@ -1364,7 +1379,6 @@ * Fix ACL with explicit digit hash in `clickhouse-keeper`: now the behavior consistent with ZooKeeper and generated digest is always accepted. [#33249](https://github.com/ClickHouse/ClickHouse/pull/33249) ([å°è·¯](https://github.com/nicelulu)). [#33246](https://github.com/ClickHouse/ClickHouse/pull/33246). * Fix unexpected projection removal when detaching parts. [#32067](https://github.com/ClickHouse/ClickHouse/pull/32067) ([Amos Bird](https://github.com/amosbird)). - #### Improvement * Now date time conversion functions that generates time before `1970-01-01 00:00:00` will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if date truncation function would yield result before the Unix epoch. @@ -1411,7 +1425,6 @@ * Updating `modification_time` for data part in `system.parts` after part movement [#32964](https://github.com/ClickHouse/ClickHouse/issues/32964). [#32965](https://github.com/ClickHouse/ClickHouse/pull/32965) ([save-my-heart](https://github.com/save-my-heart)). * Potential issue, cannot be exploited: integer overflow may happen in array resize. [#33024](https://github.com/ClickHouse/ClickHouse/pull/33024) ([varadarajkumar](https://github.com/varadarajkumar)). - #### Build/Testing/Packaging Improvement * Add packages, functional tests and Docker builds for AArch64 (ARM) version of ClickHouse. [#32911](https://github.com/ClickHouse/ClickHouse/pull/32911) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). [#32415](https://github.com/ClickHouse/ClickHouse/pull/32415) @@ -1426,7 +1439,6 @@ * Inject git information into clickhouse binary file. So we can get source code revision easily from clickhouse binary file. [#33124](https://github.com/ClickHouse/ClickHouse/pull/33124) ([taiyang-li](https://github.com/taiyang-li)). * Remove obsolete code from ConfigProcessor. Yandex specific code is not used anymore. The code contained one minor defect. This defect was reported by [Mallik Hassan](https://github.com/SadiHassan) in [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032). This closes [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032). 
[#33026](https://github.com/ClickHouse/ClickHouse/pull/33026) ([alexey-milovidov](https://github.com/alexey-milovidov)). - #### Bug Fix (user-visible misbehavior in official stable or prestable release) * Several fixes for format parsing. This is relevant if `clickhouse-server` is open for write access to adversary. Specifically crafted input data for `Native` format may lead to reading uninitialized memory or crash. This is relevant if `clickhouse-server` is open for write access to adversary. [#33050](https://github.com/ClickHouse/ClickHouse/pull/33050) ([Heena Bansal](https://github.com/HeenaBansal2009)). Fixed Apache Avro Union type index out of boundary issue in Apache Avro binary format. [#33022](https://github.com/ClickHouse/ClickHouse/pull/33022) ([Harry Lee](https://github.com/HarryLeeIBM)). Fix null pointer dereference in `LowCardinality` data when deserializing `LowCardinality` data in the Native format. [#33021](https://github.com/ClickHouse/ClickHouse/pull/33021) ([Harry Lee](https://github.com/HarryLeeIBM)). @@ -1485,5 +1497,4 @@ * Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Fix hang up with command `DROP TABLE system.query_log sync`. [#33293](https://github.com/ClickHouse/ClickHouse/pull/33293) ([zhanghuajie](https://github.com/zhanghuajieHIT)). - ## [Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021) From 4db28d0bd6d8ff2eaceedbb153bd6921d93a2a52 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 19 Oct 2022 14:01:26 +0000 Subject: [PATCH 176/252] Add toStableRelativeHourNum to gtest_DateLUTImpl.cpp --- src/Common/tests/gtest_DateLUTImpl.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index 49013625ed3..aca17ae4f93 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -134,6 +134,7 @@ TEST(DateLUTTest, TimeValuesInMiddleOfRange) EXPECT_EQ(lut.toRelativeMonthNum(time), 24237 /*unsigned*/); EXPECT_EQ(lut.toRelativeQuarterNum(time), 8078 /*unsigned*/); EXPECT_EQ(lut.toRelativeHourNum(time), 435736 /*time_t*/); + EXPECT_EQ(lut.toStableRelativeHourNum(time), 435757 /*time_t*/); EXPECT_EQ(lut.toRelativeMinuteNum(time), 26144180 /*time_t*/); EXPECT_EQ(lut.toStartOfMinuteInterval(time, 6), 1568650680 /*time_t*/); EXPECT_EQ(lut.toStartOfSecondInterval(time, 7), 1568650811 /*time_t*/); @@ -196,6 +197,7 @@ TEST(DateLUTTest, TimeValuesAtLeftBoderOfRange) EXPECT_EQ(lut.toRelativeMonthNum(time), 23641 /*unsigned*/); // ? EXPECT_EQ(lut.toRelativeQuarterNum(time), 7880 /*unsigned*/); // ? 
EXPECT_EQ(lut.toRelativeHourNum(time), 0 /*time_t*/); + EXPECT_EQ(lut.toStableRelativeHourNum(time), 24 /*time_t*/); EXPECT_EQ(lut.toRelativeMinuteNum(time), 0 /*time_t*/); EXPECT_EQ(lut.toStartOfMinuteInterval(time, 6), 0 /*time_t*/); EXPECT_EQ(lut.toStartOfSecondInterval(time, 7), 0 /*time_t*/); @@ -259,6 +261,7 @@ TEST(DateLUTTest, TimeValuesAtRightBoderOfRangeOfOldLUT) EXPECT_EQ(lut.toRelativeMonthNum(time), 25273 /*unsigned*/); EXPECT_EQ(lut.toRelativeQuarterNum(time), 8424 /*unsigned*/); EXPECT_EQ(lut.toRelativeHourNum(time), 1192873 /*time_t*/); + EXPECT_EQ(lut.toStableRelativeHourNum(time), 1192897 /*time_t*/); EXPECT_EQ(lut.toRelativeMinuteNum(time), 71572397 /*time_t*/); EXPECT_EQ(lut.toStartOfMinuteInterval(time, 6), 4294343520 /*time_t*/); EXPECT_EQ(lut.toStartOfSecondInterval(time, 7), 4294343872 /*time_t*/); From b0dd95447dda159740fac2236c9fe23faf10f6cd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Oct 2022 17:36:08 +0300 Subject: [PATCH 177/252] Update src/Functions/GregorianDate.h Co-authored-by: Antonio Andelic --- src/Functions/GregorianDate.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h index da6a24268f7..5edaffd2b15 100644 --- a/src/Functions/GregorianDate.h +++ b/src/Functions/GregorianDate.h @@ -329,7 +329,8 @@ namespace DB } template - OrdinalDate::OrdinalDate(is_integer auto modified_julian_day) + template + OrdinalDate::OrdinalDate(TDay modified_julian_day) { /// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively). From 898c935d3137be02f6ba59ca803a835fbf5ddb60 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Oct 2022 17:36:49 +0300 Subject: [PATCH 178/252] Update GregorianDate.h --- src/Functions/GregorianDate.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h index 5edaffd2b15..248ff014fa4 100644 --- a/src/Functions/GregorianDate.h +++ b/src/Functions/GregorianDate.h @@ -328,17 +328,16 @@ namespace DB } } - template template OrdinalDate::OrdinalDate(TDay modified_julian_day) { /// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively). 
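/// A minimal sketch, for illustration only, of the clamping performed just below:
/// the DayT template parameter and the free-function form are assumptions based on
/// PATCH 177 above (which introduces a named day-type parameter), not the exact source.
///
///     template <typename DayT>
///     Int64 saturateModifiedJulianDay(DayT modified_julian_day)
///     {
///         /// Out-of-range day numbers are saturated to the bounds, not rejected.
///         if constexpr (is_signed_v<DayT> && std::numeric_limits<DayT>::lowest() < -678941)
///             if (modified_julian_day < -678941)
///                 modified_julian_day = -678941;   /// 0000-01-01
///         if constexpr (std::numeric_limits<DayT>::max() > 2973119)
///             if (modified_julian_day > 2973119)
///                 modified_julian_day = 2973119;   /// 9999-12-31
///         return static_cast<Int64>(modified_julian_day);
///     }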
-        if constexpr (is_signed_v && std::numeric_limits::lowest() < -678941)
+        if constexpr (is_signed_v && std::numeric_limits::lowest() < -678941)
             if (modified_julian_day < -678941)
                 modified_julian_day = -678941;

-        if constexpr (std::numeric_limits::max() > 2973119)
+        if constexpr (std::numeric_limits::max() > 2973119)
             if (modified_julian_day > 2973119)
                 modified_julian_day = 2973119;

From 8942c18caa8a673ef22d0950fda6fce169843682 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Wed, 19 Oct 2022 17:10:31 +0200
Subject: [PATCH 179/252] fix test

---
 tests/queries/0_stateless/02448_clone_replica_lost_part.sql | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql
index 14b75d4c322..371f7389837 100644
--- a/tests/queries/0_stateless/02448_clone_replica_lost_part.sql
+++ b/tests/queries/0_stateless/02448_clone_replica_lost_part.sql
@@ -122,7 +122,7 @@ detach table rmt1;
 -- create a gap in block numbers buy dropping part
 insert into rmt2 values (300);
-alter table rmt2 drop part 'all_19_19_0';
+alter table rmt2 drop part 'all_19_19_0'; -- remove 200
 insert into rmt2 values (400);
 insert into rmt2 values (500);
 insert into rmt2 values (600);
@@ -135,8 +135,8 @@
 select sleep(2) format Null; -- increases probability of reproducing the issue
 -- rmt1 will mimic rmt2, but will not be able to fetch parts for a while
 system stop replicated sends rmt2;
 attach table rmt1;
--- rmt1 should not show the value (100) from dropped part
-select throwIf(n = 100) from rmt1 format Null;
+-- rmt1 should not show the value (200) from dropped part
+select throwIf(n = 200) from rmt1 format Null;
 select 11, arraySort(groupArray(n)) from rmt2;
 system start replicated sends rmt2;

From 0338fd4e8d2d6298d71f21c9edd17ff4b1020faa Mon Sep 17 00:00:00 2001
From: Sema Checherinda <104093494+CheSema@users.noreply.github.com>
Date: Wed, 19 Oct 2022 17:19:00 +0200
Subject: [PATCH 180/252] Update 00463_long_sessions_in_http_interface.reference

It really should be 1 here: the query fails with the exception
"DB::Exception: Table default.t doesn't exist", and so the count of matches
of "Exception" is 1.
--- .../0_stateless/00463_long_sessions_in_http_interface.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.reference b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.reference index 8d97a12a7f1..a14d334a483 100644 --- a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.reference +++ b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.reference @@ -18,7 +18,7 @@ World And cannot be accessed for a non-existent user: 1 The temporary tables created in a session are not accessible without entering this session: -0 +1 A session successfully expire after a timeout: 111 A session successfully expire after a timeout and the session's temporary table shadows the permanent table: From e4f93149ff4c671b6e6aec3ce78e550ef5e27edf Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 19 Oct 2022 17:24:50 +0200 Subject: [PATCH 181/252] fix another issue --- tests/clickhouse-test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 12f85a5adbf..20e63412d91 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -987,7 +987,7 @@ class TestCase: and (proc.stderr is None) and (proc.stdout is None or "Exception" not in proc.stdout) ) - need_drop_database = not maybe_passed + need_drop_database = maybe_passed debug_log = "" if os.path.exists(self.testcase_args.debug_log_file): @@ -2055,7 +2055,7 @@ if __name__ == "__main__": parser.add_argument( "--no-drop-if-fail", action="store_true", - help="Do not drop database for test if test has failed", + help="Do not drop database for test if test has failed (does not work if reference file mismatch)", ) parser.add_argument( "--hide-db-name", From fabc8f5a1833167c6cbc5f6566157304b3b51a15 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Oct 2022 23:27:12 +0200 Subject: [PATCH 182/252] Remove support for {database} macro from the client's prompt --- programs/client/clickhouse-client.xml | 1 - src/Client/ClientBase.cpp | 7 ++----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/programs/client/clickhouse-client.xml b/programs/client/clickhouse-client.xml index 66e7afd8f8c..00f5b26eddf 100644 --- a/programs/client/clickhouse-client.xml +++ b/programs/client/clickhouse-client.xml @@ -19,7 +19,6 @@ {host} {port} {user} - {database} {display_name} Terminal colors: https://misc.flogisoft.com/bash/tip_colors_and_formatting See also: https://wiki.hackzine.org/development/misc/readline-color-prompt.html diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 0a2fbcf9f46..0db7a9533db 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1,7 +1,6 @@ #include #include -#include #include #include #include @@ -9,7 +8,6 @@ #include "config.h" #include -#include #include #include #include @@ -32,7 +30,6 @@ #include #include #include -#include #include #include @@ -70,10 +67,10 @@ #include #include #include -#include #include #include + namespace fs = std::filesystem; using namespace std::literals; @@ -1925,7 +1922,7 @@ bool ClientBase::processQueryText(const String & text) String ClientBase::prompt() const { - return boost::replace_all_copy(prompt_by_server_display_name, "{database}", config().getString("database", "default")); + return prompt_by_server_display_name; } From 81750a81e768eabaf772e5be259064db3f4fb26b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 20 Oct 2022 01:17:11 +0200 Subject: 
[PATCH 183/252] Add a test for #16827 --- .../0_stateless/02467_cross_join_three_table_functions.reference | 1 + .../0_stateless/02467_cross_join_three_table_functions.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 tests/queries/0_stateless/02467_cross_join_three_table_functions.reference create mode 100644 tests/queries/0_stateless/02467_cross_join_three_table_functions.sql diff --git a/tests/queries/0_stateless/02467_cross_join_three_table_functions.reference b/tests/queries/0_stateless/02467_cross_join_three_table_functions.reference new file mode 100644 index 00000000000..0718dd8e65f --- /dev/null +++ b/tests/queries/0_stateless/02467_cross_join_three_table_functions.reference @@ -0,0 +1 @@ +1320 diff --git a/tests/queries/0_stateless/02467_cross_join_three_table_functions.sql b/tests/queries/0_stateless/02467_cross_join_three_table_functions.sql new file mode 100644 index 00000000000..5c7da815bbe --- /dev/null +++ b/tests/queries/0_stateless/02467_cross_join_three_table_functions.sql @@ -0,0 +1 @@ +SELECT count(*) FROM numbers(10) AS a, numbers(11) AS b, numbers(12) AS c; From b4d241b54dd5abe797e618521707df4dacdd35c0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 20 Oct 2022 01:39:08 +0200 Subject: [PATCH 184/252] Add a test for #13653 --- tests/queries/0_stateless/02468_has_any_tuple.reference | 4 ++++ tests/queries/0_stateless/02468_has_any_tuple.sql | 4 ++++ 2 files changed, 8 insertions(+) create mode 100644 tests/queries/0_stateless/02468_has_any_tuple.reference create mode 100644 tests/queries/0_stateless/02468_has_any_tuple.sql diff --git a/tests/queries/0_stateless/02468_has_any_tuple.reference b/tests/queries/0_stateless/02468_has_any_tuple.reference new file mode 100644 index 00000000000..252a9293563 --- /dev/null +++ b/tests/queries/0_stateless/02468_has_any_tuple.reference @@ -0,0 +1,4 @@ +1 +1 +[(3,3)] +1 diff --git a/tests/queries/0_stateless/02468_has_any_tuple.sql b/tests/queries/0_stateless/02468_has_any_tuple.sql new file mode 100644 index 00000000000..12c7222d593 --- /dev/null +++ b/tests/queries/0_stateless/02468_has_any_tuple.sql @@ -0,0 +1,4 @@ +select [(toUInt8(3), toUInt8(3))] = [(toInt16(3), toInt16(3))]; +select hasAny([(toInt16(3), toInt16(3))],[(toInt16(3), toInt16(3))]); +select arrayFilter(x -> x = (toInt16(3), toInt16(3)), arrayZip([toUInt8(3)], [toUInt8(3)])); +select hasAny([(toUInt8(3), toUInt8(3))],[(toInt16(3), toInt16(3))]); From 00f9ae99249c636320141607ee51dac170ae6938 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 20 Oct 2022 04:42:35 +0200 Subject: [PATCH 185/252] Correct documentation for settings --- src/Core/Settings.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 07618ee731d..0b8d24b1abc 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -331,8 +331,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, max_bytes_before_remerge_sort, 1000000000, "In case of ORDER BY with LIMIT, when memory usage is higher than specified threshold, perform additional steps of merging blocks before final merge to keep just top LIMIT rows.", 0) \ M(Float, remerge_sort_lowered_memory_bytes_ratio, 2., "If memory usage after remerge does not reduced by this ratio, remerge will be disabled.", 0) \ \ - M(UInt64, max_result_rows, 0, "Limit on result size in rows. Also checked for intermediate data sent from remote servers.", 0) \ - M(UInt64, max_result_bytes, 0, "Limit on result size in bytes (uncompressed). 
Also checked for intermediate data sent from remote servers.", 0) \ + M(UInt64, max_result_rows, 0, "Limit on result size in rows. The query will stop after processing a block of data if the threshold is met, but it will not cut the last block of the result, therefore the result size can be larger than the threshold.", 0) \ + M(UInt64, max_result_bytes, 0, "Limit on result size in bytes (uncompressed). The query will stop after processing a block of data if the threshold is met, but it will not cut the last block of the result, therefore the result size can be larger than the threshold. Caveats: the result size in memory is taken into account for this threshold. Even if the result size is small, it can reference larger data structures in memory, representing dictionaries of LowCardinality columns, and Arenas of AggregateFunction columns, so the threshold can be exceeded despite the small result size. The setting is fairly low level and should be used with caution.", 0) \ M(OverflowMode, result_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \ \ /* TODO: Check also when merging and finalizing aggregate functions. */ \ From 9e59735b8137f2eb9fb70c08b5e50c970549eae1 Mon Sep 17 00:00:00 2001 From: KinderRiven <1339764596@qq.com> Date: Thu, 20 Oct 2022 03:13:15 +0000 Subject: [PATCH 186/252] fix show cache --- tests/queries/0_stateless/02344_show_caches.reference | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02344_show_caches.reference b/tests/queries/0_stateless/02344_show_caches.reference index 0c5957edb82..68882f63e1f 100644 --- a/tests/queries/0_stateless/02344_show_caches.reference +++ b/tests/queries/0_stateless/02344_show_caches.reference @@ -1,12 +1,13 @@ cached_azure s3_cache_2 +s3_cache +s3_cache_3 +s3_cache_multi s3_cache_4 s3_cache_5 local_cache +s3_cache_6 s3_cache_small local_cache_2 local_cache_3 -s3_cache_multi -s3_cache_3 -s3_cache s3_cache_multi_2 From e2417eb518540b631f83b5c34adb00a0f136442c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 20 Oct 2022 09:33:48 +0200 Subject: [PATCH 187/252] tests: fix 00705_drop_create_merge_tree flakiness CI caught tiemout for this test [1]: 2022.10.19 16:43:46.238264 [ 24048 ] {aea0ff2a-f8de-498a-bd9f-0b8069a49f48} executeQuery: Code: 60. DB::Exception: Table test_orfkwn0y.table doesn't exist. (UNKNOWN_TABLE) (version 22.10.1.1) (from [::1]:60028) (comment: 00705_drop_create_merge_tree.sh) (in query: DROP TABLE table), Stack trace (when copying this message, always include the lines below): ... 2022.10.19 16:53:34.484777 [ 24042 ] {aec5a80a-4492-429b-87fb-7dbf5ffb5d67} executeQuery: (from [::1]:57944) (comment: 00705_drop_create_merge_tree.sh) DROP DATABASE test_orfkwn0y (stage: Complete) But as you can see there is huge delay between last query from the test and final DROP DATABASE. [1]: https://s3.amazonaws.com/clickhouse-test-reports/42457/65cd040d1565bb7b2a9ba515041c3a139d31a4f9/stateless_tests__tsan__[1/3]/runlog.log Apparently it is the same issue in bash [1]. [1]: https://gist.github.com/azat/affbda3f8c6b5c38648d4ab105777d88 Anyway it is easier to simply invoke clickhouse-client only two times, since each invocation is very slow (~1-2 sec) in debug build. 
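(An aside on PATCH 185 above: the reworded descriptions of `max_result_rows` /
`max_result_bytes` describe a per-block check that never cuts the final block.
A toy model of that semantics, as a sketch only; this is not ClickHouse's actual
limit-enforcement code:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main()
    {
        const uint64_t max_result_rows = 10;
        const std::vector<uint64_t> block_sizes = {4, 4, 4, 4}; // rows per result block

        uint64_t result_rows = 0;
        for (uint64_t rows : block_sizes)
        {
            result_rows += rows;              // the whole block is appended first...
            if (result_rows >= max_result_rows)
                break;                        // ...then the threshold stops the query
        }

        std::cout << result_rows << '\n';     // prints 12, i.e. more than the limit of 10
    }

This is why, as the new description says, the result size can exceed the
threshold by up to one block.)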
Signed-off-by: Azat Khuzhin --- .../00705_drop_create_merge_tree.reference | 1 - .../00705_drop_create_merge_tree.sh | 33 ++----------------- 2 files changed, 3 insertions(+), 31 deletions(-) diff --git a/tests/queries/0_stateless/00705_drop_create_merge_tree.reference b/tests/queries/0_stateless/00705_drop_create_merge_tree.reference index 8b137891791..e69de29bb2d 100644 --- a/tests/queries/0_stateless/00705_drop_create_merge_tree.reference +++ b/tests/queries/0_stateless/00705_drop_create_merge_tree.reference @@ -1 +0,0 @@ - diff --git a/tests/queries/0_stateless/00705_drop_create_merge_tree.sh b/tests/queries/0_stateless/00705_drop_create_merge_tree.sh index 146d6e54c0b..d7754091290 100755 --- a/tests/queries/0_stateless/00705_drop_create_merge_tree.sh +++ b/tests/queries/0_stateless/00705_drop_create_merge_tree.sh @@ -1,39 +1,12 @@ #!/usr/bin/env bash # Tags: no-fasttest -set -e - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -function stress() -{ - # We set up a signal handler to make sure to wait for all queries to be finished before ending - CONTINUE=true - handle_interruption() - { - CONTINUE=false - } - trap handle_interruption INT - - while $CONTINUE; do - ${CLICKHOUSE_CLIENT} --query "CREATE TABLE IF NOT EXISTS table (x UInt8) ENGINE = MergeTree ORDER BY tuple()" 2>/dev/null - ${CLICKHOUSE_CLIENT} --query "DROP TABLE table" 2>/dev/null - done - - trap - INT -} - -# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout -export -f stress - -for _ in {1..5}; do - # Ten seconds are just barely enough to reproduce the issue in most of runs. - timeout -s INT 10 bash -c stress & -done - +yes 'CREATE TABLE IF NOT EXISTS table (x UInt8) ENGINE = MergeTree ORDER BY tuple();' | head -n 1000 | $CLICKHOUSE_CLIENT --ignore-error -nm 2>/dev/null & +yes 'DROP TABLE table;' | head -n 1000 | $CLICKHOUSE_CLIENT --ignore-error -nm 2>/dev/null & wait -echo -${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table"; +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table" From 885f71d5f50356cf96106f8929d4226c3b221239 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 20 Oct 2022 08:18:22 +0000 Subject: [PATCH 188/252] Replace is_extended_result by ResultPrecision --- src/Functions/DateTimeTransforms.h | 58 ++++++++++++++------------ src/Functions/dateDiff.cpp | 16 +++---- src/Functions/toRelativeDayNum.cpp | 2 +- src/Functions/toRelativeHourNum.cpp | 2 +- src/Functions/toRelativeMinuteNum.cpp | 2 +- src/Functions/toRelativeMonthNum.cpp | 2 +- src/Functions/toRelativeQuarterNum.cpp | 2 +- src/Functions/toRelativeSecondNum.cpp | 2 +- src/Functions/toRelativeWeekNum.cpp | 2 +- src/Functions/toRelativeYearNum.cpp | 2 +- 10 files changed, 48 insertions(+), 42 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index c4ade5facd5..fa66cb2a891 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -1034,19 +1034,25 @@ struct ToISOWeekImpl using FactorTransform = ToISOYearImpl; }; -/// Unsigned results (is_extended_result = false) potentially lead to overflows when returning values. +enum class ResultPrecision +{ + Standard, + Extended +}; + +/// Standard precision results (precision_ == ResultPrecision::Standard) potentially lead to overflows when returning values. /// This mode is used by SQL functions "toRelative*Num()" which cannot easily be changed due to backward compatibility. 
/// According to documentation, these functions merely need to compute the time difference to a deterministic, fixed point in the past. /// As a future TODO, we should fix their behavior in a backwards-compatible way. /// See https://github.com/ClickHouse/ClickHouse/issues/41977#issuecomment-1267536814. -template +template struct ToRelativeYearNumImpl { static constexpr auto name = "toRelativeYearNum"; static inline auto execute(Int64 t, const DateLUTImpl & time_zone) { - if constexpr (is_extended_result) + if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toYear(t)); else return static_cast(time_zone.toYear(t)); @@ -1057,7 +1063,7 @@ struct ToRelativeYearNumImpl } static inline auto execute(Int32 d, const DateLUTImpl & time_zone) { - if constexpr (is_extended_result) + if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toYear(ExtendedDayNum(d))); else return static_cast(time_zone.toYear(ExtendedDayNum(d))); @@ -1070,14 +1076,14 @@ struct ToRelativeYearNumImpl using FactorTransform = ZeroTransform; }; -template +template struct ToRelativeQuarterNumImpl { static constexpr auto name = "toRelativeQuarterNum"; static inline auto execute(Int64 t, const DateLUTImpl & time_zone) { - if constexpr (is_extended_result) + if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toRelativeQuarterNum(t)); else return static_cast(time_zone.toRelativeQuarterNum(t)); @@ -1088,7 +1094,7 @@ struct ToRelativeQuarterNumImpl } static inline auto execute(Int32 d, const DateLUTImpl & time_zone) { - if constexpr (is_extended_result) + if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toRelativeQuarterNum(ExtendedDayNum(d))); else return static_cast(time_zone.toRelativeQuarterNum(ExtendedDayNum(d))); @@ -1101,14 +1107,14 @@ struct ToRelativeQuarterNumImpl using FactorTransform = ZeroTransform; }; -template +template struct ToRelativeMonthNumImpl { static constexpr auto name = "toRelativeMonthNum"; static inline auto execute(Int64 t, const DateLUTImpl & time_zone) { - if constexpr (is_extended_result) + if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toRelativeMonthNum(t)); else return static_cast(time_zone.toRelativeMonthNum(t)); @@ -1119,7 +1125,7 @@ struct ToRelativeMonthNumImpl } static inline auto execute(Int32 d, const DateLUTImpl & time_zone) { - if constexpr (is_extended_result) + if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toRelativeMonthNum(ExtendedDayNum(d))); else return static_cast(time_zone.toRelativeMonthNum(ExtendedDayNum(d))); @@ -1132,14 +1138,14 @@ struct ToRelativeMonthNumImpl using FactorTransform = ZeroTransform; }; -template +template struct ToRelativeWeekNumImpl { static constexpr auto name = "toRelativeWeekNum"; static inline auto execute(Int64 t, const DateLUTImpl & time_zone) { - if constexpr (is_extended_result) + if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toRelativeWeekNum(t)); else return static_cast(time_zone.toRelativeWeekNum(t)); @@ -1150,7 +1156,7 @@ struct ToRelativeWeekNumImpl } static inline auto execute(Int32 d, const DateLUTImpl & time_zone) { - if constexpr (is_extended_result) + if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toRelativeWeekNum(ExtendedDayNum(d))); else return static_cast(time_zone.toRelativeWeekNum(ExtendedDayNum(d))); @@ -1163,14 +1169,14 @@ struct ToRelativeWeekNumImpl using 
FactorTransform = ZeroTransform; }; -template +template struct ToRelativeDayNumImpl { static constexpr auto name = "toRelativeDayNum"; static inline auto execute(Int64 t, const DateLUTImpl & time_zone) { - if constexpr (is_extended_result) + if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toDayNum(t)); else return static_cast(time_zone.toDayNum(t)); @@ -1181,7 +1187,7 @@ struct ToRelativeDayNumImpl } static inline auto execute(Int32 d, const DateLUTImpl &) { - if constexpr (is_extended_result) + if constexpr (precision_ == ResultPrecision::Extended) return static_cast(static_cast(d)); else return static_cast(static_cast(d)); @@ -1194,35 +1200,35 @@ struct ToRelativeDayNumImpl using FactorTransform = ZeroTransform; }; -template +template struct ToRelativeHourNumImpl { static constexpr auto name = "toRelativeHourNum"; static inline auto execute(Int64 t, const DateLUTImpl & time_zone) { - if constexpr (is_extended_result) + if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toStableRelativeHourNum(t)); else return static_cast(time_zone.toRelativeHourNum(t)); } static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone) { - if constexpr (is_extended_result) + if constexpr (precision_ == ResultPrecision::Extended) return time_zone.toStableRelativeHourNum(static_cast(t)); else return time_zone.toRelativeHourNum(static_cast(t)); } static inline auto execute(Int32 d, const DateLUTImpl & time_zone) { - if constexpr (is_extended_result) + if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toStableRelativeHourNum(ExtendedDayNum(d))); else return static_cast(time_zone.toRelativeHourNum(ExtendedDayNum(d))); } static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) { - if constexpr (is_extended_result) + if constexpr (precision_ == ResultPrecision::Extended) return time_zone.toStableRelativeHourNum(DayNum(d)); else return time_zone.toRelativeHourNum(DayNum(d)); @@ -1231,14 +1237,14 @@ struct ToRelativeHourNumImpl using FactorTransform = ZeroTransform; }; -template +template struct ToRelativeMinuteNumImpl { static constexpr auto name = "toRelativeMinuteNum"; static inline auto execute(Int64 t, const DateLUTImpl & time_zone) { - if constexpr (is_extended_result) + if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toRelativeMinuteNum(t)); else return static_cast(time_zone.toRelativeMinuteNum(t)); @@ -1249,7 +1255,7 @@ struct ToRelativeMinuteNumImpl } static inline auto execute(Int32 d, const DateLUTImpl & time_zone) { - if constexpr (is_extended_result) + if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.toRelativeMinuteNum(ExtendedDayNum(d))); else return static_cast(time_zone.toRelativeMinuteNum(ExtendedDayNum(d))); @@ -1262,7 +1268,7 @@ struct ToRelativeMinuteNumImpl using FactorTransform = ZeroTransform; }; -template +template struct ToRelativeSecondNumImpl { static constexpr auto name = "toRelativeSecondNum"; @@ -1277,7 +1283,7 @@ struct ToRelativeSecondNumImpl } static inline auto execute(Int32 d, const DateLUTImpl & time_zone) { - if constexpr (is_extended_result) + if constexpr (precision_ == ResultPrecision::Extended) return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); else return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 479966ac9fd..b33fcf32de1 100644 --- a/src/Functions/dateDiff.cpp +++ 
b/src/Functions/dateDiff.cpp @@ -112,21 +112,21 @@ public: const auto & timezone_y = extractTimeZoneFromFunctionArguments(arguments, 3, 2); if (unit == "year" || unit == "yy" || unit == "yyyy") - dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "quarter" || unit == "qq" || unit == "q") - dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "month" || unit == "mm" || unit == "m") - dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "week" || unit == "wk" || unit == "ww") - dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "day" || unit == "dd" || unit == "d") - dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "hour" || unit == "hh" || unit == "h") - dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "minute" || unit == "mi" || unit == "n") - dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "second" || unit == "ss" || unit == "s") - dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} does not support '{}' unit", getName(), unit); diff --git a/src/Functions/toRelativeDayNum.cpp b/src/Functions/toRelativeDayNum.cpp index e2dee0e305c..db3eb119dcf 100644 --- a/src/Functions/toRelativeDayNum.cpp +++ b/src/Functions/toRelativeDayNum.cpp @@ -7,7 +7,7 @@ namespace DB { -using FunctionToRelativeDayNum = FunctionDateOrDateTimeToSomething>; +using FunctionToRelativeDayNum = FunctionDateOrDateTimeToSomething>; REGISTER_FUNCTION(ToRelativeDayNum) { diff --git a/src/Functions/toRelativeHourNum.cpp b/src/Functions/toRelativeHourNum.cpp index 67e61735134..838b1bb1ca1 100644 --- a/src/Functions/toRelativeHourNum.cpp +++ b/src/Functions/toRelativeHourNum.cpp @@ -7,7 +7,7 @@ namespace DB { -using FunctionToRelativeHourNum = FunctionDateOrDateTimeToSomething>; +using FunctionToRelativeHourNum = FunctionDateOrDateTimeToSomething>; REGISTER_FUNCTION(ToRelativeHourNum) { diff --git a/src/Functions/toRelativeMinuteNum.cpp b/src/Functions/toRelativeMinuteNum.cpp index 5ead860131f..e9318517119 100644 --- a/src/Functions/toRelativeMinuteNum.cpp +++ b/src/Functions/toRelativeMinuteNum.cpp @@ -7,7 +7,7 @@ namespace DB { -using FunctionToRelativeMinuteNum = FunctionDateOrDateTimeToSomething>; +using FunctionToRelativeMinuteNum = FunctionDateOrDateTimeToSomething>; REGISTER_FUNCTION(ToRelativeMinuteNum) { diff --git a/src/Functions/toRelativeMonthNum.cpp b/src/Functions/toRelativeMonthNum.cpp index f845d036ff5..7b058c3ba12 100644 --- a/src/Functions/toRelativeMonthNum.cpp +++ b/src/Functions/toRelativeMonthNum.cpp @@ -7,7 +7,7 @@ namespace DB { -using FunctionToRelativeMonthNum = FunctionDateOrDateTimeToSomething>; +using FunctionToRelativeMonthNum = FunctionDateOrDateTimeToSomething>; REGISTER_FUNCTION(ToRelativeMonthNum) { diff --git 
a/src/Functions/toRelativeQuarterNum.cpp b/src/Functions/toRelativeQuarterNum.cpp index 2862a0aeff7..c7702d47f42 100644 --- a/src/Functions/toRelativeQuarterNum.cpp +++ b/src/Functions/toRelativeQuarterNum.cpp @@ -7,7 +7,7 @@ namespace DB { -using FunctionToRelativeQuarterNum = FunctionDateOrDateTimeToSomething>; +using FunctionToRelativeQuarterNum = FunctionDateOrDateTimeToSomething>; REGISTER_FUNCTION(ToRelativeQuarterNum) { diff --git a/src/Functions/toRelativeSecondNum.cpp b/src/Functions/toRelativeSecondNum.cpp index c7552d550b8..db80f721fbd 100644 --- a/src/Functions/toRelativeSecondNum.cpp +++ b/src/Functions/toRelativeSecondNum.cpp @@ -7,7 +7,7 @@ namespace DB { -using FunctionToRelativeSecondNum = FunctionDateOrDateTimeToSomething>; +using FunctionToRelativeSecondNum = FunctionDateOrDateTimeToSomething>; REGISTER_FUNCTION(ToRelativeSecondNum) { diff --git a/src/Functions/toRelativeWeekNum.cpp b/src/Functions/toRelativeWeekNum.cpp index c35eb18edaf..beca00d8cc4 100644 --- a/src/Functions/toRelativeWeekNum.cpp +++ b/src/Functions/toRelativeWeekNum.cpp @@ -7,7 +7,7 @@ namespace DB { -using FunctionToRelativeWeekNum = FunctionDateOrDateTimeToSomething>; +using FunctionToRelativeWeekNum = FunctionDateOrDateTimeToSomething>; REGISTER_FUNCTION(ToRelativeWeekNum) { diff --git a/src/Functions/toRelativeYearNum.cpp b/src/Functions/toRelativeYearNum.cpp index fb80957062b..b4fe3318129 100644 --- a/src/Functions/toRelativeYearNum.cpp +++ b/src/Functions/toRelativeYearNum.cpp @@ -7,7 +7,7 @@ namespace DB { -using FunctionToRelativeYearNum = FunctionDateOrDateTimeToSomething>; +using FunctionToRelativeYearNum = FunctionDateOrDateTimeToSomething>; REGISTER_FUNCTION(ToRelativeYearNum) { From 7bb94b7643aa15b2488c956135236eac1af3ba79 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 20 Oct 2022 09:21:39 +0200 Subject: [PATCH 189/252] Fix template typenames --- src/Functions/GregorianDate.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h index 248ff014fa4..b91c1b6391d 100644 --- a/src/Functions/GregorianDate.h +++ b/src/Functions/GregorianDate.h @@ -89,7 +89,8 @@ namespace DB * integral type which should be at least 32 bits wide, and * should preferably signed. */ - explicit OrdinalDate(is_integer auto modified_julian_day); + template + explicit OrdinalDate(DayT modified_julian_day); /** Convert to Modified Julian Day. The type T is an integral * type which should be at least 32 bits wide, and should @@ -328,16 +329,17 @@ namespace DB } } - template - OrdinalDate::OrdinalDate(TDay modified_julian_day) + template + template + OrdinalDate::OrdinalDate(DayT modified_julian_day) { /// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively). - if constexpr (is_signed_v && std::numeric_limits::lowest() < -678941) + if constexpr (is_signed_v && std::numeric_limits::lowest() < -678941) if (modified_julian_day < -678941) modified_julian_day = -678941; - if constexpr (std::numeric_limits::max() > 2973119) + if constexpr (std::numeric_limits::max() > 2973119) if (modified_julian_day > 2973119) modified_julian_day = 2973119; From 87db1b534b4f6e4cfdab35e24407c0d0671c9de5 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Thu, 20 Oct 2022 10:50:32 +0200 Subject: [PATCH 190/252] Update 00463_long_sessions_in_http_interface.sh no -c option in grep in busy loop. 
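(A note on PATCH 188 above: the `ResultPrecision` template parameter selects, at
compile time, between the legacy narrow result type and the extended wide one.
A self-contained sketch of that dispatch, with simplified types and hour
arithmetic standing in for the actual DateLUT code:

    #include <cstdint>
    #include <iostream>

    enum class ResultPrecision { Standard, Extended };

    template <ResultPrecision precision_>
    struct ToRelativeHourNumSketch
    {
        static auto execute(int64_t t)
        {
            if constexpr (precision_ == ResultPrecision::Extended)
                return static_cast<int64_t>(t / 3600);   // wide result: correct before 1970
            else
                return static_cast<uint32_t>(t / 3600);  // legacy result: wraps around
        }
    };

    int main()
    {
        const int64_t t = -3600; // one hour before the Unix epoch
        std::cout << ToRelativeHourNumSketch<ResultPrecision::Extended>::execute(t) << '\n'; // -1
        std::cout << ToRelativeHourNumSketch<ResultPrecision::Standard>::execute(t) << '\n'; // 4294967295
    }

In the patch itself, dateDiff() selects the Extended variants so differences
crossing the epoch stay correct, while the toRelative*Num() SQL functions keep
the Standard behaviour for backward compatibility.)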
--- .../0_stateless/00463_long_sessions_in_http_interface.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh index 9bf8a0c297a..89da84a5bdd 100755 --- a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh +++ b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh @@ -79,7 +79,7 @@ ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABA # An infinite loop is required to make the test reliable. We will ensure that at least once the query on the line above has started before this check while true do - ${CLICKHOUSE_CLIENT} --query "SELECT count() > 0 FROM system.processes WHERE query_id = '${CLICKHOUSE_DATABASE}_9'" | grep -c -F '1' && break || sleep 1 + ${CLICKHOUSE_CLIENT} --query "SELECT count() > 0 FROM system.processes WHERE query_id = '${CLICKHOUSE_DATABASE}_9'" | grep -F '1' && break || sleep 1 done ${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_9" --data-binary "SELECT 1" | grep -c -F 'Session is locked' From 31ad33561c7a52551ab99edf78a599510902c0c9 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 20 Oct 2022 10:53:36 +0000 Subject: [PATCH 191/252] Add Date32 for dateName function --- src/Functions/dateName.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Functions/dateName.cpp b/src/Functions/dateName.cpp index 3911b1cf838..e33d48c53e5 100644 --- a/src/Functions/dateName.cpp +++ b/src/Functions/dateName.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -34,6 +35,11 @@ template <> struct DataTypeToTimeTypeMap using TimeType = UInt16; }; +template <> struct DataTypeToTimeTypeMap +{ + using TimeType = Int32; +}; + template <> struct DataTypeToTimeTypeMap { using TimeType = UInt32; @@ -83,7 +89,7 @@ public: WhichDataType first_argument_type(arguments[1].type); - if (!(first_argument_type.isDate() || first_argument_type.isDateTime() || first_argument_type.isDateTime64())) + if (!(first_argument_type.isDate() || first_argument_type.isDateTime() || first_argument_type.isDate32() || first_argument_type.isDateTime64())) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2 argument of function {}. 
Must be a date or a date with time", @@ -109,6 +115,7 @@ public: if (!((res = executeType(arguments, result_type)) || (res = executeType(arguments, result_type)) + || (res = executeType(arguments, result_type)) || (res = executeType(arguments, result_type)))) throw Exception( ErrorCodes::ILLEGAL_COLUMN, From 55df097ea3b5b4b1cfdaa6515f9fa710d09538b6 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 20 Oct 2022 11:06:06 +0000 Subject: [PATCH 192/252] Remove extra toString in Exception formatting --- src/Functions/dateName.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/dateName.cpp b/src/Functions/dateName.cpp index e33d48c53e5..36c0be49190 100644 --- a/src/Functions/dateName.cpp +++ b/src/Functions/dateName.cpp @@ -78,7 +78,7 @@ public: ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: passed {}", getName(), - toString(arguments.size())); + arguments.size()); if (!WhichDataType(arguments[0].type).isString()) throw Exception( @@ -114,8 +114,8 @@ public: ColumnPtr res; if (!((res = executeType(arguments, result_type)) - || (res = executeType(arguments, result_type)) || (res = executeType(arguments, result_type)) + || (res = executeType(arguments, result_type)) || (res = executeType(arguments, result_type)))) throw Exception( ErrorCodes::ILLEGAL_COLUMN, From 0d07aeec2f1393730ba1ac99a17db02a4f645b88 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 20 Oct 2022 13:09:02 +0200 Subject: [PATCH 193/252] Fix logical error from welchTTest (#42487) --- src/AggregateFunctions/AggregateFunctionWelchTTest.cpp | 10 +++++++++- .../0_stateless/02461_welch_t_test_fuzz.reference | 0 tests/queries/0_stateless/02461_welch_t_test_fuzz.sql | 8 ++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02461_welch_t_test_fuzz.reference create mode 100644 tests/queries/0_stateless/02461_welch_t_test_fuzz.sql diff --git a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp index 74000296a2d..3a72e0e92bb 100644 --- a/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionWelchTTest.cpp @@ -40,7 +40,15 @@ struct WelchTTestData : public TTestMoments Float64 denominator_x = sx2 * sx2 / (nx * nx * (nx - 1)); Float64 denominator_y = sy2 * sy2 / (ny * ny * (ny - 1)); - return numerator / (denominator_x + denominator_y); + auto result = numerator / (denominator_x + denominator_y); + + if (result <= 0 || std::isinf(result) || isNaN(result)) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Cannot calculate p_value, because the t-distribution \ + has inappropriate value of degrees of freedom (={}). 
It should be > 0", result); + + return result; } std::tuple getResult() const diff --git a/tests/queries/0_stateless/02461_welch_t_test_fuzz.reference b/tests/queries/0_stateless/02461_welch_t_test_fuzz.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02461_welch_t_test_fuzz.sql b/tests/queries/0_stateless/02461_welch_t_test_fuzz.sql new file mode 100644 index 00000000000..b22dc49dec3 --- /dev/null +++ b/tests/queries/0_stateless/02461_welch_t_test_fuzz.sql @@ -0,0 +1,8 @@ + +DROP TABLE IF EXISTS welch_ttest__fuzz_7; +CREATE TABLE welch_ttest__fuzz_7 (left UInt128, right UInt128) ENGINE = Memory; + +INSERT INTO welch_ttest__fuzz_7 VALUES (0.010268, 0), (0.000167, 0), (0.000167, 0), (0.159258, 1), (0.136278, 1), (0.122389, 1); + +SELECT roundBankers(welchTTest(left, right).2, 6) from welch_ttest__fuzz_7; -- { serverError 36 } +SELECT roundBankers(studentTTest(left, right).2, 6) from welch_ttest__fuzz_7; -- { serverError 36 } From 52eefe1ffd5ef4b1940605ba6d0b7141c8530ab7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 20 Oct 2022 13:25:48 +0000 Subject: [PATCH 194/252] Fix 02403_enable_extended_results_for_datetime_functions --- .../functions/date-time-functions.md | 8 +---- ...d_results_for_datetime_functions.reference | 32 +++++++++---------- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 368ddce1cfe..351c5859bf9 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -969,13 +969,7 @@ SELECT now('Europe/Moscow'); ## timeSlots(StartTime, Duration,\[, Size\]) {#timeslotsstarttime-duration-size} Ð”Ð»Ñ Ð¸Ð½Ñ‚ÐµÑ€Ð²Ð°Ð»Ð°, начинающегоÑÑ Ð² `StartTime` и длÑщегоÑÑ `Duration` Ñекунд, возвращает маÑÑив моментов времени, кратных `Size`. Параметр `Size` указывать необÑзательно, по умолчанию он равен 1800 Ñекундам (30 минутам) - необÑзательный параметр. -<<<<<<< HEAD -Ð”Ð°Ð½Ð½Ð°Ñ Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ð¼Ð¾Ð¶ÐµÑ‚ иÑпользоватьÑÑ, например, Ð´Ð»Ñ Ð°Ð½Ð°Ð»Ð¸Ð·Ð° количеÑтва проÑмотров Ñтраницы за ÑоответÑтвующую ÑеÑÑию. -Ðргумент `StartTime` может иметь тип `DateTime` или `DateTime64`. Ð’ Ñлучае, еÑли иÑпользуетÑÑ `DateTime`, аргументы `Duration` и `Size` должны иметь тип `UInt32`; Ð”Ð»Ñ DateTime64 они должны быть типа `Decimal64`. -======= -Ð”Ð°Ð½Ð½Ð°Ñ Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ð¼Ð¾Ð¶ÐµÑ‚ иÑпользоватьÑÑ, например, Ð´Ð»Ñ Ð°Ð½Ð°Ð»Ð¸Ð·Ð° количеÑтва проÑмотров Ñтраницы за ÑоответÑтвующую ÑеÑÑию. -Ðргумент `StartTime` может иметь тип `DateTime` или `DateTime64`. Ð’ Ñлучае, еÑли иÑпользуетÑÑ `DateTime`, аргументы `Duration` и `Size` должны иметь тип `UInt32`; Ð”Ð»Ñ DateTime64 они должны быть типа `Decimal64`. ->>>>>>> parent of df934d8762 (Merge pull request #40217 from zvonand/zvonand-minmax) + Возвращает маÑÑив DateTime/DateTime64 (тип будет Ñовпадать Ñ Ñ‚Ð¸Ð¿Ð¾Ð¼ параметра ’StartTime’). Ð”Ð»Ñ DateTime64 маÑштаб(scale) возвращаемой величины может отличатьÑÑ Ð¾Ñ‚ маÑштаба фргумента ’StartTime’ --- результат будет иметь наибольший маÑштаб Ñреди вÑех данных аргументов. 
 Пример использования:
diff --git a/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.reference b/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.reference
index 5773810bf64..025191c234a 100644
--- a/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.reference
+++ b/tests/queries/0_stateless/02403_enable_extended_results_for_datetime_functions.reference
@@ -42,39 +42,39 @@ timeSlot;toDateTime64;true 1920-02-02 10:00:00.000
 type;timeSlot;toDateTime64;true DateTime64(3, \'UTC\')
 toStartOfDay;toDate32;true 1920-02-02 00:00:00.000
 type;toStartOfDay;toDate32;true DateTime64(3, \'UTC\')
-toStartOfYear;toDate32;false 1970-01-01
+toStartOfYear;toDate32;false 2099-06-06
 type;toStartOfYear;toDate32;false Date
-toStartOfYear;toDateTime64;false 1970-01-01
+toStartOfYear;toDateTime64;false 2099-06-06
 type;toStartOfYear;toDateTime64;false Date
 toStartOfISOYear;toDate32;false 1970-01-01
 type;toStartOfISOYear;toDate32;false Date
 toStartOfISOYear;toDateTime64;false 1970-01-01
 type;toStartOfISOYear;toDateTime64;false Date
-toStartOfQuarter;toDate32;false 1970-01-01
+toStartOfQuarter;toDate32;false 2099-06-06
 type;toStartOfQuarter;toDate32;false Date
-toStartOfQuarter;toDateTime64;false 1970-01-01
+toStartOfQuarter;toDateTime64;false 2099-06-06
 type;toStartOfQuarter;toDateTime64;false Date
-toStartOfMonth;toDate32;false 1970-01-01
+toStartOfMonth;toDate32;false 2099-07-07
 type;toStartOfMonth;toDate32;false Date
-toStartOfMonth;toDateTime64;false 1970-01-01
+toStartOfMonth;toDateTime64;false 2099-07-07
 type;toStartOfMonth;toDateTime64;false Date
-toStartOfWeek;toDate32;false 1970-01-01
+toStartOfWeek;toDate32;false 2099-07-07
 type;toStartOfWeek;toDate32;false Date
-toStartOfWeek;toDateTime64;false 1970-01-01
+toStartOfWeek;toDateTime64;false 2099-07-07
 type;toStartOfWeek;toDateTime64;false Date
-toMonday;toDate32;false 1970-01-01
+toMonday;toDate32;false 2099-07-08
 type;toMonday;toDate32;false Date
-toMonday;toDateTime64;false 1970-01-01
+toMonday;toDateTime64;false 2099-07-08
 type;toMonday;toDateTime64;false Date
-toLastDayOfMonth;toDate32;false 1970-01-01
+toLastDayOfMonth;toDate32;false 2099-08-04
 type;toLastDayOfMonth;toDate32;false Date
-toLastDayOfMonth;toDateTime64;false 1970-01-01
+toLastDayOfMonth;toDateTime64;false 2099-08-04
 type;toLastDayOfMonth;toDateTime64;false Date
-toStartOfDay;toDateTime64;false 1970-01-01 00:00:00
+toStartOfDay;toDateTime64;false 2056-03-09 06:28:16
 type;toStartOfDay;toDateTime64;false DateTime(\'UTC\')
-toStartOfHour;toDateTime64;false 1970-01-01 00:00:00
+toStartOfHour;toDateTime64;false 2056-03-09 16:28:16
 type;toStartOfHour;toDateTime64;false DateTime(\'UTC\')
-toStartOfMinute;toDateTime64;false 1970-01-01 00:00:00
+toStartOfMinute;toDateTime64;false 2056-03-09 16:51:16
 type;toStartOfMinute;toDateTime64;false DateTime(\'UTC\')
 toStartOfFiveMinutes;toDateTime64;false 2056-03-09 16:48:16
 type;toStartOfFiveMinutes;toDateTime64;false DateTime(\'UTC\')
@@ -84,5 +84,5 @@ toStartOfFifteenMinutes;toDateTime64;false 2056-03-09 16:43:16
 type;toStartOfFifteenMinutes;toDateTime64;false DateTime(\'UTC\')
 timeSlot;toDateTime64;false 2056-03-09 16:58:16
 type;timeSlot;toDateTime64;false DateTime(\'UTC\')
-toStartOfDay;toDate32;false 1970-01-01 00:00:00
+toStartOfDay;toDate32;false 2056-03-09 06:28:16
 type;toStartOfDay;toDate32;false DateTime(\'UTC\')

From 5a725a218286da161e60cc6543a7b5e4ec158b99 Mon Sep 17 00:00:00 2001
From: Roman Vasin
Date: Thu, 20 Oct 2022 13:43:01 +0000
Subject: [PATCH 195/252]
Add tests for Date32 --- .../0_stateless/01811_datename.reference | 14 ++++++------- tests/queries/0_stateless/01811_datename.sql | 21 ++++++++++++------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/tests/queries/0_stateless/01811_datename.reference b/tests/queries/0_stateless/01811_datename.reference index 2968fde301a..29bf05750e7 100644 --- a/tests/queries/0_stateless/01811_datename.reference +++ b/tests/queries/0_stateless/01811_datename.reference @@ -1,10 +1,10 @@ -2021 2021 2021 -2 2 2 -April April April -104 104 104 -14 14 14 -15 15 15 -Wednesday Wednesday Wednesday +2021 2021 2021 2021 +2 2 2 2 +April April April April +104 104 104 104 +14 14 14 14 +15 15 15 15 +Wednesday Wednesday Wednesday Wednesday 11 11 22 22 33 33 diff --git a/tests/queries/0_stateless/01811_datename.sql b/tests/queries/0_stateless/01811_datename.sql index b757d9ae018..fe9f5d20238 100644 --- a/tests/queries/0_stateless/01811_datename.sql +++ b/tests/queries/0_stateless/01811_datename.sql @@ -1,44 +1,51 @@ WITH toDate('2021-04-14') AS date_value, + toDate32('2021-04-14') AS date_32_value, toDateTime('2021-04-14 11:22:33') AS date_time_value, toDateTime64('2021-04-14 11:22:33', 3) AS date_time_64_value -SELECT dateName('year', date_value), dateName('year', date_time_value), dateName('year', date_time_64_value); +SELECT dateName('year', date_value), dateName('year', date_32_value), dateName('year', date_time_value), dateName('year', date_time_64_value); WITH toDate('2021-04-14') AS date_value, + toDate32('2021-04-14') AS date_32_value, toDateTime('2021-04-14 11:22:33') AS date_time_value, toDateTime64('2021-04-14 11:22:33', 3) AS date_time_64_value -SELECT dateName('quarter', date_value), dateName('quarter', date_time_value), dateName('quarter', date_time_64_value); +SELECT dateName('quarter', date_value), dateName('quarter', date_32_value), dateName('quarter', date_time_value), dateName('quarter', date_time_64_value); WITH toDate('2021-04-14') AS date_value, + toDate32('2021-04-14') AS date_32_value, toDateTime('2021-04-14 11:22:33') AS date_time_value, toDateTime64('2021-04-14 11:22:33', 3) AS date_time_64_value -SELECT dateName('month', date_value), dateName('month', date_time_value), dateName('month', date_time_64_value); +SELECT dateName('month', date_value), dateName('month', date_32_value), dateName('month', date_time_value), dateName('month', date_time_64_value); WITH toDate('2021-04-14') AS date_value, + toDate32('2021-04-14') AS date_32_value, toDateTime('2021-04-14 11:22:33') AS date_time_value, toDateTime64('2021-04-14 11:22:33', 3) AS date_time_64_value -SELECT dateName('dayofyear', date_value), dateName('dayofyear', date_time_value), dateName('dayofyear', date_time_64_value); +SELECT dateName('dayofyear', date_value), dateName('dayofyear', date_32_value), dateName('dayofyear', date_time_value), dateName('dayofyear', date_time_64_value); WITH toDate('2021-04-14') AS date_value, + toDate32('2021-04-14') AS date_32_value, toDateTime('2021-04-14 11:22:33') AS date_time_value, toDateTime64('2021-04-14 11:22:33', 3) AS date_time_64_value -SELECT dateName('day', date_value), dateName('day', date_time_value), dateName('day', date_time_64_value); +SELECT dateName('day', date_value), dateName('day', date_32_value), dateName('day', date_time_value), dateName('day', date_time_64_value); WITH toDate('2021-04-14') AS date_value, + toDate32('2021-04-14') AS date_32_value, toDateTime('2021-04-14 11:22:33') AS date_time_value, toDateTime64('2021-04-14 11:22:33', 3) AS date_time_64_value -SELECT 
dateName('week', date_value), dateName('week', date_time_value), dateName('week', date_time_64_value); +SELECT dateName('week', date_value), dateName('week', date_32_value), dateName('week', date_time_value), dateName('week', date_time_64_value); WITH toDate('2021-04-14') AS date_value, + toDate32('2021-04-14') AS date_32_value, toDateTime('2021-04-14 11:22:33') AS date_time_value, toDateTime64('2021-04-14 11:22:33', 3) AS date_time_64_value -SELECT dateName('weekday', date_value), dateName('weekday', date_time_value), dateName('weekday', date_time_64_value); +SELECT dateName('weekday', date_value), dateName('weekday', date_32_value), dateName('weekday', date_time_value), dateName('weekday', date_time_64_value); WITH toDateTime('2021-04-14 11:22:33') AS date_time_value, From c399b021cf9f935cb140a2a285010011cc0f5e76 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 20 Oct 2022 14:09:31 +0200 Subject: [PATCH 196/252] Fix sanitizer reports in integration tests Before they was ignored because first there was a check for a sign of sanitizer (==================), but it was done by clickhouse-server.log, while sanitizer write to stderr.log. Signed-off-by: Azat Khuzhin --- tests/integration/helpers/cluster.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index c987ca292c1..666833013c8 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -2678,7 +2678,9 @@ class ClickHouseCluster: # Check server logs for Fatal messages and sanitizer failures. # NOTE: we cannot do this via docker since in case of Fatal message container may already die. for name, instance in self.instances.items(): - if instance.contains_in_log(SANITIZER_SIGN, from_host=True): + if instance.contains_in_log( + SANITIZER_SIGN, from_host=True, filename="stderr.log" + ): sanitizer_assert_instance = instance.grep_in_log( SANITIZER_SIGN, from_host=True, filename="stderr.log" ) From 89ff3d4731d651e92793d8964c18ee4e1615bd05 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 20 Oct 2022 16:54:52 +0300 Subject: [PATCH 197/252] Update test.py --- tests/integration/test_storage_nats/test.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_storage_nats/test.py b/tests/integration/test_storage_nats/test.py index 63dde8922a6..77db3008524 100644 --- a/tests/integration/test_storage_nats/test.py +++ b/tests/integration/test_storage_nats/test.py @@ -1,3 +1,10 @@ +import pytest + +# FIXME This test is too flaky +# https://github.com/ClickHouse/ClickHouse/issues/39185 + +pytestmark = pytest.mark.skip + import json import os.path as p import random @@ -9,7 +16,6 @@ from random import randrange import math import asyncio -import pytest from google.protobuf.internal.encoder import _VarintBytes from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster, check_nats_is_available, nats_connect_ssl From 4d703b792c122bb32a4291694b7382c103c6073d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 20 Oct 2022 17:13:18 +0200 Subject: [PATCH 198/252] Attempt to fix abort from parallel parsing (#42496) --- src/Common/ThreadPool.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 76ada9e0d75..b3ab20ae592 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -178,7 +178,11 @@ public: func = std::forward(func), args = 
std::make_tuple(std::forward(args)...)]() mutable /// mutable is needed to destroy capture { - SCOPE_EXIT(state->event.set()); + SCOPE_EXIT( + { + state->finished = true; + state->event.set(); + }); state->thread_id = std::this_thread::get_id(); @@ -213,6 +217,17 @@ public: ~ThreadFromGlobalPoolImpl() { + /// The problem is that the our ThreadFromGlobalPool can be actually finished + /// before we try to join the thread or check whether it is joinable or not. + /// In some places we have code like: + /// if (thread->joinable()) + /// thread->join(); + /// Where join() won't be executed in case when we call it + /// from the same std::thread and it will end to std::abort(). + /// So we just do nothing in this case + if (state->finished) + return; + if (initialized()) abort(); } @@ -252,6 +267,9 @@ protected: /// The state used in this object and inside the thread job. Poco::Event event; + + /// To allow joining to the same std::thread after finishing + std::atomic finished{false}; }; std::shared_ptr state; From 9a73eb2fbbcdc199458f09dcc3e7c52e6b4f8071 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 20 Oct 2022 17:25:28 +0200 Subject: [PATCH 199/252] Add functions to generate random values according to the distribution (#42411) --- src/Functions/randDistribution.cpp | 472 ++++++++++++++++++ .../0_stateless/02462_distributions.reference | 12 + .../0_stateless/02462_distributions.sql | 24 + 3 files changed, 508 insertions(+) create mode 100644 src/Functions/randDistribution.cpp create mode 100644 tests/queries/0_stateless/02462_distributions.reference create mode 100644 tests/queries/0_stateless/02462_distributions.sql diff --git a/src/Functions/randDistribution.cpp b/src/Functions/randDistribution.cpp new file mode 100644 index 00000000000..94dad4fdc89 --- /dev/null +++ b/src/Functions/randDistribution.cpp @@ -0,0 +1,472 @@ +#include +#include +#include +#include "Common/Exception.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; +} + +namespace +{ +struct UniformDistribution +{ + using ReturnType = DataTypeFloat64; + static constexpr const char * getName() { return "randUniform"; } + static constexpr size_t getNumberOfArguments() { return 2; } + + static void generate(Float64 min, Float64 max, ColumnFloat64::Container & container) + { + auto distribution = std::uniform_real_distribution<>(min, max); + for (auto & elem : container) + elem = distribution(thread_local_rng); + } +}; + +struct NormalDistribution +{ + using ReturnType = DataTypeFloat64; + static constexpr const char * getName() { return "randNormal"; } + static constexpr size_t getNumberOfArguments() { return 2; } + + static void generate(Float64 mean, Float64 variance, ColumnFloat64::Container & container) + { + auto distribution = std::normal_distribution<>(mean, variance); + for (auto & elem : container) + elem = distribution(thread_local_rng); + } +}; + +struct LogNormalDistribution +{ + using ReturnType = DataTypeFloat64; + static constexpr const char * getName() { return "randLogNormal"; } + static constexpr size_t getNumberOfArguments() { return 2; } + + static void generate(Float64 mean, Float64 variance, ColumnFloat64::Container & container) + { + auto distribution = std::lognormal_distribution<>(mean, variance); + for (auto & elem : container) + elem = 
distribution(thread_local_rng); + } +}; + +struct ExponentialDistribution +{ + using ReturnType = DataTypeFloat64; + static constexpr const char * getName() { return "randExponential"; } + static constexpr size_t getNumberOfArguments() { return 1; } + + static void generate(Float64 lambda, ColumnFloat64::Container & container) + { + auto distribution = std::exponential_distribution<>(lambda); + for (auto & elem : container) + elem = distribution(thread_local_rng); + } +}; + +struct ChiSquaredDistribution +{ + using ReturnType = DataTypeFloat64; + static constexpr const char * getName() { return "randChiSquared"; } + static constexpr size_t getNumberOfArguments() { return 1; } + + static void generate(Float64 degree_of_freedom, ColumnFloat64::Container & container) + { + auto distribution = std::chi_squared_distribution<>(degree_of_freedom); + for (auto & elem : container) + elem = distribution(thread_local_rng); + } +}; + +struct StudentTDistribution +{ + using ReturnType = DataTypeFloat64; + static constexpr const char * getName() { return "randStudentT"; } + static constexpr size_t getNumberOfArguments() { return 1; } + + static void generate(Float64 degree_of_freedom, ColumnFloat64::Container & container) + { + auto distribution = std::student_t_distribution<>(degree_of_freedom); + for (auto & elem : container) + elem = distribution(thread_local_rng); + } +}; + +struct FisherFDistribution +{ + using ReturnType = DataTypeFloat64; + static constexpr const char * getName() { return "randFisherF"; } + static constexpr size_t getNumberOfArguments() { return 2; } + + static void generate(Float64 d1, Float64 d2, ColumnFloat64::Container & container) + { + auto distribution = std::fisher_f_distribution<>(d1, d2); + for (auto & elem : container) + elem = distribution(thread_local_rng); + } +}; + +struct BernoulliDistribution +{ + using ReturnType = DataTypeUInt8; + static constexpr const char * getName() { return "randBernoulli"; } + static constexpr size_t getNumberOfArguments() { return 1; } + + static void generate(Float64 p, ColumnUInt8::Container & container) + { + if (p < 0.0f || p > 1.0f) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument of function {} should be inside [0, 1] because it is a probability", getName()); + + auto distribution = std::bernoulli_distribution(p); + for (auto & elem : container) + elem = static_cast(distribution(thread_local_rng)); + } +}; + +struct BinomialDistribution +{ + using ReturnType = DataTypeUInt64; + static constexpr const char * getName() { return "randBinomial"; } + static constexpr size_t getNumberOfArguments() { return 2; } + + static void generate(UInt64 t, Float64 p, ColumnUInt64::Container & container) + { + if (p < 0.0f || p > 1.0f) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument of function {} should be inside [0, 1] because it is a probability", getName()); + + auto distribution = std::binomial_distribution(t, p); + for (auto & elem : container) + elem = static_cast(distribution(thread_local_rng)); + } +}; + +struct NegativeBinomialDistribution +{ + using ReturnType = DataTypeUInt64; + static constexpr const char * getName() { return "randNegativeBinomial"; } + static constexpr size_t getNumberOfArguments() { return 2; } + + static void generate(UInt64 t, Float64 p, ColumnUInt64::Container & container) + { + if (p < 0.0f || p > 1.0f) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument of function {} should be inside [0, 1] because it is a probability", getName()); + + auto distribution = std::negative_binomial_distribution(t, 
p); + for (auto & elem : container) + elem = static_cast(distribution(thread_local_rng)); + } +}; + +struct PoissonDistribution +{ + using ReturnType = DataTypeUInt64; + static constexpr const char * getName() { return "randPoisson"; } + static constexpr size_t getNumberOfArguments() { return 1; } + + static void generate(UInt64 n, ColumnUInt64::Container & container) + { + auto distribution = std::poisson_distribution(n); + for (auto & elem : container) + elem = static_cast(distribution(thread_local_rng)); + } +}; + +} + +/** Function which will generate values according to the specified distribution + * Accepts only constant arguments + * Similar to the functions rand and rand64 an additional 'tag' argument could be added to the + * end of arguments list (this argument will be ignored) which will guarantee that functions are not sticked together + * during optimisations. + * Example: SELECT randNormal(0, 1, 1), randNormal(0, 1, 2) FROM numbers(10) + * This query will return two different columns + */ +template +class FunctionRandomDistribution : public IFunction +{ +private: + + template + ResultType getParameterFromConstColumn(size_t parameter_number, const ColumnsWithTypeAndName & arguments) const + { + if (parameter_number >= arguments.size()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Parameter number ({}) is greater than the size of arguments ({}). This is a bug", parameter_number, arguments.size()); + + const IColumn * col = arguments[parameter_number].column.get(); + + if (!isColumnConst(*col)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Parameter number {} of function must be constant.", parameter_number, getName()); + + auto parameter = applyVisitor(FieldVisitorConvertToNumber(), assert_cast(*col).getField()); + + if (isNaN(parameter) || !std::isfinite(parameter)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter number {} of function {} cannot be NaN of infinite", parameter_number, getName()); + + return parameter; + } + +public: + static FunctionPtr create(ContextPtr) + { + return std::make_shared>(); + } + + static constexpr auto name = Distribution::getName(); + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return Distribution::getNumberOfArguments(); } + bool isVariadic() const override { return true; } + bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + auto desired = Distribution::getNumberOfArguments(); + if (arguments.size() != desired && arguments.size() != desired + 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong number of arguments for function {}. 
Should be {} or {}", getName(), desired, desired + 1); + + for (size_t i = 0; i < Distribution::getNumberOfArguments(); ++i) + { + const auto & type = arguments[i]; + WhichDataType which(type); + if (!which.isFloat() && !which.isNativeUInt()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}, expected Float64 or integer", type->getName(), getName()); + } + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override + { + if constexpr (std::is_same_v) + { + auto res_column = ColumnUInt8::create(input_rows_count); + auto & res_data = res_column->getData(); + Distribution::generate(getParameterFromConstColumn(0, arguments), res_data); + return res_column; + } + else if constexpr (std::is_same_v || std::is_same_v) + { + auto res_column = ColumnUInt64::create(input_rows_count); + auto & res_data = res_column->getData(); + Distribution::generate(getParameterFromConstColumn(0, arguments), getParameterFromConstColumn(1, arguments), res_data); + return res_column; + } + else if constexpr (std::is_same_v) + { + auto res_column = ColumnUInt64::create(input_rows_count); + auto & res_data = res_column->getData(); + Distribution::generate(getParameterFromConstColumn(0, arguments), res_data); + return res_column; + } + else + { + auto res_column = ColumnFloat64::create(input_rows_count); + auto & res_data = res_column->getData(); + if constexpr (Distribution::getNumberOfArguments() == 1) + { + Distribution::generate(getParameterFromConstColumn(0, arguments), res_data); + } + else if constexpr (Distribution::getNumberOfArguments() == 2) + { + Distribution::generate(getParameterFromConstColumn(0, arguments), getParameterFromConstColumn(1, arguments), res_data); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "More than two argument specified for function {}", getName()); + } + + return res_column; + } + } +}; + + +REGISTER_FUNCTION(Distribution) +{ + factory.registerFunction>( + { + R"( +Returns a random number from the uniform distribution in the specified range. +Accepts two parameters - minimum bound and maximum bound. + +Typical usage: +[example:typical] +)", + Documentation::Examples{ + {"typical", "SELECT randUniform(0, 1) FROM numbers(100000);"}}, + Documentation::Categories{"Distribution"} + }); + + factory.registerFunction>( + { + R"( +Returns a random number from the normal distribution. +Accepts two parameters - mean and variance. + +Typical usage: +[example:typical] +)", + Documentation::Examples{ + {"typical", "SELECT randNormal(0, 5) FROM numbers(100000);"}}, + Documentation::Categories{"Distribution"} + }); + + + factory.registerFunction>( + { + R"( +Returns a random number from the lognormal distribution (a distribution of a random variable whose logarithm is normally distributed). +Accepts two parameters - mean and variance. + +Typical usage: +[example:typical] +)", + Documentation::Examples{ + {"typical", "SELECT randLogNormal(0, 5) FROM numbers(100000);"}}, + Documentation::Categories{"Distribution"} + }); + + + factory.registerFunction>( + { + R"( +Returns a random number from the exponential distribution. +Accepts one parameter. 
+
+Typical usage:
+[example:typical]
+)",
+        Documentation::Examples{
+            {"typical", "SELECT randExponential(5) FROM numbers(100000);"}},
+        Documentation::Categories{"Distribution"}
+    });
+
+
+    factory.registerFunction<FunctionRandomDistribution<ChiSquaredDistribution>>(
+    {
+        R"(
+Returns a random number from the chi-squared distribution (a distribution of a sum of the squares of k independent standard normal random variables).
+Accepts one parameter - degree of freedom.
+
+Typical usage:
+[example:typical]
+)",
+        Documentation::Examples{
+            {"typical", "SELECT randChiSquared(5) FROM numbers(100000);"}},
+        Documentation::Categories{"Distribution"}
+    });
+
+    factory.registerFunction<FunctionRandomDistribution<StudentTDistribution>>(
+    {
+        R"(
+Returns a random number from the t-distribution.
+Accepts one parameter - degree of freedom.
+
+Typical usage:
+[example:typical]
+)",
+        Documentation::Examples{
+            {"typical", "SELECT randStudentT(5) FROM numbers(100000);"}},
+        Documentation::Categories{"Distribution"}
+    });
+
+
+    factory.registerFunction<FunctionRandomDistribution<FisherFDistribution>>(
+    {
+        R"(
+Returns a random number from the F-distribution.
+The F-distribution is the distribution of X = (S1 / d1) / (S2 / d2) where d1 and d2 are degrees of freedom.
+Accepts two parameters - degrees of freedom.
+
+Typical usage:
+[example:typical]
+)",
+        Documentation::Examples{
+            {"typical", "SELECT randFisherF(5, 4) FROM numbers(100000);"}},
+        Documentation::Categories{"Distribution"}
+    });
+
+
+    factory.registerFunction<FunctionRandomDistribution<BernoulliDistribution>>(
+    {
+        R"(
+Returns a random number from the Bernoulli distribution.
+Accepts one parameter - probability of success.
+
+Typical usage:
+[example:typical]
+)",
+        Documentation::Examples{
+            {"typical", "SELECT randBernoulli(0.1) FROM numbers(100000);"}},
+        Documentation::Categories{"Distribution"}
+    });
+
+
+    factory.registerFunction<FunctionRandomDistribution<BinomialDistribution>>(
+    {
+        R"(
+Returns a random number from the binomial distribution.
+Accepts two parameters - number of experiments and probability of success in each experiment.
+
+Typical usage:
+[example:typical]
+)",
+        Documentation::Examples{
+            {"typical", "SELECT randBinomial(10, 0.1) FROM numbers(100000);"}},
+        Documentation::Categories{"Distribution"}
+    });
+
+
+    factory.registerFunction<FunctionRandomDistribution<NegativeBinomialDistribution>>(
+    {
+        R"(
+Returns a random number from the negative binomial distribution.
+Accepts two parameters - number of experiments and probability of success in each experiment.
+
+Typical usage:
+[example:typical]
+)",
+        Documentation::Examples{
+            {"typical", "SELECT randNegativeBinomial(10, 0.1) FROM numbers(100000);"}},
+        Documentation::Categories{"Distribution"}
+    });
+
+
+    factory.registerFunction<FunctionRandomDistribution<PoissonDistribution>>(
+    {
+        R"(
+Returns a random number from the Poisson distribution.
+Accepts one parameter - the mean number of occurrences.
+ +Typical usage: +[example:typical] +)", + Documentation::Examples{ + {"typical", "SELECT randPoisson(3) FROM numbers(100000);"}}, + Documentation::Categories{"Distribution"} + }); +} + +} diff --git a/tests/queries/0_stateless/02462_distributions.reference b/tests/queries/0_stateless/02462_distributions.reference new file mode 100644 index 00000000000..56b04bcb856 --- /dev/null +++ b/tests/queries/0_stateless/02462_distributions.reference @@ -0,0 +1,12 @@ +Ok +Ok +Ok +Ok +Ok +Ok +Ok +0 +1 +Ok +Ok +Ok diff --git a/tests/queries/0_stateless/02462_distributions.sql b/tests/queries/0_stateless/02462_distributions.sql new file mode 100644 index 00000000000..b45dc897f2a --- /dev/null +++ b/tests/queries/0_stateless/02462_distributions.sql @@ -0,0 +1,24 @@ +# Values should be between 0 and 1 +SELECT DISTINCT if (a >= toFloat64(0) AND a <= toFloat64(1), 'Ok', 'Fail') FROM (SELECT randUniform(0, 1) AS a FROM numbers(100000)); +# Mean should be around 0 +SELECT DISTINCT if (m >= toFloat64(-0.2) AND m <= toFloat64(0.2), 'Ok', 'Fail') FROM (SELECT avg(a) as m FROM (SELECT randNormal(0, 5) AS a FROM numbers(100000))); +# Values should be >= 0 +SELECT DISTINCT if (a >= toFloat64(0), 'Ok', 'Fail') FROM (SELECT randLogNormal(0, 5) AS a FROM numbers(100000)); +# Values should be >= 0 +SELECT DISTINCT if (a >= toFloat64(0), 'Ok', 'Fail') FROM (SELECT randExponential(15) AS a FROM numbers(100000)); +# Values should be >= 0 +SELECT DISTINCT if (a >= toFloat64(0), 'Ok', 'Fail') FROM (SELECT randChiSquared(3) AS a FROM numbers(100000)); +# Mean should be around 0 +SELECT DISTINCT if (m > toFloat64(-0.2) AND m < toFloat64(0.2), 'Ok', 'Fail') FROM (SELECT avg(a) as m FROM (SELECT randStudentT(5) AS a FROM numbers(100000))); +# Values should be >= 0 +SELECT DISTINCT if (a >= toFloat64(0), 'Ok', 'Fail') FROM (SELECT randFisherF(3, 4) AS a FROM numbers(100000)); +# There should be only 0s and 1s +SELECT a FROM (SELECT DISTINCT randBernoulli(0.5) AS a FROM numbers(100000)) ORDER BY a; +# Values should be >= 0 +SELECT DISTINCT if (a >= toFloat64(0), 'Ok', 'Fail') FROM (SELECT randBinomial(3, 0.5) AS a FROM numbers(100000)); +# Values should be >= 0 +SELECT DISTINCT if (a >= toFloat64(0), 'Ok', 'Fail') FROM (SELECT randNegativeBinomial(3, 0.5) AS a FROM numbers(100000)); +# Values should be >= 0 +SELECT DISTINCT if (a >= toFloat64(0), 'Ok', 'Fail') FROM (SELECT randPoisson(44) AS a FROM numbers(100000)); +# No errors +SELECT randUniform(1, 2, 1), randNormal(0, 1, 'abacaba'), randLogNormal(0, 10, 'b'), randChiSquared(1, 1), randStudentT(7, '8'), randFisherF(23, 42, 100), randBernoulli(0.5, 2), randBinomial(3, 0.5, 1), randNegativeBinomial(3, 0.5, 2), randPoisson(44, 44) FORMAT Null; From d09a5e8fd7194cbdd23919570d05fa73b87f953c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 20 Oct 2022 21:46:26 +0200 Subject: [PATCH 200/252] Revert "Attempt to fix abort from parallel parsing (#42496)" This reverts commit 4d703b792c122bb32a4291694b7382c103c6073d. 
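For context on this revert: the change being undone let the ThreadFromGlobalPoolImpl destructor skip its checks when a `finished` flag had been set by the thread body. A minimal standalone sketch (plain std::thread, not the ClickHouse wrapper) of the thread-lifetime rules involved:

#include <cassert>
#include <thread>

int main()
{
    std::thread t([] { /* body runs and returns almost immediately */ });

    /// Even after the body has returned, the std::thread object remains
    /// joinable: it must still be joined (or detached) exactly once, and
    /// destroying it while joinable calls std::terminate(). Conversely,
    /// join() called from inside the thread itself throws std::system_error
    /// (resource_deadlock_would_occur). A "finished" flag set by the body
    /// can therefore be true while the thread is still joinable, so it is
    /// not a safe reason to skip cleanup in a destructor.
    assert(t.joinable());
    t.join();
}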
--- src/Common/ThreadPool.h | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index b3ab20ae592..76ada9e0d75 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -178,11 +178,7 @@ public: func = std::forward(func), args = std::make_tuple(std::forward(args)...)]() mutable /// mutable is needed to destroy capture { - SCOPE_EXIT( - { - state->finished = true; - state->event.set(); - }); + SCOPE_EXIT(state->event.set()); state->thread_id = std::this_thread::get_id(); @@ -217,17 +213,6 @@ public: ~ThreadFromGlobalPoolImpl() { - /// The problem is that the our ThreadFromGlobalPool can be actually finished - /// before we try to join the thread or check whether it is joinable or not. - /// In some places we have code like: - /// if (thread->joinable()) - /// thread->join(); - /// Where join() won't be executed in case when we call it - /// from the same std::thread and it will end to std::abort(). - /// So we just do nothing in this case - if (state->finished) - return; - if (initialized()) abort(); } @@ -267,9 +252,6 @@ protected: /// The state used in this object and inside the thread job. Poco::Event event; - - /// To allow joining to the same std::thread after finishing - std::atomic finished{false}; }; std::shared_ptr state; From ba5c3d122f624b7ad8054f49fc5668b4985b8f63 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 20 Oct 2022 23:59:43 +0200 Subject: [PATCH 201/252] Remove old test --- ...el_processing_on_replicas_part_1.reference | 110 ------------------ ..._parallel_processing_on_replicas_part_1.sh | 103 ---------------- 2 files changed, 213 deletions(-) delete mode 100644 tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.reference delete mode 100755 tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh diff --git a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.reference b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.reference deleted file mode 100644 index 2675904dea0..00000000000 --- a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.reference +++ /dev/null @@ -1,110 +0,0 @@ -Testing 00001_count_hits.sql ----> Ok! ✅ -Testing 00002_count_visits.sql ----> Ok! ✅ -Testing 00004_top_counters.sql ----> Ok! ✅ -Testing 00005_filtering.sql ----> Ok! ✅ -Testing 00006_agregates.sql ----> Ok! ✅ -Testing 00007_uniq.sql ----> Ok! ✅ -Testing 00008_uniq.sql ----> Ok! ✅ -Testing 00009_uniq_distributed.sql ----> Ok! ✅ -Testing 00010_quantiles_segfault.sql ----> Ok! ✅ -Testing 00011_sorting.sql ----> Ok! ✅ -Testing 00012_sorting_distributed.sql ----> Ok! ✅ -Skipping 00013_sorting_of_nested.sql -Testing 00014_filtering_arrays.sql ----> Ok! ✅ -Testing 00015_totals_and_no_aggregate_functions.sql ----> Ok! ✅ -Testing 00016_any_if_distributed_cond_always_false.sql ----> Ok! ✅ -Testing 00017_aggregation_uninitialized_memory.sql ----> Ok! ✅ -Testing 00020_distinct_order_by_distributed.sql ----> Ok! ✅ -Testing 00021_1_select_with_in.sql ----> Ok! ✅ -Testing 00021_2_select_with_in.sql ----> Ok! ✅ -Testing 00021_3_select_with_in.sql ----> Ok! ✅ -Testing 00022_merge_prewhere.sql ----> Ok! ✅ -Testing 00023_totals_limit.sql ----> Ok! ✅ -Testing 00024_random_counters.sql ----> Ok! ✅ -Testing 00030_array_enumerate_uniq.sql ----> Ok! ✅ -Testing 00031_array_enumerate_uniq.sql ----> Ok! ✅ -Testing 00032_aggregate_key64.sql ----> Ok! ✅ -Testing 00033_aggregate_key_string.sql ----> Ok! 
✅ -Testing 00034_aggregate_key_fixed_string.sql ----> Ok! ✅ -Testing 00035_aggregate_keys128.sql ----> Ok! ✅ -Testing 00036_aggregate_hashed.sql ----> Ok! ✅ -Testing 00037_uniq_state_merge1.sql ----> Ok! ✅ -Testing 00038_uniq_state_merge2.sql ----> Ok! ✅ -Testing 00039_primary_key.sql ----> Ok! ✅ -Testing 00040_aggregating_materialized_view.sql ----> Ok! ✅ -Testing 00041_aggregating_materialized_view.sql ----> Ok! ✅ -Testing 00042_any_left_join.sql ----> Ok! ✅ -Testing 00043_any_left_join.sql ----> Ok! ✅ -Testing 00044_any_left_join_string.sql ----> Ok! ✅ -Testing 00045_uniq_upto.sql ----> Ok! ✅ -Testing 00046_uniq_upto_distributed.sql ----> Ok! ✅ -Testing 00047_bar.sql ----> Ok! ✅ -Testing 00048_min_max.sql ----> Ok! ✅ -Testing 00049_max_string_if.sql ----> Ok! ✅ -Testing 00050_min_max.sql ----> Ok! ✅ -Testing 00051_min_max_array.sql ----> Ok! ✅ -Testing 00052_group_by_in.sql ----> Ok! ✅ -Testing 00053_replicate_segfault.sql ----> Ok! ✅ -Testing 00054_merge_tree_partitions.sql ----> Ok! ✅ -Testing 00055_index_and_not.sql ----> Ok! ✅ -Testing 00056_view.sql ----> Ok! ✅ -Testing 00059_merge_sorting_empty_array_joined.sql ----> Ok! ✅ -Testing 00060_move_to_prewhere_and_sets.sql ----> Ok! ✅ -Skipping 00061_storage_buffer.sql -Testing 00062_loyalty.sql ----> Ok! ✅ -Testing 00063_loyalty_joins.sql ----> Ok! ✅ -Testing 00065_loyalty_with_storage_join.sql ----> Ok! ✅ -Testing 00066_sorting_distributed_many_replicas.sql ----> Ok! ✅ -Testing 00067_union_all.sql ----> Ok! ✅ -Testing 00068_subquery_in_prewhere.sql ----> Ok! ✅ -Testing 00069_duplicate_aggregation_keys.sql ----> Ok! ✅ -Testing 00071_merge_tree_optimize_aio.sql ----> Ok! ✅ -Testing 00072_compare_date_and_string_index.sql ----> Ok! ✅ -Testing 00073_uniq_array.sql ----> Ok! ✅ -Testing 00074_full_join.sql ----> Ok! ✅ -Testing 00075_left_array_join.sql ----> Ok! ✅ -Testing 00076_system_columns_bytes.sql ----> Ok! ✅ -Testing 00077_log_tinylog_stripelog.sql ----> Ok! ✅ -Testing 00078_group_by_arrays.sql ----> Ok! ✅ -Testing 00079_array_join_not_used_joined_column.sql ----> Ok! ✅ -Testing 00080_array_join_and_union.sql ----> Ok! ✅ -Testing 00081_group_by_without_key_and_totals.sql ----> Ok! ✅ -Testing 00082_quantiles.sql ----> Ok! ✅ -Testing 00083_array_filter.sql ----> Ok! ✅ -Testing 00084_external_aggregation.sql ----> Ok! ✅ -Testing 00085_monotonic_evaluation_segfault.sql ----> Ok! ✅ -Testing 00086_array_reduce.sql ----> Ok! ✅ -Testing 00087_where_0.sql ----> Ok! ✅ -Testing 00088_global_in_one_shard_and_rows_before_limit.sql ----> Ok! ✅ -Testing 00089_position_functions_with_non_constant_arg.sql ----> Ok! ✅ -Testing 00091_prewhere_two_conditions.sql ----> Ok! ✅ -Testing 00093_prewhere_array_join.sql ----> Ok! ✅ -Testing 00094_order_by_array_join_limit.sql ----> Ok! ✅ -Skipping 00095_hyperscan_profiler.sql -Testing 00139_like.sql ----> Ok! ✅ -Skipping 00140_rename.sql -Testing 00141_transform.sql ----> Ok! ✅ -Testing 00142_system_columns.sql ----> Ok! ✅ -Testing 00143_transform_non_const_default.sql ----> Ok! ✅ -Testing 00144_functions_of_aggregation_states.sql ----> Ok! ✅ -Testing 00145_aggregate_functions_statistics.sql ----> Ok! ✅ -Testing 00146_aggregate_function_uniq.sql ----> Ok! ✅ -Testing 00147_global_in_aggregate_function.sql ----> Ok! ✅ -Testing 00148_monotonic_functions_and_index.sql ----> Ok! ✅ -Testing 00149_quantiles_timing_distributed.sql ----> Ok! ✅ -Testing 00150_quantiles_timing_precision.sql ----> Ok! ✅ -Testing 00151_order_by_read_in_order.sql ----> Ok! 
✅ -Skipping 00151_replace_partition_with_different_granularity.sql -Skipping 00152_insert_different_granularity.sql -Testing 00153_aggregate_arena_race.sql ----> Ok! ✅ -Skipping 00154_avro.sql -Testing 00156_max_execution_speed_sample_merge.sql ----> Ok! ✅ -Skipping 00157_cache_dictionary.sql -Skipping 00158_cache_dictionary_has.sql -Testing 00160_decode_xml_component.sql ----> Ok! ✅ -Testing 00162_mmap_compression_none.sql ----> Ok! ✅ -Testing 00164_quantileBfloat16.sql ----> Ok! ✅ -Testing 00165_jit_aggregate_functions.sql ----> Ok! ✅ -Skipping 00166_explain_estimate.sql -Testing 00167_read_bytes_from_fs.sql ----> Ok! ✅ -Total failed tests: diff --git a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh deleted file mode 100755 index a0e2442ae9e..00000000000 --- a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh +++ /dev/null @@ -1,103 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-tsan, no-random-settings - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# set -e - -# All replicas are localhost, disable `prefer_localhost_replica` option to test network interface -# Currently this feature could not work with hedged requests -# Enabling `enable_sample_offset_parallel_processing` feature could lead to intersecting marks, so some of them would be thrown away and it will lead to incorrect result of SELECT query -SETTINGS="--max_parallel_replicas=3 --use_hedged_requests=false --allow_experimental_parallel_reading_from_replicas=true" - -# Prepare tables -$CLICKHOUSE_CLIENT $SETTINGS -nm -q ''' - drop table if exists test.dist_hits SYNC; - drop table if exists test.dist_visits SYNC; - - create table test.dist_hits as test.hits engine = Distributed("test_cluster_one_shard_three_replicas_localhost", test, hits, rand()); - create table test.dist_visits as test.visits engine = Distributed("test_cluster_one_shard_three_replicas_localhost", test, visits, rand()); -'''; - -FAILED=() - -# PreviouslyFailed=( -# ) - -SkipList=( - "00013_sorting_of_nested.sql" # It contains FINAL, which is not allowed together with parallel reading - - "00061_storage_buffer.sql" - "00097_constexpr_in_index.sql" - "00095_hyperscan_profiler.sql" # too long in debug (there is a --no-debug tag inside a test) - - "00140_rename.sql" # Multiple renames are not allowed with DatabaseReplicated and tags are not forwarded through this test - - "00154_avro.sql" # Plain select * with limit with Distributed table is not deterministic - "00151_replace_partition_with_different_granularity.sql" # Replace partition from Distributed is not allowed - "00152_insert_different_granularity.sql" # The same as above - - "00157_cache_dictionary.sql" # Too long in debug mode, but result is correct - "00158_cache_dictionary_has.sql" # The same as above - - "00166_explain_estimate.sql" # Distributed table returns nothing -) - -# for TESTPATH in "${PreviouslyFailed[@]}" -for TESTPATH in "$CURDIR"/*.sql; -do - TESTNAME=$(basename $TESTPATH) - NUM=$(echo "${TESTNAME}" | grep -o -P '^\d+' | sed 's/^0*//') - if [[ "${NUM}" -ge 168 ]]; then - continue - fi - - if [[ " ${SkipList[*]} " =~ ${TESTNAME} ]]; then - echo "Skipping $TESTNAME " - continue - fi - - echo -n "Testing $TESTNAME ----> " - - # prepare test - NEW_TESTNAME="/tmp/dist_$TESTNAME" - # Added g to sed command to replace all tables, not the first - cat $TESTPATH | sed -e 's/test.hits/test.dist_hits/g' | 
sed -e 's/test.visits/test.dist_visits/g' > $NEW_TESTNAME - - TESTNAME_RESULT="/tmp/result_$TESTNAME" - NEW_TESTNAME_RESULT="/tmp/result_dist_$TESTNAME" - - $CLICKHOUSE_CLIENT $SETTINGS -nm < $TESTPATH > $TESTNAME_RESULT - $CLICKHOUSE_CLIENT $SETTINGS -nm < $NEW_TESTNAME > $NEW_TESTNAME_RESULT - - expected=$(cat $TESTNAME_RESULT | md5sum) - actual=$(cat $NEW_TESTNAME_RESULT | md5sum) - - if [[ "$expected" != "$actual" ]]; then - FAILED+=("$TESTNAME") - echo "Failed! âŒ" - echo "Plain:" - cat $TESTNAME_RESULT - echo "Distributed:" - cat $NEW_TESTNAME_RESULT - else - echo "Ok! ✅" - fi -done - - -echo "Total failed tests: " -# Iterate the loop to read and print each array element -for value in "${FAILED[@]}" -do - echo "🔺 $value" -done - -# Drop tables - -$CLICKHOUSE_CLIENT $SETTINGS -nm -q ''' - drop table if exists test.dist_hits SYNC; - drop table if exists test.dist_visits SYNC; -'''; From 1a6fa0758763165bd3e18ff209d206b9aab56d87 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 21 Oct 2022 00:08:17 +0200 Subject: [PATCH 202/252] Fix clang-tidy --- src/Functions/timeSlots.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/Functions/timeSlots.cpp b/src/Functions/timeSlots.cpp index 949ca7bc0e4..e986e32d76f 100644 --- a/src/Functions/timeSlots.cpp +++ b/src/Functions/timeSlots.cpp @@ -19,6 +19,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; + extern const int BAD_ARGUMENTS; } namespace @@ -41,6 +42,9 @@ struct TimeSlotsImpl const PaddedPODArray & starts, const PaddedPODArray & durations, UInt32 time_slot_size, PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets) { + if (time_slot_size == 0) + throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS); + size_t size = starts.size(); result_offsets.resize(size); @@ -63,6 +67,9 @@ struct TimeSlotsImpl const PaddedPODArray & starts, UInt32 duration, UInt32 time_slot_size, PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets) { + if (time_slot_size == 0) + throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS); + size_t size = starts.size(); result_offsets.resize(size); @@ -85,6 +92,9 @@ struct TimeSlotsImpl UInt32 start, const PaddedPODArray & durations, UInt32 time_slot_size, PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets) { + if (time_slot_size == 0) + throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS); + size_t size = durations.size(); result_offsets.resize(size); @@ -125,6 +135,9 @@ struct TimeSlotsImpl ColumnArray::Offset current_offset = 0; time_slot_size = time_slot_size.value * ts_multiplier; + if (time_slot_size == 0) + throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS); + for (size_t i = 0; i < size; ++i) { for (DateTime64 value = (starts[i] * dt_multiplier) / time_slot_size, end = (starts[i] * dt_multiplier + durations[i] * dur_multiplier) / time_slot_size; value <= end; value += 1) @@ -155,6 +168,9 @@ struct TimeSlotsImpl ColumnArray::Offset current_offset = 0; duration = duration * dur_multiplier; time_slot_size = time_slot_size.value * ts_multiplier; + if (time_slot_size == 0) + throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS); + for (size_t i = 0; i < size; ++i) { for (DateTime64 value = (starts[i] * dt_multiplier) / time_slot_size, end = (starts[i] * dt_multiplier + duration) / time_slot_size; value <= end; value += 1) 
@@ -185,6 +201,9 @@ struct TimeSlotsImpl
     ColumnArray::Offset current_offset = 0;
     start = dt_multiplier * start;
     time_slot_size = time_slot_size.value * ts_multiplier;
+    if (time_slot_size == 0)
+        throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS);
+
     for (size_t i = 0; i < size; ++i)
     {
         for (DateTime64 value = start / time_slot_size, end = (start + durations[i] * dur_multiplier) / time_slot_size; value <= end; value += 1)

From b720030ac645aaef4c4850471d0449b34b842 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 21 Oct 2022 00:13:30 +0200
Subject: [PATCH 203/252] Remove outdated documentation

---
 docs/en/development/architecture.md | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md
index c13b2519b84..fe644c43889 100644
--- a/docs/en/development/architecture.md
+++ b/docs/en/development/architecture.md
@@ -49,27 +49,13 @@ When we calculate some function over columns in a block, we add another column w

 Blocks are created for every processed chunk of data. Note that for the same type of calculation, the column names and types remain the same for different blocks, and only column data changes. It is better to split block data from the block header because small block sizes have a high overhead of temporary strings for copying shared_ptrs and column names.

-## Block Streams {#block-streams}
+## Processors

-Block streams are for processing data. We use streams of blocks to read data from somewhere, perform data transformations, or write data to somewhere. `IBlockInputStream` has the `read` method to fetch the next block while available. `IBlockOutputStream` has the `write` method to push the block somewhere.
-
-Streams are responsible for:
-
-1. Reading or writing to a table. The table just returns a stream for reading or writing blocks.
-2. Implementing data formats. For example, if you want to output data to a terminal in `Pretty` format, you create a block output stream where you push blocks, and it formats them.
-3. Performing data transformations. Let’s say you have `IBlockInputStream` and want to create a filtered stream. You create `FilterBlockInputStream` and initialize it with your stream. Then when you pull a block from `FilterBlockInputStream`, it pulls a block from your stream, filters it, and returns the filtered block to you. Query execution pipelines are represented this way.
-
-There are more sophisticated transformations. For example, when you pull from `AggregatingBlockInputStream`, it reads all data from its source, aggregates it, and then returns a stream of aggregated data for you. Another example: `UnionBlockInputStream` accepts many input sources in the constructor and also a number of threads. It launches multiple threads and reads from multiple sources in parallel.
-
-> Block streams use the “pull” approach to control flow: when you pull a block from the first stream, it consequently pulls the required blocks from nested streams, and the entire execution pipeline will work. Neither “pull” nor “push” is the best solution, because control flow is implicit, and that limits the implementation of various features like simultaneous execution of multiple queries (merging many pipelines together). This limitation could be overcome with coroutines or just running extra threads that wait for each other. We may have more possibilities if we make control flow explicit: if we locate the logic for passing data from one calculation unit to another outside of those calculation units. Read this [article](http://journal.stuffwithstuff.com/2013/01/13/iteration-inside-and-out/) for more thoughts.
-
-We should note that the query execution pipeline creates temporary data at each step. We try to keep block size small enough so that temporary data fits in the CPU cache. With that assumption, writing and reading temporary data is almost free in comparison with other calculations. We could consider an alternative, which is to fuse many operations in the pipeline together. It could make the pipeline as short as possible and remove much of the temporary data, which could be an advantage, but it also has drawbacks. For example, a split pipeline makes it easy to implement caching intermediate data, stealing intermediate data from similar queries running at the same time, and merging pipelines for similar queries.
+See the description at [https://github.com/ClickHouse/ClickHouse/blob/master/src/Processors/IProcessor.h](https://github.com/ClickHouse/ClickHouse/blob/master/src/Processors/IProcessor.h).

 ## Formats {#formats}

-Data formats are implemented with block streams. There are “presentational” formats only suitable for the output of data to the client, such as `Pretty` format, which provides only `IBlockOutputStream`. And there are input/output formats, such as `TabSeparated` or `JSONEachRow`.
-
-There are also row streams: `IRowInputStream` and `IRowOutputStream`. They allow you to pull/push data by individual rows, not by blocks. And they are only needed to simplify the implementation of row-oriented formats. The wrappers `BlockInputStreamFromRowInputStream` and `BlockOutputStreamFromRowOutputStream` allow you to convert row-oriented streams to regular block-oriented streams.
+Data formats are implemented with processors.
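As an illustration of the pull model the deleted paragraphs describe, here is a minimal sketch; the class and method names follow the removed text, but the bodies are illustrative rather than the actual (now removed) ClickHouse implementation:

#include <memory>
#include <utility>

/// Illustrative only: a toy Block and the pull-style interface described by
/// the deleted documentation. Not the real (removed) ClickHouse classes.
struct Block { /* header + columns */ };

class IBlockInputStream
{
public:
    virtual ~IBlockInputStream() = default;
    /// Fetch the next block while available (empty block when exhausted).
    virtual Block read() = 0;
};

class FilterBlockInputStream : public IBlockInputStream
{
    std::shared_ptr<IBlockInputStream> source;
public:
    explicit FilterBlockInputStream(std::shared_ptr<IBlockInputStream> source_)
        : source(std::move(source_)) {}

    Block read() override
    {
        Block block = source->read(); /// pulling here pulls from nested streams in turn
        /// ... drop filtered-out rows from `block` ...
        return block;
    }
};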
## I/O {#io} From d7d0794d2f57e27a2b3eef3a3889ef272ee95338 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 20 Oct 2022 23:14:19 +0000 Subject: [PATCH 204/252] Better logs in clickhouse-disks --- programs/disks/DisksApp.cpp | 31 +++++++++------- tests/integration/test_disks_app_func/test.py | 36 +++++++++---------- 2 files changed, 36 insertions(+), 31 deletions(-) diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 749ccb3e503..91472a8df33 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -57,7 +57,7 @@ void DisksApp::addOptions( ("config-file,C", po::value(), "Set config file") ("disk", po::value(), "Set disk name") ("command_name", po::value(), "Name for command to do") - ("send-logs", "Send logs") + ("save-logs", "Save logs to a file") ("log-level", po::value(), "Logging level") ; @@ -82,10 +82,10 @@ void DisksApp::processOptions() config().setString("config-file", options["config-file"].as()); if (options.count("disk")) config().setString("disk", options["disk"].as()); - if (options.count("send-logs")) - config().setBool("send-logs", true); + if (options.count("save-logs")) + config().setBool("save-logs", true); if (options.count("log-level")) - Poco::Logger::root().setLevel(options["log-level"].as()); + config().setString("log-level", options["log-level"].as()); } void DisksApp::init(std::vector & common_arguments) @@ -149,15 +149,6 @@ void DisksApp::parseAndCheckOptions( int DisksApp::main(const std::vector & /*args*/) { - if (config().has("send-logs")) - { - auto log_level = config().getString("log-level", "trace"); - Poco::Logger::root().setLevel(Poco::Logger::parseLevel(log_level)); - - auto log_path = config().getString("logger.clickhouse-disks", "/var/log/clickhouse-server/clickhouse-disks.log"); - Poco::Logger::root().setChannel(Poco::AutoPtr(new Poco::FileChannel(log_path))); - } - if (config().has("config-file") || fs::exists(getDefaultConfigFileName())) { String config_path = config().getString("config-file", getDefaultConfigFileName()); @@ -171,6 +162,20 @@ int DisksApp::main(const std::vector & /*args*/) throw Exception(ErrorCodes::BAD_ARGUMENTS, "No config-file specifiged"); } + if (config().has("save-logs")) + { + auto log_level = config().getString("log-level", "trace"); + Poco::Logger::root().setLevel(Poco::Logger::parseLevel(log_level)); + + auto log_path = config().getString("logger.clickhouse-disks", "/var/log/clickhouse-server/clickhouse-disks.log"); + Poco::Logger::root().setChannel(Poco::AutoPtr(new Poco::FileChannel(log_path))); + } + else + { + auto log_level = config().getString("log-level", "none"); + Poco::Logger::root().setLevel(Poco::Logger::parseLevel(log_level)); + } + registerDisks(); registerFormats(); diff --git a/tests/integration/test_disks_app_func/test.py b/tests/integration/test_disks_app_func/test.py index d87f387e122..de9b23abd5e 100644 --- a/tests/integration/test_disks_app_func/test.py +++ b/tests/integration/test_disks_app_func/test.py @@ -37,7 +37,7 @@ def test_disks_app_func_ld(started_cluster): source = cluster.instances["disks_app_test"] out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "list-disks"] + ["/usr/bin/clickhouse", "disks", "--save-logs", "list-disks"] ) disks = out.split("\n") @@ -51,7 +51,7 @@ def test_disks_app_func_ls(started_cluster): init_data(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", "test1", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", 
"test1", "list", "."] ) files = out.split("\n") @@ -62,7 +62,7 @@ def test_disks_app_func_ls(started_cluster): [ "/usr/bin/clickhouse", "disks", - "--send-logs", + "--save-logs", "--disk", "test1", "list", @@ -89,7 +89,7 @@ def test_disks_app_func_cp(started_cluster): [ "/usr/bin/clickhouse", "disks", - "--send-logs", + "--save-logs", "--disk", "test1", "write", @@ -114,7 +114,7 @@ def test_disks_app_func_cp(started_cluster): ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", "test2", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] ) assert "path1" in out @@ -123,7 +123,7 @@ def test_disks_app_func_cp(started_cluster): [ "/usr/bin/clickhouse", "disks", - "--send-logs", + "--save-logs", "--disk", "test2", "remove", @@ -135,7 +135,7 @@ def test_disks_app_func_cp(started_cluster): [ "/usr/bin/clickhouse", "disks", - "--send-logs", + "--save-logs", "--disk", "test1", "remove", @@ -146,13 +146,13 @@ def test_disks_app_func_cp(started_cluster): # alesapin: Why we need list one more time? # kssenii: it is an assertion that the file is indeed deleted out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", "test2", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] ) assert "path1" not in out out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", "test1", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] ) assert "path1" not in out @@ -174,7 +174,7 @@ def test_disks_app_func_ln(started_cluster): ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "list", "data/default/"] + ["/usr/bin/clickhouse", "disks", "--save-logs", "list", "data/default/"] ) files = out.split("\n") @@ -196,7 +196,7 @@ def test_disks_app_func_rm(started_cluster): [ "/usr/bin/clickhouse", "disks", - "--send-logs", + "--save-logs", "--disk", "test2", "write", @@ -207,7 +207,7 @@ def test_disks_app_func_rm(started_cluster): ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", "test2", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] ) assert "path3" in out @@ -216,7 +216,7 @@ def test_disks_app_func_rm(started_cluster): [ "/usr/bin/clickhouse", "disks", - "--send-logs", + "--save-logs", "--disk", "test2", "remove", @@ -225,7 +225,7 @@ def test_disks_app_func_rm(started_cluster): ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", "test2", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test2", "list", "."] ) assert "path3" not in out @@ -237,7 +237,7 @@ def test_disks_app_func_mv(started_cluster): init_data(source) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", "test1", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] ) files = out.split("\n") @@ -257,7 +257,7 @@ def test_disks_app_func_mv(started_cluster): ) out = source.exec_in_container( - ["/usr/bin/clickhouse", "disks", "--send-logs", "--disk", "test1", "list", "."] + ["/usr/bin/clickhouse", "disks", "--save-logs", "--disk", "test1", "list", "."] ) files = out.split("\n") @@ -277,7 +277,7 @@ def test_disks_app_func_read_write(started_cluster): [ "/usr/bin/clickhouse", "disks", - "--send-logs", + "--save-logs", "--disk", "test1", "write", @@ -291,7 +291,7 
@@ def test_disks_app_func_read_write(started_cluster): [ "/usr/bin/clickhouse", "disks", - "--send-logs", + "--save-logs", "--disk", "test1", "read", From bd33c4f1805a25866702cfd14b6f1a5cbf8059a1 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 21 Oct 2022 01:25:51 +0000 Subject: [PATCH 205/252] add lib_fuzzer and lib_fuzzer_no_main to llvm-project build --- contrib/llvm-project-cmake/CMakeLists.txt | 36 +++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index 6a73ae0f0c6..57995319cdd 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -1,3 +1,39 @@ +set(COMPILER_RT_FUZZER_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/compiler-rt/lib/fuzzer") + +set(FUZZER_SRCS + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerCrossOver.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerDataFlowTrace.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerDriver.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtFunctionsDlsym.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtFunctionsWeak.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtFunctionsWindows.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtraCounters.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtraCountersDarwin.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtraCountersWindows.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerFork.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerIO.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerIOPosix.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerIOWindows.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerLoop.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerMerge.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerMutate.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerSHA1.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerTracePC.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtil.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilDarwin.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilFuchsia.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilLinux.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilPosix.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilWindows.cpp" +) + +add_library(_fuzzer_no_main STATIC ${FUZZER_SRCS}) +add_library(ch_contrib::fuzzer_no_main ALIAS _fuzzer_no_main) + +add_library(_fuzzer STATIC ${FUZZER_SRCS} "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerMain.cpp") +add_library(ch_contrib::fuzzer ALIAS _fuzzer) + + if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined") set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) else() From ee5f5a4cb488c9f3be38c03684ea0256b4e3815c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 21 Oct 2022 05:33:17 +0200 Subject: [PATCH 206/252] Unfathomable amount of trash --- src/Client/LocalConnection.cpp | 2 -- .../Executors/CompletedPipelineExecutor.cpp | 4 +-- .../Executors/CompletedPipelineExecutor.h | 2 ++ src/Processors/Executors/ExecutingGraph.h | 1 + src/Processors/Executors/PipelineExecutor.cpp | 1 + src/Processors/Executors/PipelineExecutor.h | 1 + src/QueryPipeline/PipelineResourcesHolder.h | 3 +- src/Server/TCPHandler.cpp | 33 ++++++++++--------- 8 files changed, 27 insertions(+), 20 deletions(-) diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 7ac68324915..476386889d2 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -6,8 +6,6 @@ #include #include #include -#include -#include namespace DB diff --git a/src/Processors/Executors/CompletedPipelineExecutor.cpp 
b/src/Processors/Executors/CompletedPipelineExecutor.cpp index 9e5ea3916bc..a4c7fe2f687 100644 --- a/src/Processors/Executors/CompletedPipelineExecutor.cpp +++ b/src/Processors/Executors/CompletedPipelineExecutor.cpp @@ -72,9 +72,9 @@ void CompletedPipelineExecutor::execute() data->executor = std::make_shared(pipeline.processors, pipeline.process_list_element); data->executor->setReadProgressCallback(pipeline.getReadProgressCallback()); - /// Avoid passing this to labmda, copy ptr to data instead. + /// Avoid passing this to lambda, copy ptr to data instead. /// Destructor of unique_ptr copy raw ptr into local variable first, only then calls object destructor. - auto func = [data_ptr = data.get(), num_threads = pipeline.getNumThreads(), thread_group = CurrentThread::getGroup()]() + auto func = [data_ptr = data.get(), num_threads = pipeline.getNumThreads(), thread_group = CurrentThread::getGroup()] { threadFunction(*data_ptr, thread_group, num_threads); }; diff --git a/src/Processors/Executors/CompletedPipelineExecutor.h b/src/Processors/Executors/CompletedPipelineExecutor.h index e616cd6a2b7..65fab6035b1 100644 --- a/src/Processors/Executors/CompletedPipelineExecutor.h +++ b/src/Processors/Executors/CompletedPipelineExecutor.h @@ -1,7 +1,9 @@ #pragma once + #include #include + namespace DB { diff --git a/src/Processors/Executors/ExecutingGraph.h b/src/Processors/Executors/ExecutingGraph.h index 7ccdf9f9898..b374f968122 100644 --- a/src/Processors/Executors/ExecutingGraph.h +++ b/src/Processors/Executors/ExecutingGraph.h @@ -1,4 +1,5 @@ #pragma once + #include #include #include diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 42140b40ecf..3772381de04 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -15,6 +15,7 @@ #include #endif + namespace DB { diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index 7b5d3213dea..21bde312cbc 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -22,6 +22,7 @@ using ExecutingGraphPtr = std::unique_ptr; class ReadProgressCallback; using ReadProgressCallbackPtr = std::unique_ptr; + /// Executes query pipeline. class PipelineExecutor { diff --git a/src/QueryPipeline/PipelineResourcesHolder.h b/src/QueryPipeline/PipelineResourcesHolder.h index 46b1024f384..ed9eb68b7ba 100644 --- a/src/QueryPipeline/PipelineResourcesHolder.h +++ b/src/QueryPipeline/PipelineResourcesHolder.h @@ -19,8 +19,9 @@ struct QueryPlanResourceHolder QueryPlanResourceHolder(); QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept; ~QueryPlanResourceHolder(); + /// Custom move assignment does not destroy data from lhs. It appends data from rhs to lhs. - QueryPlanResourceHolder& operator=(QueryPlanResourceHolder &&) noexcept; + QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &&) noexcept; /// Some processors may implicitly use Context or temporary Storage created by Interpreter. 
/// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here, diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 25a832ab7e3..962d5412b48 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -377,8 +377,8 @@ void TCPHandler::runImpl() after_send_progress.restart(); if (state.io.pipeline.pushing()) - /// FIXME: check explicitly that insert query suggests to receive data via native protocol, { + /// FIXME: check explicitly that insert query suggests to receive data via native protocol, state.need_receive_data_for_insert = true; processInsertQuery(); state.io.onFinish(); @@ -390,27 +390,30 @@ void TCPHandler::runImpl() } else if (state.io.pipeline.completed()) { - CompletedPipelineExecutor executor(state.io.pipeline); - /// Should not check for cancel in case of input. - if (!state.need_receive_data_for_input) { - auto callback = [this]() + CompletedPipelineExecutor executor(state.io.pipeline); + + /// Should not check for cancel in case of input. + if (!state.need_receive_data_for_input) { - std::lock_guard lock(fatal_error_mutex); + auto callback = [this]() + { + std::lock_guard lock(fatal_error_mutex); - if (isQueryCancelled()) - return true; + if (isQueryCancelled()) + return true; - sendProgress(); - sendSelectProfileEvents(); - sendLogs(); + sendProgress(); + sendSelectProfileEvents(); + sendLogs(); - return false; - }; + return false; + }; - executor.setCancelCallback(callback, interactive_delay / 1000); + executor.setCancelCallback(callback, interactive_delay / 1000); + } + executor.execute(); } - executor.execute(); state.io.onFinish(); /// Send final progress after calling onFinish(), since it will update the progress. From 537f7000d671a70148860a7418b606b442732b29 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 21 Oct 2022 06:55:24 +0000 Subject: [PATCH 207/252] Update documentation --- docs/en/sql-reference/functions/date-time-functions.md | 2 +- docs/ru/sql-reference/functions/date-time-functions.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 76f66db924f..538d8532db7 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1163,7 +1163,7 @@ dateName(date_part, date) **Arguments** - `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md). -- `date` — Date. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `date` — Date. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - `timezone` — Timezone. Optional. [String](../../sql-reference/data-types/string.md). **Returned value** diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 897c4b3e86a..671d63f8f85 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -1084,7 +1084,7 @@ dateName(date_part, date) **Ðргументы** - `date_part` — чаÑÑ‚ÑŒ даты. 
Возможные значениÑ: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md). -- `date` — дата. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). +- `date` — дата. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). - `timezone` — чаÑовой поÑÑ. ÐеобÑзательный аргумент. [String](../../sql-reference/data-types/string.md). **Возвращаемое значение** From 73e16ad55a1c2b8b954af502bea37196b6192125 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 20 Oct 2022 13:19:48 +0000 Subject: [PATCH 208/252] Fix KeeperTCPHandler data race --- src/Server/KeeperTCPHandler.cpp | 4 ++++ src/Server/KeeperTCPHandler.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 94e3597f88e..03385014a43 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -395,6 +395,7 @@ void KeeperTCPHandler::runImpl() }; session_stopwatch.start(); + connected.store(true, std::memory_order_release); bool close_received = false; try @@ -584,6 +585,9 @@ KeeperConnectionStats & KeeperTCPHandler::getConnectionStats() void KeeperTCPHandler::dumpStats(WriteBufferFromOwnString & buf, bool brief) { + if (!connected.load(std::memory_order_acquire)) + return; + auto & stats = getConnectionStats(); writeText(' ', buf); diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index e9bd211628f..ffdd50b805a 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -81,6 +81,8 @@ private: std::shared_ptr in; std::shared_ptr out; + std::atomic connected{false}; + void runImpl(); void sendHandshake(bool has_leader); From 13480f940b04951225ac309cb9e3b881cdb53456 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 21 Oct 2022 07:27:37 +0000 Subject: [PATCH 209/252] Handle 4LW correctly --- src/Server/KeeperTCPHandler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 03385014a43..514f3841567 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -325,6 +325,7 @@ void KeeperTCPHandler::runImpl() int32_t four_letter_cmd = header; if (!isHandShake(four_letter_cmd)) { + connected.store(true, std::memory_order_relaxed); tryExecuteFourLetterWordCmd(four_letter_cmd); return; } From f3016fb86b9cf32d21593dc158fc654c5e6cdbb8 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 21 Oct 2022 09:15:24 +0000 Subject: [PATCH 210/252] Throw exception on invalid values --- src/Functions/GregorianDate.h | 8 ++++++-- .../queries/0_stateless/02463_julian_day_ubsan.reference | 1 - tests/queries/0_stateless/02463_julian_day_ubsan.sql | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h index b91c1b6391d..332069e45ed 100644 --- a/src/Functions/GregorianDate.h +++ b/src/Functions/GregorianDate.h @@ -337,11 +337,15 @@ namespace DB if constexpr (is_signed_v && std::numeric_limits::lowest() < -678941) if (modified_julian_day < -678941) - modified_julian_day = -678941; + throw Exception( + ErrorCodes::CANNOT_FORMAT_DATETIME, + "Value cannot be represented as date because 
From f3016fb86b9cf32d21593dc158fc654c5e6cdbb8 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 21 Oct 2022 09:15:24 +0000 Subject: [PATCH 210/252] Throw exception on invalid values --- src/Functions/GregorianDate.h | 8 ++++++-- .../queries/0_stateless/02463_julian_day_ubsan.reference | 1 - tests/queries/0_stateless/02463_julian_day_ubsan.sql | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h index b91c1b6391d..332069e45ed 100644 --- a/src/Functions/GregorianDate.h +++ b/src/Functions/GregorianDate.h @@ -337,11 +337,15 @@ namespace DB if constexpr (is_signed_v && std::numeric_limits::lowest() < -678941) if (modified_julian_day < -678941) - modified_julian_day = -678941; + throw Exception( + ErrorCodes::CANNOT_FORMAT_DATETIME, + "Value cannot be represented as date because it's out of range"); if constexpr (std::numeric_limits::max() > 2973119) if (modified_julian_day > 2973119) - modified_julian_day = 2973119; + throw Exception( + ErrorCodes::CANNOT_FORMAT_DATETIME, + "Value cannot be represented as date because it's out of range"); const auto a = modified_julian_day + 678575; const auto quad_cent = gd::div(a, 146097); diff --git a/tests/queries/0_stateless/02463_julian_day_ubsan.reference b/tests/queries/0_stateless/02463_julian_day_ubsan.reference index 8be8aff668e..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02463_julian_day_ubsan.reference +++ b/tests/queries/0_stateless/02463_julian_day_ubsan.reference @@ -1 +0,0 @@ -9999-01-01 diff --git a/tests/queries/0_stateless/02463_julian_day_ubsan.sql b/tests/queries/0_stateless/02463_julian_day_ubsan.sql index 60b51538bf3..a8583d7b0a8 100644 --- a/tests/queries/0_stateless/02463_julian_day_ubsan.sql +++ b/tests/queries/0_stateless/02463_julian_day_ubsan.sql @@ -1 +1 @@ -SELECT fromModifiedJulianDay(9223372036854775807 :: Int64); +SELECT fromModifiedJulianDay(9223372036854775807 :: Int64); -- { serverError 490 }
From abd585a38a3dc802db7bc9c88aad4ab06a09b12a Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 21 Oct 2022 12:52:46 +0200 Subject: [PATCH 211/252] Increase threshold for using physical cores (#42503) --- src/Common/getNumberOfPhysicalCPUCores.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/getNumberOfPhysicalCPUCores.cpp b/src/Common/getNumberOfPhysicalCPUCores.cpp index 7bb68b324b2..7a1f10b6435 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -48,7 +48,7 @@ static unsigned getNumberOfPhysicalCPUCoresImpl() /// Let's limit ourself to the number of physical cores. /// But if the number of logical cores is small - maybe it is a small machine /// or very limited cloud instance and it is reasonable to use all the cores. - if (cpu_count >= 8) + if (cpu_count >= 32) cpu_count /= 2; #endif
From 19715f154251a19db77dbaf83aa1b1a4e7c60967 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 7 Oct 2022 12:16:39 +0200 Subject: [PATCH 212/252] Enable -Wshorten-64-to-32 Signed-off-by: Azat Khuzhin --- cmake/warnings.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index 89f3a62ba2e..8364b0c2c08 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -27,7 +27,6 @@ if (COMPILER_CLANG) no_warning(sign-conversion) no_warning(implicit-int-conversion) no_warning(implicit-int-float-conversion) - no_warning(shorten-64-to-32) no_warning(ctad-maybe-unsupported) # clang 9+, linux-only no_warning(disabled-macro-expansion) no_warning(documentation-unknown-command)
From 4e76629aafc4e855e6a1d006e655d48ea141c6ea Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 7 Oct 2022 12:46:45 +0200 Subject: [PATCH 213/252] Fixes for -Wshorten-64-to-32

- lots of static_cast
- add safe_cast
- types adjustments
- config
- IStorage::read/watch
- ...
- some TODO's (to convert types in future)

P.S. That was quite a journey...
v2: fixes after rebase v3: fix conflicts after #42308 merged Signed-off-by: Azat Khuzhin --- base/base/ReplxxLineReader.cpp | 8 +-- base/base/StringRef.h | 10 ++-- base/base/itoa.h | 2 +- programs/client/Client.cpp | 10 +++- programs/copier/ZooKeeperStaff.h | 4 +- programs/install/Install.cpp | 8 +-- programs/keeper/Keeper.cpp | 12 ++--- programs/local/LocalServer.cpp | 7 ++- programs/obfuscator/Obfuscator.cpp | 8 +-- programs/odbc-bridge/ColumnInfoHandler.cpp | 5 +- programs/odbc-bridge/ODBCBlockInputStream.cpp | 2 +- programs/server/Server.cpp | 16 +++--- src/Access/Common/AllowedClientHosts.cpp | 2 +- src/Access/LDAPClient.cpp | 2 +- .../AggregateFunctionGroupBitmap.h | 6 ++- .../AggregateFunctionGroupBitmapData.h | 2 +- .../AggregateFunctionHistogram.h | 8 +-- .../AggregateFunctionMinMaxAny.h | 4 +- .../AggregateFunctionRetention.h | 2 +- .../AggregateFunctionSequenceMatch.h | 2 +- .../AggregateFunctionSequenceNextNode.h | 4 +- .../AggregateFunctionSparkbar.h | 12 ++--- .../AggregateFunctionUniq.h | 3 +- .../AggregateFunctionUniqCombined.h | 2 +- .../ReservoirSamplerDeterministic.h | 2 +- src/AggregateFunctions/UniquesHashSet.h | 2 +- src/Backups/BackupIO_S3.cpp | 8 +-- src/Backups/BackupSettings.cpp | 5 +- src/Bridge/IBridge.h | 2 +- src/BridgeHelper/IBridgeHelper.h | 2 +- src/BridgeHelper/LibraryBridgeHelper.h | 2 +- src/BridgeHelper/XDBCBridgeHelper.h | 2 +- src/Client/ClientBase.cpp | 16 +++--- src/Client/HedgedConnections.cpp | 2 +- src/Client/HedgedConnectionsFactory.cpp | 4 +- src/Client/HedgedConnectionsFactory.h | 2 +- src/Client/QueryFuzzer.cpp | 6 +-- src/Columns/ColumnArray.cpp | 4 +- src/Columns/ColumnCompressed.cpp | 8 +-- src/Columns/ColumnFixedString.cpp | 4 +- src/Columns/ColumnLowCardinality.cpp | 6 +-- src/Columns/ColumnString.cpp | 4 +- src/Columns/ColumnUnique.h | 8 +-- src/Columns/MaskOperations.cpp | 6 +-- src/Columns/tests/gtest_weak_hash_32.cpp | 32 +++++------ src/Common/CombinedCardinalityEstimator.h | 2 +- src/Common/DateLUTImpl.cpp | 4 +- src/Common/DateLUTImpl.h | 44 +++++++-------- src/Common/Dwarf.h | 8 +-- src/Common/HashTable/Hash.h | 45 ++++++++-------- src/Common/HashTable/TwoLevelHashTable.h | 18 +++---- .../HashTable/TwoLevelStringHashTable.h | 18 +++---- src/Common/HyperLogLogCounter.h | 10 +++- .../HyperLogLogWithSmallSetOptimization.h | 2 +- src/Common/OptimizedRegularExpression.cpp | 10 +++- src/Common/PoolWithFailoverBase.h | 2 +- src/Common/QueryProfiler.cpp | 6 +-- src/Common/RadixSort.h | 4 +- src/Common/StringSearcher.h | 2 +- src/Common/ThreadPool.cpp | 8 +-- src/Common/ThreadPool.h | 12 ++--- src/Common/ThreadProfileEvents.cpp | 4 +- src/Common/UTF8Helpers.h | 9 +++- src/Common/Volnitsky.h | 3 +- src/Common/ZooKeeper/TestKeeper.cpp | 2 +- src/Common/ZooKeeper/ZooKeeper.h | 2 +- src/Common/ZooKeeper/ZooKeeperCommon.cpp | 4 +- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 4 +- src/Common/examples/parallel_aggregation.cpp | 4 +- src/Common/filesystemHelpers.cpp | 2 +- src/Common/filesystemHelpers.h | 2 +- src/Common/formatIPv6.cpp | 16 +++--- src/Common/getCurrentProcessFDCount.cpp | 4 +- src/Common/getCurrentProcessFDCount.h | 4 +- src/Common/intExp.h | 2 +- src/Common/mysqlxx/Value.cpp | 2 +- src/Common/mysqlxx/mysqlxx/Row.h | 4 +- src/Common/mysqlxx/mysqlxx/Value.h | 4 +- src/Common/parseGlobs.cpp | 4 +- src/Common/parseRemoteDescription.cpp | 4 +- src/Common/safe_cast.h | 22 ++++++++ src/Common/tests/gtest_DateLUTImpl.cpp | 2 +- src/Common/tests/gtest_hash_table.cpp | 10 ++-- src/Common/tests/gtest_lru_hash_map.cpp | 2 +- 
src/Common/waitForPid.cpp | 5 +- src/Compression/CompressedReadBufferBase.cpp | 4 +- src/Compression/CompressedWriteBuffer.cpp | 4 +- .../CompressionCodecDoubleDelta.cpp | 4 +- src/Compression/CompressionCodecEncrypted.cpp | 4 +- src/Compression/CompressionCodecFPC.cpp | 4 +- src/Compression/CompressionCodecGorilla.cpp | 4 +- src/Compression/CompressionCodecLZ4.cpp | 2 +- src/Compression/CompressionCodecMultiple.cpp | 4 +- src/Compression/CompressionCodecT64.cpp | 8 +-- src/Compression/CompressionCodecZSTD.cpp | 16 +++--- .../tests/gtest_compressionCodec.cpp | 20 ++++--- src/Coordination/KeeperServer.cpp | 5 +- src/Coordination/KeeperStorage.cpp | 4 +- src/Coordination/SummingStateMachine.cpp | 4 +- src/Coordination/ZooKeeperDataReader.cpp | 2 +- src/Coordination/tests/gtest_coordination.cpp | 4 +- src/Core/DecimalComparison.h | 2 +- src/Core/DecimalFunctions.h | 10 ++-- src/Core/MySQL/MySQLReplication.cpp | 14 ++--- src/Core/MySQL/PacketsReplication.cpp | 2 +- src/Core/PostgreSQL/insertPostgreSQLValue.cpp | 2 +- src/Core/PostgreSQLProtocol.h | 15 ++++-- src/Core/tests/gtest_DecimalFunctions.cpp | 15 ++++-- src/Daemon/BaseDaemon.cpp | 3 +- src/Daemon/SentryWriter.cpp | 2 +- src/DataTypes/DataTypeEnum.cpp | 2 +- src/DataTypes/Native.h | 2 +- .../Serializations/SerializationDateTime.cpp | 10 ++-- .../Serializations/SerializationInfoTuple.cpp | 2 +- .../SerializationLowCardinality.cpp | 8 +-- src/DataTypes/getLeastSupertype.cpp | 26 +++++---- src/Databases/DatabaseOnDisk.cpp | 8 ++- src/Databases/MySQL/DatabaseMySQL.cpp | 9 +++- .../MySQL/MaterializedMySQLSyncThread.cpp | 9 ++-- src/Databases/SQLite/DatabaseSQLite.cpp | 4 +- src/Dictionaries/CassandraSource.cpp | 2 +- .../HierarchyFormatReader.cpp | 2 +- src/Dictionaries/RedisSource.cpp | 2 +- src/Dictionaries/SSDCacheDictionaryStorage.h | 5 +- ...ynchronousReadIndirectBufferFromRemoteFS.h | 2 +- .../DiskObjectStorageMetadata.cpp | 8 --- .../DiskObjectStorageMetadata.h | 1 - .../ObjectStorages/S3/S3ObjectStorage.cpp | 4 +- src/Disks/ObjectStorages/S3/diskSettings.cpp | 3 +- src/Formats/CapnProtoUtils.cpp | 2 +- src/Formats/ProtobufSerializer.cpp | 11 ++-- src/Functions/CRC.cpp | 4 +- src/Functions/CustomWeekTransforms.h | 7 ++- src/Functions/DateTimeTransforms.h | 51 +++++++++--------- src/Functions/DivisionUtils.h | 2 +- src/Functions/FunctionBinaryArithmetic.h | 40 +++++++++----- .../FunctionDateOrDateTimeAddInterval.h | 54 +++++++++---------- src/Functions/FunctionIfBase.h | 2 +- src/Functions/FunctionSQLJSON.h | 2 +- src/Functions/FunctionSnowflake.h | 3 +- src/Functions/FunctionUnixTimestamp64.h | 4 +- src/Functions/FunctionsAES.h | 9 ++-- src/Functions/FunctionsConversion.h | 24 ++++----- src/Functions/FunctionsJSON.cpp | 4 +- .../FunctionsLanguageClassification.cpp | 10 +++- src/Functions/FunctionsLogical.h | 2 +- src/Functions/FunctionsRandom.cpp | 2 +- src/Functions/FunctionsRound.h | 4 +- src/Functions/FunctionsStringHash.cpp | 14 ++--- src/Functions/FunctionsTimeWindow.h | 14 +++-- src/Functions/GregorianDate.h | 2 +- .../JSONPath/Generator/GeneratorJSONPath.h | 2 +- .../JSONPath/Parsers/ParserJSONPathRange.cpp | 4 +- src/Functions/MultiMatchAllIndicesImpl.h | 4 +- src/Functions/MultiMatchAnyImpl.h | 4 +- src/Functions/PolygonUtils.h | 2 +- src/Functions/Regexps.h | 6 +-- src/Functions/ReplaceRegexpImpl.h | 4 +- src/Functions/URL/CMakeLists.txt | 1 + src/Functions/array/arrayAggregation.cpp | 4 +- src/Functions/array/arrayCount.cpp | 4 +- src/Functions/array/arrayEnumerate.cpp | 2 +- src/Functions/array/arrayEnumerateRanked.cpp | 4 
+- src/Functions/array/arrayFirstLastIndex.cpp | 4 +- src/Functions/array/arrayUniq.cpp | 2 +- src/Functions/array/range.cpp | 16 +++--- src/Functions/divide/divideImpl.cpp | 2 +- src/Functions/errorCodeToName.cpp | 3 +- src/Functions/extractAllGroups.h | 6 ++- src/Functions/extractGroups.cpp | 3 +- src/Functions/formatDateTime.cpp | 2 +- src/Functions/makeDate.cpp | 9 ++-- src/Functions/minus.cpp | 2 +- src/Functions/modulo.cpp | 11 ++-- src/Functions/now64.cpp | 2 +- src/Functions/nowInBlock.cpp | 2 +- src/Functions/plus.cpp | 2 +- src/Functions/pointInEllipses.cpp | 2 +- src/Functions/randomStringUTF8.cpp | 4 +- src/Functions/runningConcurrency.cpp | 3 +- src/Functions/stem.cpp | 4 +- src/Functions/tests/gtest_has_all.cpp | 8 +-- src/Functions/toStartOfInterval.cpp | 10 ++-- src/Functions/toValidUTF8.cpp | 2 +- src/IO/AIO.cpp | 8 +-- src/IO/Archives/ZipArchiveReader.cpp | 2 +- src/IO/Archives/ZipArchiveWriter.cpp | 3 +- src/IO/Bzip2ReadBuffer.cpp | 6 +-- src/IO/Bzip2WriteBuffer.cpp | 6 +-- src/IO/FileEncryptionCommon.cpp | 13 +++-- src/IO/HTTPCommon.cpp | 4 +- src/IO/HTTPCommon.h | 2 +- src/IO/ReadBufferFromPocoSocket.cpp | 6 ++- src/IO/VarInt.h | 4 +- src/IO/WriteBufferFromPocoSocket.cpp | 7 ++- src/IO/WriteBufferFromS3.cpp | 4 +- src/IO/WriteBufferFromS3.h | 4 +- src/IO/WriteBufferValidUTF8.cpp | 2 +- src/IO/WriteHelpers.h | 2 +- src/IO/ZlibDeflatingWriteBuffer.cpp | 8 +-- src/IO/ZlibInflatingReadBuffer.cpp | 4 +- src/IO/ZstdDeflatingAppendableWriteBuffer.cpp | 2 +- src/IO/ZstdDeflatingWriteBuffer.cpp | 2 +- src/IO/readDecimalText.h | 2 +- src/IO/readFloatText.h | 8 +-- src/Interpreters/ActionsVisitor.h | 2 +- src/Interpreters/Aggregator.cpp | 14 ++--- src/Interpreters/Aggregator.h | 4 +- src/Interpreters/Cluster.cpp | 16 +++--- .../ClusterProxy/SelectStreamFactory.cpp | 6 +-- .../ClusterProxy/SelectStreamFactory.h | 2 +- .../ClusterProxy/executeQuery.cpp | 5 +- src/Interpreters/ConcurrentHashJoin.cpp | 2 +- src/Interpreters/DDLWorker.cpp | 2 +- src/Interpreters/DDLWorker.h | 2 +- src/Interpreters/DatabaseCatalog.cpp | 6 +-- src/Interpreters/InterpreterCreateQuery.cpp | 8 ++- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- src/Interpreters/JIT/compileFunction.cpp | 2 +- .../NormalizeSelectWithUnionQueryVisitor.cpp | 3 +- src/Interpreters/ProcessList.cpp | 7 ++- src/Interpreters/RowRefs.cpp | 2 +- src/Interpreters/RowRefs.h | 5 +- src/Interpreters/TemporaryDataOnDisk.cpp | 2 +- src/Interpreters/TemporaryDataOnDisk.h | 2 +- src/Interpreters/ThreadStatusExt.cpp | 6 +-- src/Interpreters/convertFieldToType.cpp | 2 +- src/Interpreters/createBlockSelector.cpp | 2 +- src/Interpreters/executeQuery.cpp | 7 +-- src/Parsers/ASTTTLElement.cpp | 2 +- src/Parsers/ParserSampleRatio.cpp | 6 +-- src/Parsers/parseQuery.cpp | 3 +- src/Processors/Formats/IRowInputFormat.cpp | 2 +- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 35 ++++++------ .../Formats/Impl/AvroRowInputFormat.cpp | 37 ++++++------- .../Formats/Impl/AvroRowOutputFormat.cpp | 2 +- .../Formats/Impl/CHColumnToArrowColumn.cpp | 11 +++- .../Formats/Impl/CapnProtoRowInputFormat.cpp | 4 +- .../Formats/Impl/CapnProtoRowOutputFormat.cpp | 6 +-- .../Impl/ConstantExpressionTemplate.cpp | 2 +- .../Formats/Impl/MsgPackRowInputFormat.cpp | 4 +- .../Formats/Impl/MsgPackRowOutputFormat.cpp | 22 ++++---- .../Formats/Impl/MySQLOutputFormat.cpp | 2 +- .../Formats/Impl/ORCBlockInputFormat.cpp | 4 +- .../Impl/ParallelParsingInputFormat.cpp | 3 +- .../Formats/Impl/ParquetBlockInputFormat.cpp | 2 +- .../Formats/Impl/RegexpRowInputFormat.cpp | 6 ++- 
.../Formats/Impl/ValuesBlockInputFormat.cpp | 2 +- .../QueryPlan/ReadFromMergeTree.cpp | 2 +- src/Processors/QueryPlan/ReadFromMergeTree.h | 2 +- src/Processors/Sources/MySQLSource.cpp | 6 +-- src/Processors/Sources/SQLiteSource.cpp | 10 ++-- src/Processors/Sources/SQLiteSource.h | 2 +- src/Processors/Sources/ShellCommandSource.cpp | 4 +- src/Processors/TTL/ITTLAlgorithm.cpp | 4 +- .../Transforms/FillingTransform.cpp | 4 +- src/Processors/Transforms/MongoDBSource.cpp | 4 +- src/Processors/Transforms/WindowTransform.cpp | 22 +++----- .../RemoteQueryExecutorReadContext.cpp | 4 +- src/Server/GRPCServer.cpp | 3 +- .../WriteBufferFromHTTPServerResponse.cpp | 2 +- .../HTTP/WriteBufferFromHTTPServerResponse.h | 4 +- src/Server/HTTPHandler.cpp | 6 ++- src/Server/KeeperTCPHandler.cpp | 5 +- src/Server/MySQLHandler.cpp | 18 +++++-- src/Server/MySQLHandler.h | 18 +++++-- src/Server/MySQLHandlerFactory.cpp | 2 +- src/Server/MySQLHandlerFactory.h | 2 +- src/Server/TCPHandler.cpp | 2 +- src/Server/TCPHandler.h | 2 +- src/Storages/Distributed/DirectoryMonitor.cpp | 2 +- src/Storages/FileLog/DirectoryWatcherBase.cpp | 4 +- src/Storages/FileLog/StorageFileLog.cpp | 2 +- src/Storages/FileLog/StorageFileLog.h | 2 +- .../HDFS/AsynchronousReadBufferFromHDFS.h | 2 +- src/Storages/HDFS/ReadBufferFromHDFS.cpp | 3 +- src/Storages/HDFS/StorageHDFS.cpp | 10 ++-- src/Storages/HDFS/StorageHDFS.h | 2 +- src/Storages/HDFS/StorageHDFSCluster.cpp | 2 +- src/Storages/HDFS/StorageHDFSCluster.h | 2 +- src/Storages/HDFS/WriteBufferFromHDFS.cpp | 3 +- src/Storages/Hive/HiveFile.cpp | 8 +-- src/Storages/Hive/StorageHive.cpp | 16 ++++-- src/Storages/Hive/StorageHive.h | 4 +- src/Storages/IStorage.cpp | 6 +-- src/Storages/IStorage.h | 6 +-- src/Storages/Kafka/StorageKafka.cpp | 4 +- src/Storages/Kafka/StorageKafka.h | 2 +- src/Storages/LiveView/StorageBlocks.h | 2 +- src/Storages/LiveView/StorageLiveView.cpp | 4 +- src/Storages/LiveView/StorageLiveView.h | 4 +- .../MeiliSearch/SourceMeiliSearch.cpp | 2 +- .../MeiliSearch/StorageMeiliSearch.cpp | 2 +- src/Storages/MeiliSearch/StorageMeiliSearch.h | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 8 +-- .../MergeTree/MergeTreeDataSelectExecutor.h | 6 +-- .../MergeTree/MergeTreeIndexAnnoy.cpp | 2 +- src/Storages/MergeTree/MergeType.cpp | 2 +- src/Storages/MergeTree/MergeType.h | 2 +- .../MergeTree/ReplicatedMergeTreeLogEntry.cpp | 2 +- ...ReplicatedMergeTreeMergeStrategyPicker.cpp | 6 +-- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 14 ++--- .../MergeTree/ReplicatedMergeTreeQueue.h | 1 + .../MergeTree/StorageFromMergeTreeDataPart.h | 2 +- src/Storages/MySQL/MySQLHelpers.cpp | 2 +- src/Storages/NATS/NATSConnection.cpp | 2 +- src/Storages/NATS/StorageNATS.cpp | 4 +- src/Storages/NATS/StorageNATS.h | 2 +- .../StorageMaterializedPostgreSQL.cpp | 2 +- .../StorageMaterializedPostgreSQL.h | 2 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 4 +- src/Storages/RabbitMQ/StorageRabbitMQ.h | 2 +- .../ReadFinalForExternalReplicaStorage.cpp | 5 +- .../ReadFinalForExternalReplicaStorage.h | 2 +- .../RocksDB/StorageEmbeddedRocksDB.cpp | 4 +- src/Storages/RocksDB/StorageEmbeddedRocksDB.h | 2 +- src/Storages/StorageBuffer.cpp | 2 +- src/Storages/StorageBuffer.h | 2 +- src/Storages/StorageDictionary.cpp | 2 +- src/Storages/StorageDictionary.h | 2 +- src/Storages/StorageDistributed.cpp | 2 +- src/Storages/StorageDistributed.h | 2 +- src/Storages/StorageExecutable.cpp | 2 +- src/Storages/StorageExecutable.h | 2 +- src/Storages/StorageExternalDistributed.cpp | 2 +- 
src/Storages/StorageExternalDistributed.h | 2 +- src/Storages/StorageFile.cpp | 4 +- src/Storages/StorageFile.h | 2 +- src/Storages/StorageGenerateRandom.cpp | 2 +- src/Storages/StorageGenerateRandom.h | 2 +- src/Storages/StorageInput.cpp | 2 +- src/Storages/StorageInput.h | 2 +- src/Storages/StorageJoin.cpp | 2 +- src/Storages/StorageJoin.h | 2 +- src/Storages/StorageKeeperMap.cpp | 2 +- src/Storages/StorageKeeperMap.h | 2 +- src/Storages/StorageLog.cpp | 2 +- src/Storages/StorageLog.h | 2 +- src/Storages/StorageMaterializedMySQL.cpp | 2 +- src/Storages/StorageMaterializedMySQL.h | 9 +++- src/Storages/StorageMaterializedView.cpp | 2 +- src/Storages/StorageMaterializedView.h | 2 +- src/Storages/StorageMemory.cpp | 2 +- src/Storages/StorageMemory.h | 2 +- src/Storages/StorageMerge.cpp | 2 +- src/Storages/StorageMerge.h | 2 +- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageMergeTree.h | 2 +- src/Storages/StorageMongoDB.cpp | 2 +- src/Storages/StorageMongoDB.h | 2 +- src/Storages/StorageMySQL.cpp | 2 +- src/Storages/StorageMySQL.h | 2 +- src/Storages/StorageNull.h | 4 +- src/Storages/StoragePostgreSQL.cpp | 2 +- src/Storages/StoragePostgreSQL.h | 2 +- src/Storages/StorageProxy.h | 4 +- src/Storages/StorageReplicatedMergeTree.cpp | 5 +- src/Storages/StorageReplicatedMergeTree.h | 2 +- src/Storages/StorageS3.cpp | 13 ++--- src/Storages/StorageS3.h | 2 +- src/Storages/StorageS3Cluster.cpp | 2 +- src/Storages/StorageS3Cluster.h | 2 +- src/Storages/StorageSQLite.cpp | 2 +- src/Storages/StorageSQLite.h | 2 +- src/Storages/StorageStripeLog.cpp | 2 +- src/Storages/StorageStripeLog.h | 2 +- src/Storages/StorageTableFunction.h | 2 +- src/Storages/StorageURL.cpp | 9 ++-- src/Storages/StorageURL.h | 4 +- src/Storages/StorageValues.cpp | 2 +- src/Storages/StorageValues.h | 2 +- src/Storages/StorageView.cpp | 2 +- src/Storages/StorageView.h | 2 +- src/Storages/StorageXDBC.cpp | 2 +- src/Storages/StorageXDBC.h | 2 +- src/Storages/System/IStorageSystemOneBlock.h | 2 +- src/Storages/System/StorageSystemBackups.cpp | 4 +- src/Storages/System/StorageSystemColumns.cpp | 2 +- src/Storages/System/StorageSystemColumns.h | 2 +- .../StorageSystemDataSkippingIndices.cpp | 2 +- .../System/StorageSystemDataSkippingIndices.h | 2 +- .../System/StorageSystemDetachedParts.cpp | 2 +- .../System/StorageSystemDetachedParts.h | 2 +- src/Storages/System/StorageSystemDisks.cpp | 2 +- src/Storages/System/StorageSystemDisks.h | 2 +- src/Storages/System/StorageSystemErrors.cpp | 2 +- src/Storages/System/StorageSystemNumbers.cpp | 2 +- src/Storages/System/StorageSystemNumbers.h | 2 +- src/Storages/System/StorageSystemOne.cpp | 2 +- src/Storages/System/StorageSystemOne.h | 2 +- .../System/StorageSystemPartsBase.cpp | 2 +- src/Storages/System/StorageSystemPartsBase.h | 2 +- .../System/StorageSystemQuotaLimits.cpp | 2 +- .../System/StorageSystemQuotaUsage.cpp | 4 +- src/Storages/System/StorageSystemQuotas.cpp | 5 +- .../System/StorageSystemRemoteDataPaths.cpp | 2 +- .../System/StorageSystemRemoteDataPaths.h | 2 +- src/Storages/System/StorageSystemReplicas.cpp | 2 +- src/Storages/System/StorageSystemReplicas.h | 2 +- .../System/StorageSystemStackTrace.cpp | 4 +- src/Storages/System/StorageSystemStackTrace.h | 2 +- .../System/StorageSystemStoragePolicies.cpp | 2 +- .../System/StorageSystemStoragePolicies.h | 2 +- src/Storages/System/StorageSystemTables.cpp | 2 +- src/Storages/System/StorageSystemTables.h | 2 +- src/Storages/System/StorageSystemZeros.cpp | 2 +- src/Storages/System/StorageSystemZeros.h | 2 +- 
src/Storages/WindowView/StorageWindowView.cpp | 9 ++-- src/Storages/WindowView/StorageWindowView.h | 4 +- src/TableFunctions/TableFunctionFile.cpp | 3 +- utils/compressor/decompress_perf.cpp | 6 ++- 411 files changed, 1209 insertions(+), 965 deletions(-) create mode 100644 src/Common/safe_cast.h diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index a014fa4b8f2..e0dc81af5b0 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -151,7 +151,7 @@ public: { size_t dot_pos = path.rfind('.'); if (dot_pos != std::string::npos) - fd = ::mkstemps(path.data(), path.size() - dot_pos); + fd = ::mkstemps(path.data(), static_cast(path.size() - dot_pos)); else fd = ::mkstemp(path.data()); @@ -408,7 +408,7 @@ ReplxxLineReader::ReplxxLineReader( // In a simplest case use simple comment. commented_line = fmt::format("-- {}", state.text()); } - rx.set_state(replxx::Replxx::State(commented_line.c_str(), commented_line.size())); + rx.set_state(replxx::Replxx::State(commented_line.c_str(), static_cast(commented_line.size()))); return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); }; @@ -480,7 +480,7 @@ void ReplxxLineReader::openEditor() if (executeCommand(argv) == 0) { const std::string & new_query = readFile(editor_file.getPath()); - rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size())); + rx.set_state(replxx::Replxx::State(new_query.c_str(), static_cast(new_query.size()))); } } catch (const std::runtime_error & e) @@ -526,7 +526,7 @@ void ReplxxLineReader::openInteractiveHistorySearch() { std::string new_query = readFile(output_file.getPath()); rightTrim(new_query); - rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size())); + rx.set_state(replxx::Replxx::State(new_query.c_str(), static_cast(new_query.size()))); } } catch (const std::runtime_error & e) diff --git a/base/base/StringRef.h b/base/base/StringRef.h index 5ee197021ca..779099a1573 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -265,7 +265,7 @@ inline size_t hashLessThan16(const char * data, size_t size) struct CRC32Hash { - size_t operator() (StringRef x) const + unsigned operator() (StringRef x) const { const char * pos = x.data; size_t size = x.size; @@ -275,22 +275,22 @@ struct CRC32Hash if (size < 8) { - return hashLessThan8(x.data, x.size); + return static_cast(hashLessThan8(x.data, x.size)); } const char * end = pos + size; - size_t res = -1ULL; + unsigned res = -1U; do { UInt64 word = unalignedLoad(pos); - res = CRC_INT(res, word); + res = static_cast(CRC_INT(res, word)); pos += 8; } while (pos + 8 < end); UInt64 word = unalignedLoad(end - 8); /// I'm not sure if this is normal. 
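/// [Editor's note] The "I'm not sure if this is normal" above is the usual
/// overlapping-tail trick: the loop consumes the input in 8-byte words, and
/// the final unalignedLoad(end - 8) hashes the last 8 bytes of the buffer,
/// re-reading up to 7 bytes (or a whole word, when the size is a multiple
/// of 8) that were already fed to CRC_INT. That is well-defined here because
/// this branch is only reached when size >= 8; shorter inputs took the
/// hashLessThan8() path earlier in the function.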
- res = CRC_INT(res, word); + res = static_cast(CRC_INT(res, word)); return res; } diff --git a/base/base/itoa.h b/base/base/itoa.h index 5e0b18d50c0..809b7c86c42 100644 --- a/base/base/itoa.h +++ b/base/base/itoa.h @@ -122,7 +122,7 @@ QuotientAndRemainder static inline split(UnsignedOfSize value) constexpr DivisionBy10PowN division; UnsignedOfSize quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift; - UnsignedOfSize remainder = value - quotient * pow10>(N); + UnsignedOfSize remainder = static_cast>(value - quotient * pow10>(N)); return {quotient, remainder}; } diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index e616cb8cf72..58569a32619 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1108,15 +1108,21 @@ void Client::processConfig() else format = config().getString("format", is_interactive ? "PrettyCompact" : "TabSeparated"); - format_max_block_size = config().getInt("format_max_block_size", global_context->getSettingsRef().max_block_size); + format_max_block_size = config().getUInt64("format_max_block_size", + global_context->getSettingsRef().max_block_size); insert_format = "Values"; /// Setting value from cmd arg overrides one from config if (global_context->getSettingsRef().max_insert_block_size.changed) + { insert_format_max_block_size = global_context->getSettingsRef().max_insert_block_size; + } else - insert_format_max_block_size = config().getInt("insert_format_max_block_size", global_context->getSettingsRef().max_insert_block_size); + { + insert_format_max_block_size = config().getUInt64("insert_format_max_block_size", + global_context->getSettingsRef().max_insert_block_size); + } ClientInfo & client_info = global_context->getClientInfo(); client_info.setInitialQuery(); diff --git a/programs/copier/ZooKeeperStaff.h b/programs/copier/ZooKeeperStaff.h index a9e04578607..3d4a11186e3 100644 --- a/programs/copier/ZooKeeperStaff.h +++ b/programs/copier/ZooKeeperStaff.h @@ -47,8 +47,8 @@ public: WrappingUInt32 epoch; WrappingUInt32 counter; explicit Zxid(UInt64 _zxid) - : epoch(_zxid >> 32) - , counter(_zxid) + : epoch(static_cast(_zxid >> 32)) + , counter(static_cast(_zxid)) {} bool operator<=(const Zxid & other) const diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 00c86571265..8028ccde72d 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -893,7 +893,7 @@ namespace if (fs::exists(pid_file)) { ReadBufferFromFile in(pid_file.string()); - UInt64 pid; + Int32 pid; if (tryReadIntText(pid, in)) { fmt::print("{} file exists and contains pid = {}.\n", pid_file.string(), pid); @@ -982,9 +982,9 @@ namespace return 0; } - UInt64 isRunning(const fs::path & pid_file) + int isRunning(const fs::path & pid_file) { - UInt64 pid = 0; + int pid = 0; if (fs::exists(pid_file)) { @@ -1057,7 +1057,7 @@ namespace if (force && do_not_kill) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specified flags are incompatible"); - UInt64 pid = isRunning(pid_file); + int pid = isRunning(pid_file); if (!pid) return 0; diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 5077f59b7dd..e1d03b40b66 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -68,12 +68,12 @@ namespace ErrorCodes namespace { -int waitServersToFinish(std::vector & servers, size_t seconds_to_wait) +size_t waitServersToFinish(std::vector & servers, size_t seconds_to_wait) { - const int sleep_max_ms = 1000 * seconds_to_wait; - const int sleep_one_ms = 100; 
- int sleep_current_ms = 0; - int current_connections = 0; + const size_t sleep_max_ms = 1000 * seconds_to_wait; + const size_t sleep_one_ms = 100; + size_t sleep_current_ms = 0; + size_t current_connections = 0; for (;;) { current_connections = 0; @@ -441,7 +441,7 @@ int Keeper::main(const std::vector & /*args*/) main_config_reloader.reset(); LOG_DEBUG(log, "Waiting for current connections to Keeper to finish."); - int current_connections = 0; + size_t current_connections = 0; for (auto & server : *servers) { server.stop(); diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index e7bc019f597..070f86aaad2 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -546,9 +546,14 @@ void LocalServer::processConfig() /// Setting value from cmd arg overrides one from config if (global_context->getSettingsRef().max_insert_block_size.changed) + { insert_format_max_block_size = global_context->getSettingsRef().max_insert_block_size; + } else - insert_format_max_block_size = config().getInt("insert_format_max_block_size", global_context->getSettingsRef().max_insert_block_size); + { + insert_format_max_block_size = config().getUInt64("insert_format_max_block_size", + global_context->getSettingsRef().max_insert_block_size); + } /// Sets external authenticators config (LDAP, Kerberos). global_context->setExternalAuthenticatorsConfig(config()); diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index bdf26c9e730..7fdc5a54d8a 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -279,7 +279,7 @@ Float transformFloatMantissa(Float x, UInt64 seed) constexpr size_t mantissa_num_bits = std::is_same_v ? 23 : 52; UInt x_uint = bit_cast(x); - x_uint = feistelNetwork(x_uint, mantissa_num_bits, seed); + x_uint = static_cast(feistelNetwork(x_uint, mantissa_num_bits, seed)); return bit_cast(x_uint); } @@ -511,13 +511,13 @@ public: for (size_t i = 0; i < size; ++i) { UInt32 src_datetime = src_data[i]; - UInt32 src_date = date_lut.toDate(src_datetime); + UInt32 src_date = static_cast(date_lut.toDate(src_datetime)); Int32 src_diff = src_datetime - src_prev_value; - Int32 res_diff = transformSigned(src_diff, seed); + Int32 res_diff = static_cast(transformSigned(src_diff, seed)); UInt32 new_datetime = res_prev_value + res_diff; - UInt32 new_time = new_datetime - date_lut.toDate(new_datetime); + UInt32 new_time = new_datetime - static_cast(date_lut.toDate(new_datetime)); res_data[i] = src_date + new_time; src_prev_value = src_datetime; diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index 0ea2495af78..7fa51fc8fcd 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -183,7 +183,10 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ if (columns.empty()) throw Exception("Columns definition was not returned", ErrorCodes::LOGICAL_ERROR); - WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); + WriteBufferFromHTTPServerResponse out( + response, + request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, + static_cast(keep_alive_timeout)); try { writeStringBinary(columns.toString(), out); diff --git a/programs/odbc-bridge/ODBCBlockInputStream.cpp b/programs/odbc-bridge/ODBCBlockInputStream.cpp index dec4c249b4b..5bbc39dc559 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.cpp +++ 
b/programs/odbc-bridge/ODBCBlockInputStream.cpp @@ -139,7 +139,7 @@ void ODBCSource::insertValue( readDateTimeText(time, in, assert_cast(data_type.get())->getTimeZone()); if (time < 0) time = 0; - assert_cast(column).insertValue(time); + assert_cast(column).insertValue(static_cast(time)); break; } case ValueType::vtDateTime64: diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 7c3193ceac6..b412b579539 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -228,12 +228,12 @@ catch (...) path)); } -int waitServersToFinish(std::vector & servers, size_t seconds_to_wait) +size_t waitServersToFinish(std::vector & servers, size_t seconds_to_wait) { - const int sleep_max_ms = 1000 * seconds_to_wait; - const int sleep_one_ms = 100; - int sleep_current_ms = 0; - int current_connections = 0; + const size_t sleep_max_ms = 1000 * seconds_to_wait; + const size_t sleep_one_ms = 100; + size_t sleep_current_ms = 0; + size_t current_connections = 0; for (;;) { current_connections = 0; @@ -933,7 +933,7 @@ int Server::main(const std::vector & /*args*/) else { rlim_t old = rlim.rlim_cur; - rlim.rlim_cur = config().getUInt("max_open_files", rlim.rlim_max); + rlim.rlim_cur = config().getUInt("max_open_files", static_cast(rlim.rlim_max)); int rc = setrlimit(RLIMIT_NOFILE, &rlim); if (rc != 0) LOG_WARNING(log, "Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, errnoToString()); @@ -1507,7 +1507,7 @@ int Server::main(const std::vector & /*args*/) if (!servers_to_start_before_tables.empty()) { LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish."); - int current_connections = 0; + size_t current_connections = 0; for (auto & server : servers_to_start_before_tables) { server.stop(); @@ -1793,7 +1793,7 @@ int Server::main(const std::vector & /*args*/) is_cancelled = true; - int current_connections = 0; + size_t current_connections = 0; { std::lock_guard lock(servers_lock); for (auto & server : servers) diff --git a/src/Access/Common/AllowedClientHosts.cpp b/src/Access/Common/AllowedClientHosts.cpp index 2f8151bf757..905f7ba08b5 100644 --- a/src/Access/Common/AllowedClientHosts.cpp +++ b/src/Access/Common/AllowedClientHosts.cpp @@ -236,7 +236,7 @@ void AllowedClientHosts::IPSubnet::set(const IPAddress & prefix_, const IPAddres void AllowedClientHosts::IPSubnet::set(const IPAddress & prefix_, size_t num_prefix_bits) { - set(prefix_, IPAddress(num_prefix_bits, prefix_.family())); + set(prefix_, IPAddress(static_cast(num_prefix_bits), prefix_.family())); } void AllowedClientHosts::IPSubnet::set(const IPAddress & address) diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index ff1ee6f3609..2affbc293ec 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -254,7 +254,7 @@ bool LDAPClient::openConnection() #endif { - const int search_timeout = params.search_timeout.count(); + const int search_timeout = static_cast(params.search_timeout.count()); diag(ldap_set_option(handle, LDAP_OPT_TIMELIMIT, &search_timeout)); } diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmap.h b/src/AggregateFunctions/AggregateFunctionGroupBitmap.h index fd8357e3ba8..dacde67f3ca 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupBitmap.h +++ b/src/AggregateFunctions/AggregateFunctionGroupBitmap.h @@ -45,7 +45,8 @@ public: void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - assert_cast 
&>(to).getData().push_back(this->data(place).rbs.size()); + assert_cast &>(to).getData().push_back( + static_cast(this->data(place).rbs.size())); } }; @@ -142,7 +143,8 @@ public: void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - assert_cast &>(to).getData().push_back(this->data(place).rbs.size()); + assert_cast &>(to).getData().push_back( + static_cast(this->data(place).rbs.size())); } }; diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h index 21ba7cd7301..801526432ae 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h +++ b/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h @@ -426,7 +426,7 @@ public: return 0; if (isSmall()) - return small.find(x) != small.end(); + return small.find(static_cast(x)) != small.end(); else return rb->contains(static_cast(x)); } diff --git a/src/AggregateFunctions/AggregateFunctionHistogram.h b/src/AggregateFunctions/AggregateFunctionHistogram.h index 9031eb73c09..18bfc085ba3 100644 --- a/src/AggregateFunctions/AggregateFunctionHistogram.h +++ b/src/AggregateFunctions/AggregateFunctionHistogram.h @@ -136,8 +136,8 @@ private: for (size_t i = 0; i <= size; ++i) { - previous[i] = i - 1; - next[i] = i + 1; + previous[i] = static_cast(i - 1); + next[i] = static_cast(i + 1); } next[size] = 0; @@ -157,7 +157,7 @@ private: auto quality = [&](UInt32 i) { return points[next[i]].mean - points[i].mean; }; for (size_t i = 0; i + 1 < size; ++i) - queue.push({quality(i), i}); + queue.push({quality(static_cast(i)), i}); while (new_size > max_bins && !queue.empty()) { @@ -217,7 +217,7 @@ private: points[left] = points[right]; } } - size = left + 1; + size = static_cast(left + 1); } public: diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 783fa0606b5..f8d252cf8e9 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -540,7 +540,7 @@ public: /// Assuming to.has() void changeImpl(StringRef value, Arena * arena) { - Int32 value_size = value.size; + Int32 value_size = static_cast(value.size); if (value_size <= MAX_SMALL_STRING_SIZE) { @@ -555,7 +555,7 @@ public: if (capacity < value_size) { /// Don't free large_data here. 
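/// [Editor's note] roundUpToPowerOfTwoOrZero() yields a 64-bit result while
/// capacity is a 32-bit field, hence the narrowing cast added below. It is
/// assumed lossless in practice: value_size was itself narrowed to Int32 a
/// few lines above, so its power-of-two round-up fits 32 bits for any
/// realistic single-value string length.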
- capacity = roundUpToPowerOfTwoOrZero(value_size); + capacity = static_cast(roundUpToPowerOfTwoOrZero(value_size)); large_data = arena->alloc(capacity); } diff --git a/src/AggregateFunctions/AggregateFunctionRetention.h b/src/AggregateFunctions/AggregateFunctionRetention.h index a949b5e93f6..18d04fb1ea4 100644 --- a/src/AggregateFunctions/AggregateFunctionRetention.h +++ b/src/AggregateFunctions/AggregateFunctionRetention.h @@ -44,7 +44,7 @@ struct AggregateFunctionRetentionData void serialize(WriteBuffer & buf) const { - UInt32 event_value = events.to_ulong(); + UInt32 event_value = static_cast(events.to_ulong()); writeBinary(event_value, buf); } diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 5c1ab803f19..bcea408d26b 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -272,7 +272,7 @@ private: actions.emplace_back(PatternActionType::SpecificEvent, event_number - 1); dfa_states.back().transition = DFATransition::SpecificEvent; - dfa_states.back().event = event_number - 1; + dfa_states.back().event = static_cast(event_number - 1); dfa_states.emplace_back(); conditions_in_pattern.set(event_number - 1); } diff --git a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.h b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.h index 423b53b03f3..c29055ae8db 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.h @@ -226,7 +226,7 @@ public: for (UInt8 i = 0; i < events_size; ++i) if (assert_cast *>(columns[min_required_args + i])->getData()[row_num]) node->events_bitset.set(i); - node->event_time = timestamp; + node->event_time = static_cast(timestamp); node->can_be_base = assert_cast *>(columns[base_cond_column_idx])->getData()[row_num]; @@ -365,7 +365,7 @@ public: /// The first matched event is 0x00000001, the second one is 0x00000002, the third one is 0x00000004, and so on. UInt32 getNextNodeIndex(Data & data) const { - const UInt32 unmatched_idx = data.value.size(); + const UInt32 unmatched_idx = static_cast(data.value.size()); if (data.value.size() <= events_size) return unmatched_idx; diff --git a/src/AggregateFunctions/AggregateFunctionSparkbar.h b/src/AggregateFunctions/AggregateFunctionSparkbar.h index cb017053fd7..f0fbdd2f2e4 100644 --- a/src/AggregateFunctions/AggregateFunctionSparkbar.h +++ b/src/AggregateFunctions/AggregateFunctionSparkbar.h @@ -165,7 +165,7 @@ private: { for (size_t i = 0; i <= diff_x; ++i) { - auto it = data.points.find(min_x_local + i); + auto it = data.points.find(static_cast(min_x_local + i)); bool found = it != data.points.end(); value += getBar(found ? std::round(((it->getMapped() - min_y) / diff_y) * 7) + 1 : 0.0); } @@ -173,7 +173,7 @@ private: else { for (size_t i = 0; i <= diff_x; ++i) - value += getBar(data.points.has(min_x_local + i) ? 1 : 0); + value += getBar(data.points.has(min_x_local + static_cast(i)) ? 
1 : 0); } } else @@ -202,7 +202,7 @@ private: if (i == bound.first) // is bound { Float64 proportion = bound.second - bound.first; - auto it = data.points.find(min_x_local + i); + auto it = data.points.find(min_x_local + static_cast(i)); bool found = (it != data.points.end()); if (found && proportion > 0) new_y = new_y.value_or(0) + it->getMapped() * proportion; @@ -229,7 +229,7 @@ private: } else { - auto it = data.points.find(min_x_local + i); + auto it = data.points.find(min_x_local + static_cast(i)); if (it != data.points.end()) new_y = new_y.value_or(0) + it->getMapped(); } @@ -267,8 +267,8 @@ public: if (params.size() == 3) { specified_min_max_x = true; - min_x = params.at(1).safeGet(); - max_x = params.at(2).safeGet(); + min_x = static_cast(params.at(1).safeGet()); + max_x = static_cast(params.at(2).safeGet()); } else { diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index d44937b3f9d..fe2530800cc 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -175,8 +175,9 @@ struct OneAdder { if constexpr (!std::is_same_v) { + using ValueType = typename decltype(data.set)::value_type; const auto & value = assert_cast &>(column).getElement(row_num); - data.set.insert(AggregateFunctionUniqTraits::hash(value)); + data.set.insert(static_cast(AggregateFunctionUniqTraits::hash(value))); } else { diff --git a/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/src/AggregateFunctions/AggregateFunctionUniqCombined.h index 51020abe826..47b3081225b 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -43,7 +43,7 @@ namespace detail { static Ret hash(UInt128 x) { - return sipHash64(x); + return static_cast(sipHash64(x)); } }; diff --git a/src/AggregateFunctions/ReservoirSamplerDeterministic.h b/src/AggregateFunctions/ReservoirSamplerDeterministic.h index a64c02e823b..557fd93a3a9 100644 --- a/src/AggregateFunctions/ReservoirSamplerDeterministic.h +++ b/src/AggregateFunctions/ReservoirSamplerDeterministic.h @@ -84,7 +84,7 @@ public: if (isNaN(v)) return; - UInt32 hash = intHash64(determinator); + UInt32 hash = static_cast(intHash64(determinator)); insertImpl(v, hash); sorted = false; ++total_values; diff --git a/src/AggregateFunctions/UniquesHashSet.h b/src/AggregateFunctions/UniquesHashSet.h index 5c82ec10691..777ec0edc7e 100644 --- a/src/AggregateFunctions/UniquesHashSet.h +++ b/src/AggregateFunctions/UniquesHashSet.h @@ -118,7 +118,7 @@ private: HashValue hash(Value key) const { - return Hash()(key); + return static_cast(Hash()(key)); } /// Delete all values whose hashes do not divide by 2 ^ skip_degree diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index be509824261..12038a8a30c 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -44,12 +44,12 @@ namespace S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( settings.auth_settings.region, context->getRemoteHostFilter(), - context->getGlobalContext()->getSettingsRef().s3_max_redirects, + static_cast(context->getGlobalContext()->getSettingsRef().s3_max_redirects), context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging, /* for_disk_s3 = */ false); client_configuration.endpointOverride = s3_uri.endpoint; - client_configuration.maxConnections = context->getSettingsRef().s3_max_connections; + client_configuration.maxConnections 
= static_cast(context->getSettingsRef().s3_max_connections); /// Increase connect timeout client_configuration.connectTimeoutMs = 10 * 1000; /// Requests in backups can be extremely long, set to one hour @@ -221,7 +221,7 @@ void BackupWriterS3::copyObjectMultipartImpl( part_request.SetBucket(dst_bucket); part_request.SetKey(dst_key); part_request.SetUploadId(multipart_upload_id); - part_request.SetPartNumber(part_number); + part_request.SetPartNumber(static_cast(part_number)); part_request.SetCopySourceRange(fmt::format("bytes={}-{}", position, std::min(size, position + upload_part_size) - 1)); auto outcome = client->UploadPartCopy(part_request); @@ -251,7 +251,7 @@ void BackupWriterS3::copyObjectMultipartImpl( for (size_t i = 0; i < part_tags.size(); ++i) { Aws::S3::Model::CompletedPart part; - multipart_upload.AddParts(part.WithETag(part_tags[i]).WithPartNumber(i + 1)); + multipart_upload.AddParts(part.WithETag(part_tags[i]).WithPartNumber(static_cast(i) + 1)); } req.SetMultipartUpload(multipart_upload); diff --git a/src/Backups/BackupSettings.cpp b/src/Backups/BackupSettings.cpp index a4b20e0b863..295ab723326 100644 --- a/src/Backups/BackupSettings.cpp +++ b/src/Backups/BackupSettings.cpp @@ -62,7 +62,6 @@ namespace #define LIST_OF_BACKUP_SETTINGS(M) \ M(String, id) \ M(String, compression_method) \ - M(Int64, compression_level) \ M(String, password) \ M(Bool, structure_only) \ M(Bool, async) \ @@ -72,6 +71,7 @@ namespace M(String, host_id) \ M(String, coordination_zk_path) \ M(OptionalUUID, backup_uuid) + /// M(Int64, compression_level) BackupSettings BackupSettings::fromBackupQuery(const ASTBackupQuery & query) { @@ -82,6 +82,9 @@ BackupSettings BackupSettings::fromBackupQuery(const ASTBackupQuery & query) const auto & settings = query.settings->as().changes; for (const auto & setting : settings) { + if (setting.name == "compression_level") + res.compression_level = static_cast(SettingFieldInt64{setting.value}.value); + else #define GET_SETTINGS_FROM_BACKUP_QUERY_HELPER(TYPE, NAME) \ if (setting.name == #NAME) \ res.NAME = SettingField##TYPE{setting.value}.value; \ diff --git a/src/Bridge/IBridge.h b/src/Bridge/IBridge.h index 2f35c361cca..68af8860cb3 100644 --- a/src/Bridge/IBridge.h +++ b/src/Bridge/IBridge.h @@ -43,7 +43,7 @@ private: std::string hostname; size_t port; std::string log_level; - size_t max_server_connections; + unsigned max_server_connections; size_t http_timeout; Poco::Logger * log; diff --git a/src/BridgeHelper/IBridgeHelper.h b/src/BridgeHelper/IBridgeHelper.h index a3348c81b68..b7fac3f1303 100644 --- a/src/BridgeHelper/IBridgeHelper.h +++ b/src/BridgeHelper/IBridgeHelper.h @@ -43,7 +43,7 @@ protected: virtual String serviceFileName() const = 0; - virtual size_t getDefaultPort() const = 0; + virtual unsigned getDefaultPort() const = 0; virtual bool startBridgeManually() const = 0; diff --git a/src/BridgeHelper/LibraryBridgeHelper.h b/src/BridgeHelper/LibraryBridgeHelper.h index 447a4c713f4..1723d1f8fb4 100644 --- a/src/BridgeHelper/LibraryBridgeHelper.h +++ b/src/BridgeHelper/LibraryBridgeHelper.h @@ -23,7 +23,7 @@ protected: String serviceFileName() const override { return serviceAlias(); } - size_t getDefaultPort() const override { return DEFAULT_PORT; } + unsigned getDefaultPort() const override { return DEFAULT_PORT; } bool startBridgeManually() const override { return false; } diff --git a/src/BridgeHelper/XDBCBridgeHelper.h b/src/BridgeHelper/XDBCBridgeHelper.h index b62cb277ecb..139c1ab9726 100644 --- a/src/BridgeHelper/XDBCBridgeHelper.h +++ 
b/src/BridgeHelper/XDBCBridgeHelper.h @@ -109,7 +109,7 @@ protected: String getName() const override { return BridgeHelperMixin::getName(); } - size_t getDefaultPort() const override { return DEFAULT_PORT; } + unsigned getDefaultPort() const override { return DEFAULT_PORT; } String serviceAlias() const override { return BridgeHelperMixin::serviceAlias(); } diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 0db7a9533db..ee5c17ee8f2 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -550,7 +550,7 @@ try out_file_buf = wrapWriteBufferWithCompressionMethod( std::make_unique(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT), compression_method, - compression_level + static_cast(compression_level) ); if (query_with_output->is_into_outfile_with_stdout) @@ -1602,6 +1602,8 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( if (this_query_begin >= all_queries_end) return MultiQueryProcessingStage::QUERIES_END; + unsigned max_parser_depth = static_cast(global_context->getSettingsRef().max_parser_depth); + // If there are only comments left until the end of file, we just // stop. The parser can't handle this situation because it always // expects that there is some query that it can parse. @@ -1611,7 +1613,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( // and it makes more sense to treat them as such. { Tokens tokens(this_query_begin, all_queries_end); - IParser::Pos token_iterator(tokens, global_context->getSettingsRef().max_parser_depth); + IParser::Pos token_iterator(tokens, max_parser_depth); if (!token_iterator.isValid()) return MultiQueryProcessingStage::QUERIES_END; } @@ -1632,7 +1634,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( if (ignore_error) { Tokens tokens(this_query_begin, all_queries_end); - IParser::Pos token_iterator(tokens, global_context->getSettingsRef().max_parser_depth); + IParser::Pos token_iterator(tokens, max_parser_depth); while (token_iterator->type != TokenType::Semicolon && token_iterator.isValid()) ++token_iterator; this_query_begin = token_iterator->end; @@ -1672,7 +1674,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( // after we have processed the query. But even this guess is // beneficial so that we see proper trailing comments in "echo" and // server log. - adjustQueryEnd(this_query_end, all_queries_end, global_context->getSettingsRef().max_parser_depth); + adjustQueryEnd(this_query_end, all_queries_end, max_parser_depth); return MultiQueryProcessingStage::EXECUTE_QUERY; } @@ -1866,7 +1868,9 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) if (insert_ast && isSyncInsertWithData(*insert_ast, global_context)) { this_query_end = insert_ast->end; - adjustQueryEnd(this_query_end, all_queries_end, global_context->getSettingsRef().max_parser_depth); + adjustQueryEnd( + this_query_end, all_queries_end, + static_cast(global_context->getSettingsRef().max_parser_depth)); } // Report error. 
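/// [Editor's note] The ClientBase hunks above hoist the narrowing into one
/// local (max_parser_depth) instead of repeating static_cast at every use.
/// For places that must not truncate silently, the commit message lists
/// "add safe_cast" and the diffstat creates src/Common/safe_cast.h; its
/// contents are not shown in this patch, but a checked-narrowing helper of
/// roughly this shape is one plausible sketch (hypothetical, names and
/// behaviour assumed):

#include <stdexcept>

template <typename To, typename From>
To safe_cast(From from)
{
    To to = static_cast<To>(from);
    /// Round-trip plus sign check: catches truncation as well as
    /// signed/unsigned flips such as Int64(-1) -> UInt32.
    if (static_cast<From>(to) != from || (from < From{}) != (to < To{}))
        throw std::out_of_range("safe_cast: value does not fit the target type");
    return to;
}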
@@ -2347,7 +2351,7 @@ void ClientBase::init(int argc, char ** argv) if (options.count("print-profile-events")) config().setBool("print-profile-events", true); if (options.count("profile-events-delay-ms")) - config().setInt("profile-events-delay-ms", options["profile-events-delay-ms"].as()); + config().setUInt64("profile-events-delay-ms", options["profile-events-delay-ms"].as()); if (options.count("progress")) config().setBool("progress", true); if (options.count("echo")) diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index f1802467b57..c7392a86a7e 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -338,7 +338,7 @@ HedgedConnections::ReplicaLocation HedgedConnections::getReadyReplicaLocation(As offset_states[location.offset].replicas[location.index].change_replica_timeout.reset(); offset_states[location.offset].replicas[location.index].is_change_replica_timeout_expired = true; offset_states[location.offset].next_replica_in_process = true; - offsets_queue.push(location.offset); + offsets_queue.push(static_cast(location.offset)); ProfileEvents::increment(ProfileEvents::HedgedRequestsChangeReplica); startNewReplica(); } diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index 81067f51d29..bed73b1c200 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -362,7 +362,7 @@ void HedgedConnectionsFactory::removeReplicaFromEpoll(int index, int fd) timeout_fd_to_replica_index.erase(replicas[index].change_replica_timeout.getDescriptor()); } -int HedgedConnectionsFactory::numberOfProcessingReplicas() const +size_t HedgedConnectionsFactory::numberOfProcessingReplicas() const { if (epoll.empty()) return 0; @@ -381,7 +381,7 @@ HedgedConnectionsFactory::State HedgedConnectionsFactory::setBestUsableReplica(C && result.is_usable && !replicas[i].is_ready && (!skip_replicas_with_two_level_aggregation_incompatibility || !isTwoLevelAggregationIncompatible(&*result.entry))) - indexes.push_back(i); + indexes.push_back(static_cast(i)); } if (indexes.empty()) diff --git a/src/Client/HedgedConnectionsFactory.h b/src/Client/HedgedConnectionsFactory.h index c5e8d493efa..194e962d549 100644 --- a/src/Client/HedgedConnectionsFactory.h +++ b/src/Client/HedgedConnectionsFactory.h @@ -70,7 +70,7 @@ public: const ConnectionTimeouts & getConnectionTimeouts() const { return timeouts; } - int numberOfProcessingReplicas() const; + size_t numberOfProcessingReplicas() const; /// Tell Factory to not return connections with two level aggregation incompatibility. 
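/// [Editor's note] The numberOfProcessingReplicas() change just above
/// (int -> size_t in both the .cpp and this header) shows the other half of
/// the -Wshorten-64-to-32 cleanup: where a value is genuinely a count, the
/// series widens the declared type instead of sprinkling narrowing casts at
/// every call site.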
void skipReplicasWithTwoLevelAggregationIncompatibility() { skip_replicas_with_two_level_aggregation_incompatibility = true; } diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index 6c5f5850b92..d5cd4ef1548 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -81,9 +81,9 @@ Field QueryFuzzer::getRandomField(int type) { static constexpr UInt64 scales[] = {0, 1, 2, 10}; return DecimalField( - bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) - / sizeof(*bad_int64_values))], - scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))]); + bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) / sizeof(*bad_int64_values))], + static_cast(scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))]) + ); } default: assert(false); diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index bb56baf9216..0346851ee34 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -569,8 +569,8 @@ void ColumnArray::expand(const IColumn::Filter & mask, bool inverted) if (mask.size() < offsets_data.size()) throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR); - int index = mask.size() - 1; - int from = offsets_data.size() - 1; + ssize_t index = mask.size() - 1; + ssize_t from = offsets_data.size() - 1; offsets_data.resize(mask.size()); UInt64 last_offset = offsets_data[from]; while (index >= 0) diff --git a/src/Columns/ColumnCompressed.cpp b/src/Columns/ColumnCompressed.cpp index 292c6968b86..3560e9d7e1c 100644 --- a/src/Columns/ColumnCompressed.cpp +++ b/src/Columns/ColumnCompressed.cpp @@ -27,8 +27,8 @@ std::shared_ptr> ColumnCompressed::compressBuffer(const void * data, si auto compressed_size = LZ4_compress_default( reinterpret_cast(data), compressed.data(), - data_size, - max_dest_size); + static_cast(data_size), + static_cast(max_dest_size)); if (compressed_size <= 0) throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column"); @@ -51,8 +51,8 @@ void ColumnCompressed::decompressBuffer( auto processed_size = LZ4_decompress_safe( reinterpret_cast(compressed_data), reinterpret_cast(decompressed_data), - compressed_size, - decompressed_size); + static_cast(compressed_size), + static_cast(decompressed_size)); if (processed_size <= 0) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress column"); diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 7038579d436..a45d4a0b5f1 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -277,8 +277,8 @@ void ColumnFixedString::expand(const IColumn::Filter & mask, bool inverted) if (mask.size() < size()) throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR); - int index = mask.size() - 1; - int from = size() - 1; + ssize_t index = mask.size() - 1; + ssize_t from = size() - 1; chars.resize_fill(mask.size() * n, 0); while (index >= 0) { diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 17e9bd97669..0ac5a2c31bb 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -46,7 +46,7 @@ namespace HashMap hash_map; for (auto val : index) - hash_map.insert({val, hash_map.size()}); + hash_map.insert({val, static_cast(hash_map.size())}); auto res_col = ColumnVector::create(); auto & data = res_col->getData(); @@ -632,7 +632,7 @@ void ColumnLowCardinality::Index::convertPositions() /// TODO: Optimize with SSE? 
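/// [Editor's note] This loop copies the position array element by element
/// into the new index width; the static_cast added below is assumed not to
/// change behaviour at the existing call sites (which only widen), it just
/// makes the conversion explicit for template instantiations where
/// IndexType is narrower, now that -Wshorten-64-to-32 is enabled.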
for (size_t i = 0; i < size; ++i) - new_data[i] = data[i]; + new_data[i] = static_cast(data[i]); positions = std::move(new_positions); size_of_type = sizeof(IndexType); @@ -717,7 +717,7 @@ void ColumnLowCardinality::Index::insertPositionsRange(const IColumn & column, U positions_data.resize(size + limit); for (UInt64 i = 0; i < limit; ++i) - positions_data[size + i] = column_data[offset + i]; + positions_data[size + i] = static_cast(column_data[offset + i]); }; callForType(std::move(copy), size_of_type); diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 9c8082dcd22..982951f05b0 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -168,8 +168,8 @@ void ColumnString::expand(const IColumn::Filter & mask, bool inverted) /// We cannot change only offsets, because each string should end with terminating zero byte. /// So, we will insert one zero byte when mask value is zero. - int index = mask.size() - 1; - int from = offsets_data.size() - 1; + ssize_t index = mask.size() - 1; + ssize_t from = offsets_data.size() - 1; /// mask.size() - offsets_data.size() should be equal to the number of zeros in mask /// (if not, one of exceptions below will throw) and we can calculate the resulting chars size. UInt64 last_offset = offsets_data[from] + (mask.size() - offsets_data.size()); diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index bba10bfebf0..a1579a46ae0 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -550,7 +550,7 @@ MutableColumnPtr ColumnUnique::uniqueInsertRangeImpl( auto insert_key = [&](StringRef ref, ReverseIndex & cur_index) -> MutableColumnPtr { auto inserted_pos = cur_index.insert(ref); - positions[num_added_rows] = inserted_pos; + positions[num_added_rows] = static_cast(inserted_pos); if (inserted_pos == next_position) return update_position(next_position); @@ -562,9 +562,9 @@ MutableColumnPtr ColumnUnique::uniqueInsertRangeImpl( auto row = start + num_added_rows; if (null_map && (*null_map)[row]) - positions[num_added_rows] = getNullValueIndex(); + positions[num_added_rows] = static_cast(getNullValueIndex()); else if (column->compareAt(getNestedTypeDefaultValueIndex(), row, *src_column, 1) == 0) - positions[num_added_rows] = getNestedTypeDefaultValueIndex(); + positions[num_added_rows] = static_cast(getNestedTypeDefaultValueIndex()); else { auto ref = src_column->getDataAt(row); @@ -576,7 +576,7 @@ MutableColumnPtr ColumnUnique::uniqueInsertRangeImpl( if (insertion_point == reverse_index.lastInsertionPoint()) res = insert_key(ref, *secondary_index); else - positions[num_added_rows] = insertion_point; + positions[num_added_rows] = static_cast(insertion_point); } else res = insert_key(ref, reverse_index); diff --git a/src/Columns/MaskOperations.cpp b/src/Columns/MaskOperations.cpp index 3120828921f..e320e1d57a3 100644 --- a/src/Columns/MaskOperations.cpp +++ b/src/Columns/MaskOperations.cpp @@ -22,8 +22,8 @@ void expandDataByMask(PaddedPODArray & data, const PaddedPODArray & ma if (mask.size() < data.size()) throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR); - int from = data.size() - 1; - int index = mask.size() - 1; + ssize_t from = data.size() - 1; + ssize_t index = mask.size() - 1; data.resize(mask.size()); while (index >= 0) { @@ -317,7 +317,7 @@ int checkShortCircuitArguments(const ColumnsWithTypeAndName & arguments) for (size_t i = 0; i != arguments.size(); ++i) { if (checkAndGetShortCircuitArgument(arguments[i].column)) - 
last_short_circuit_argument_index = i; + last_short_circuit_argument_index = static_cast(i); } return last_short_circuit_argument_index; diff --git a/src/Columns/tests/gtest_weak_hash_32.cpp b/src/Columns/tests/gtest_weak_hash_32.cpp index 2fa6c0ea8ac..8027bd4d6cc 100644 --- a/src/Columns/tests/gtest_weak_hash_32.cpp +++ b/src/Columns/tests/gtest_weak_hash_32.cpp @@ -164,7 +164,7 @@ TEST(WeakHash32, ColumnVectorU32) for (int idx [[maybe_unused]] : {1, 2}) { - for (uint64_t i = 0; i < 65536; ++i) + for (uint32_t i = 0; i < 65536; ++i) data.push_back(i << 16u); } @@ -181,7 +181,7 @@ TEST(WeakHash32, ColumnVectorI32) for (int idx [[maybe_unused]] : {1, 2}) { - for (int64_t i = -32768; i < 32768; ++i) + for (int32_t i = -32768; i < 32768; ++i) data.push_back(i << 16); //-V610 } @@ -240,7 +240,7 @@ TEST(WeakHash32, ColumnVectorU128) val.items[0] = i << 32u; val.items[1] = i << 32u; data.push_back(val); - eq_data.push_back(i); + eq_data.push_back(static_cast(i)); } } @@ -274,7 +274,7 @@ TEST(WeakHash32, ColumnDecimal32) for (int idx [[maybe_unused]] : {1, 2}) { - for (int64_t i = -32768; i < 32768; ++i) + for (int32_t i = -32768; i < 32768; ++i) data.push_back(i << 16); //-V610 } @@ -326,7 +326,7 @@ TEST(WeakHash32, ColumnString1) for (int idx [[maybe_unused]] : {1, 2}) { - for (int64_t i = 0; i < 65536; ++i) + for (int32_t i = 0; i < 65536; ++i) { data.push_back(i); auto str = std::to_string(i); @@ -359,7 +359,7 @@ TEST(WeakHash32, ColumnString2) { size_t max_size = 3000; char letter = 'a'; - for (int64_t i = 0; i < 65536; ++i) + for (int32_t i = 0; i < 65536; ++i) { data.push_back(i); size_t s = (i % max_size) + 1; @@ -401,7 +401,7 @@ TEST(WeakHash32, ColumnString3) char letter = 'a'; for (int64_t i = 0; i < 65536; ++i) { - data.push_back(i); + data.push_back(static_cast(i)); size_t s = (i % max_size) + 1; std::string str(s,'\0'); str[0] = letter; @@ -430,7 +430,7 @@ TEST(WeakHash32, ColumnFixedString) char letter = 'a'; for (int64_t i = 0; i < 65536; ++i) { - data.push_back(i); + data.push_back(static_cast(i)); size_t s = (i % max_size) + 1; std::string str(s, letter); col->insertData(str.data(), str.size()); @@ -471,7 +471,7 @@ TEST(WeakHash32, ColumnArray) UInt32 cur = 0; for (int64_t i = 0; i < 65536; ++i) { - eq_data.push_back(i); + eq_data.push_back(static_cast(i)); size_t s = (i % max_size) + 1; cur_off += s; @@ -505,9 +505,9 @@ TEST(WeakHash32, ColumnArray2) UInt64 cur_off = 0; for (int idx [[maybe_unused]] : {1, 2}) { - for (int64_t i = 0; i < 1000; ++i) + for (int32_t i = 0; i < 1000; ++i) { - for (size_t j = 0; j < 1000; ++j) + for (uint32_t j = 0; j < 1000; ++j) { eq_data.push_back(i * 1000 + j); @@ -556,7 +556,7 @@ TEST(WeakHash32, ColumnArrayArray) UInt32 cur = 1; for (int64_t i = 0; i < 3000; ++i) { - eq_data.push_back(i); + eq_data.push_back(static_cast(i)); size_t s = (i % max_size) + 1; cur_off2 += s; @@ -667,7 +667,7 @@ TEST(WeakHash32, ColumnTupleUInt64UInt64) { data1.push_back(l); data2.push_back(i << 32u); - eq.push_back(l * 65536 + i); + eq.push_back(static_cast(l * 65536 + i)); } } @@ -695,7 +695,7 @@ TEST(WeakHash32, ColumnTupleUInt64String) size_t max_size = 3000; char letter = 'a'; - for (int64_t i = 0; i < 65536; ++i) + for (int32_t i = 0; i < 65536; ++i) { data1.push_back(l); eq.push_back(l * 65536 + i); @@ -737,7 +737,7 @@ TEST(WeakHash32, ColumnTupleUInt64FixedString) for (int64_t i = 0; i < 65536; ++i) { data1.push_back(l); - eq.push_back(l * 65536 + i); + eq.push_back(static_cast(l * 65536 + i)); size_t s = (i % max_size) + 1; std::string str(s, letter); @@ 
-778,7 +778,7 @@ TEST(WeakHash32, ColumnTupleUInt64Array) auto l = idx % 2; UInt32 cur = 0; - for (int64_t i = 0; i < 65536; ++i) + for (int32_t i = 0; i < 65536; ++i) { data1.push_back(l); eq_data.push_back(l * 65536 + i); diff --git a/src/Common/CombinedCardinalityEstimator.h b/src/Common/CombinedCardinalityEstimator.h index 3f4b481dce9..1911cafeaa2 100644 --- a/src/Common/CombinedCardinalityEstimator.h +++ b/src/Common/CombinedCardinalityEstimator.h @@ -65,7 +65,7 @@ public: private: using Small = SmallSet; using Medium = HashContainer; - using Large = HyperLogLogCounter; + using Large = HyperLogLogCounter; public: CombinedCardinalityEstimator() diff --git a/src/Common/DateLUTImpl.cpp b/src/Common/DateLUTImpl.cpp index c4b32a3466b..6eb8b47b114 100644 --- a/src/Common/DateLUTImpl.cpp +++ b/src/Common/DateLUTImpl.cpp @@ -149,9 +149,9 @@ DateLUTImpl::DateLUTImpl(const std::string & time_zone_) /// Fill lookup table for years and months. size_t year_months_lut_index = 0; - size_t first_day_of_last_month = 0; + unsigned first_day_of_last_month = 0; - for (size_t day = 0; day < DATE_LUT_SIZE; ++day) + for (unsigned day = 0; day < DATE_LUT_SIZE; ++day) { const Values & values = lut[day]; diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index 2deb477ca23..3afbb6735dc 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -73,7 +73,7 @@ private: return LUTIndex(0); if (index >= DATE_LUT_SIZE) return LUTIndex(DATE_LUT_SIZE - 1); - return LUTIndex{index}; + return LUTIndex{static_cast(index)}; } template @@ -229,12 +229,12 @@ private: if (t >= lut[guess].date) { if (guess + 1 >= DATE_LUT_SIZE || t < lut[guess + 1].date) - return LUTIndex(guess); + return LUTIndex(static_cast(guess)); - return LUTIndex(guess + 1); + return LUTIndex(static_cast(guess) + 1); } - return LUTIndex(guess ? guess - 1 : 0); + return LUTIndex(guess ? static_cast(guess) - 1 : 0); } static inline LUTIndex toLUTIndex(DayNum d) @@ -272,11 +272,11 @@ private: if (likely(offset_is_whole_number_of_hours_during_epoch)) { if (likely(x >= 0)) - return x / divisor * divisor; + return static_cast(x / divisor * divisor); /// Integer division for negative numbers rounds them towards zero (up). /// We will shift the number so it will be rounded towards -inf (down). - return (x + 1 - divisor) / divisor * divisor; + return static_cast((x + 1 - divisor) / divisor * divisor); } Time date = find(x).date; @@ -285,7 +285,7 @@ private: { if (unlikely(res < 0)) return 0; - return res; + return static_cast(res); } else return res; @@ -509,7 +509,7 @@ public: if (time >= lut[index].time_at_offset_change()) time += lut[index].amount_of_offset_change(); - unsigned res = time / 3600; + unsigned res = static_cast(time / 3600); /// In case time was changed backwards at the start of next day, we will repeat the hour 23. return res <= 23 ? 
res : 23; @@ -548,8 +548,8 @@ public: { Time res = t % 60; if (likely(res >= 0)) - return res; - return res + 60; + return static_cast(res); + return static_cast(res) + 60; } LUTIndex index = findIndex(t); @@ -973,7 +973,7 @@ public: if constexpr (std::is_same_v) return DayNum(4 + (d - 4) / days * days); else - return ExtendedDayNum(4 + (d - 4) / days * days); + return ExtendedDayNum(static_cast(4 + (d - 4) / days * days)); } template @@ -983,9 +983,9 @@ public: if (days == 1) return toDate(d); if constexpr (std::is_same_v) - return lut_saturated[toLUTIndex(ExtendedDayNum(d / days * days))].date; + return lut_saturated[toLUTIndex(ExtendedDayNum(static_cast(d / days * days)))].date; else - return lut[toLUTIndex(ExtendedDayNum(d / days * days))].date; + return lut[toLUTIndex(ExtendedDayNum(static_cast(d / days * days)))].date; } template @@ -1034,7 +1034,7 @@ public: { if (unlikely(res < 0)) return 0; - return res; + return static_cast(res); } else return res; @@ -1047,8 +1047,8 @@ public: if (likely(offset_is_whole_number_of_minutes_during_epoch)) { if (likely(t >= 0)) - return t / divisor * divisor; - return (t + 1 - divisor) / divisor * divisor; + return static_cast(t / divisor * divisor); + return static_cast((t + 1 - divisor) / divisor * divisor); } Time date = find(t).date; @@ -1057,7 +1057,7 @@ public: { if (unlikely(res < 0)) return 0; - return res; + return static_cast(res); } else return res; @@ -1071,7 +1071,7 @@ public: if (seconds % 60 == 0) return toStartOfMinuteInterval(t, seconds / 60); - return roundDown(t, seconds); + return static_cast(roundDown(t, seconds)); } inline LUTIndex makeLUTIndex(Int16 year, UInt8 month, UInt8 day_of_month) const @@ -1249,9 +1249,9 @@ public: return lut[new_index].date + time; } - inline NO_SANITIZE_UNDEFINED Time addWeeks(Time t, Int32 delta) const + inline NO_SANITIZE_UNDEFINED Time addWeeks(Time t, Int64 delta) const { - return addDays(t, static_cast(delta) * 7); + return addDays(t, delta * 7); } inline UInt8 saturateDayOfMonth(Int16 year, UInt8 month, UInt8 day_of_month) const @@ -1331,9 +1331,9 @@ public: } template - inline auto addQuarters(DateOrTime d, Int32 delta) const + inline auto addQuarters(DateOrTime d, Int64 delta) const { - return addMonths(d, static_cast(delta) * 3); + return addMonths(d, delta * 3); } template diff --git a/src/Common/Dwarf.h b/src/Common/Dwarf.h index 09178c66d47..ef6364b6b18 100644 --- a/src/Common/Dwarf.h +++ b/src/Common/Dwarf.h @@ -218,7 +218,7 @@ private: // Offset from start to first attribute uint8_t attr_offset; // Offset within debug info. - uint32_t offset; + uint64_t offset; uint64_t code; DIEAbbreviation abbr; }; @@ -252,10 +252,10 @@ private: uint8_t unit_type = DW_UT_compile; // DW_UT_compile or DW_UT_skeleton uint8_t addr_size = 0; // Offset in .debug_info of this compilation unit. - uint32_t offset = 0; - uint32_t size = 0; + uint64_t offset = 0; + uint64_t size = 0; // Offset in .debug_info for the first DIE in this compilation unit. - uint32_t first_die = 0; + uint64_t first_die = 0; uint64_t abbrev_offset = 0; // The beginning of the CU's contribution to .debug_addr diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h index 4d798173698..1e98b9e4102 100644 --- a/src/Common/HashTable/Hash.h +++ b/src/Common/HashTable/Hash.h @@ -48,33 +48,36 @@ inline DB::UInt64 intHash64(DB::UInt64 x) #include #endif -inline DB::UInt64 intHashCRC32(DB::UInt64 x) +/// NOTE: Intel intrinsic can be confusing. 
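To make the signature change in this hunk concrete before the two reference links that follow: _mm_crc32_u64 is declared as returning a 64-bit integer even though CRC32-C produces only 32 meaningful bits, so narrowing the result to UInt32 is lossless. A minimal stand-alone illustration, assuming SSE4.2 (compile with -msse4.2) and not taken from the patch itself:

#include <cstdint>
#include <nmmintrin.h> /// _mm_crc32_u64

inline uint32_t crc32Hash(uint64_t x)
{
    /// The intrinsic returns unsigned long long, but its upper 32 bits are
    /// always zero, so the static_cast cannot discard information.
    return static_cast<uint32_t>(_mm_crc32_u64(~0ULL, x));
}

inline uint32_t crc32Hash(uint64_t x, uint64_t seed)
{
    /// Chaining form, mirroring the two-argument intHashCRC32 overload in
    /// this hunk: the previous 32-bit result is fed back in as the seed.
    return static_cast<uint32_t>(_mm_crc32_u64(seed, x));
}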
+/// - https://code.google.com/archive/p/sse-intrinsics/wikis/PmovIntrinsicBug.wiki +/// - https://stackoverflow.com/questions/15752770/mm-crc32-u64-poorly-defined +inline DB::UInt32 intHashCRC32(DB::UInt64 x) { #ifdef __SSE4_2__ - return _mm_crc32_u64(-1ULL, x); + return static_cast(_mm_crc32_u64(-1ULL, x)); #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) - return __crc32cd(-1U, x); + return static_cast(__crc32cd(-1U, x)); #else /// On other platforms we do not have CRC32. NOTE This can be confusing. - return intHash64(x); + /// NOTE: consider using intHash32() + return static_cast(intHash64(x)); #endif } - -inline DB::UInt64 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value) +inline DB::UInt32 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value) { #ifdef __SSE4_2__ - return _mm_crc32_u64(updated_value, x); + return static_cast(_mm_crc32_u64(updated_value, x)); #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) - return __crc32cd(updated_value, x); + return __crc32cd(static_cast(updated_value), x); #else /// On other platforms we do not have CRC32. NOTE This can be confusing. - return intHash64(x) ^ updated_value; + return static_cast(intHash64(x) ^ updated_value); #endif } template requires (sizeof(T) > sizeof(DB::UInt64)) -inline DB::UInt64 intHashCRC32(const T & x, DB::UInt64 updated_value) +inline DB::UInt32 intHashCRC32(const T & x, DB::UInt64 updated_value) { const auto * begin = reinterpret_cast(&x); for (size_t i = 0; i < sizeof(T); i += sizeof(UInt64)) @@ -83,7 +86,7 @@ inline DB::UInt64 intHashCRC32(const T & x, DB::UInt64 updated_value) begin += sizeof(DB::UInt64); } - return updated_value; + return static_cast(updated_value); } @@ -219,7 +222,7 @@ template struct HashCRC32; template requires (sizeof(T) <= sizeof(UInt64)) -inline size_t hashCRC32(T key, DB::UInt64 updated_value = -1) +inline UInt32 hashCRC32(T key, DB::UInt64 updated_value = -1) { union { @@ -233,7 +236,7 @@ inline size_t hashCRC32(T key, DB::UInt64 updated_value = -1) template requires (sizeof(T) > sizeof(UInt64)) -inline size_t hashCRC32(T key, DB::UInt64 updated_value = -1) +inline UInt32 hashCRC32(T key, DB::UInt64 updated_value = -1) { return intHashCRC32(key, updated_value); } @@ -241,7 +244,7 @@ inline size_t hashCRC32(T key, DB::UInt64 updated_value = -1) #define DEFINE_HASH(T) \ template <> struct HashCRC32\ {\ - size_t operator() (T key) const\ + UInt32 operator() (T key) const\ {\ return hashCRC32(key);\ }\ @@ -302,8 +305,8 @@ struct UInt128HashCRC32 size_t operator()(UInt128 x) const { UInt64 crc = -1ULL; - crc = __crc32cd(crc, x.items[0]); - crc = __crc32cd(crc, x.items[1]); + crc = __crc32cd(static_cast(crc), x.items[0]); + crc = __crc32cd(static_cast(crc), x.items[1]); return crc; } }; @@ -358,10 +361,10 @@ struct UInt256HashCRC32 size_t operator()(UInt256 x) const { UInt64 crc = -1ULL; - crc = __crc32cd(crc, x.items[0]); - crc = __crc32cd(crc, x.items[1]); - crc = __crc32cd(crc, x.items[2]); - crc = __crc32cd(crc, x.items[3]); + crc = __crc32cd(static_cast(crc), x.items[0]); + crc = __crc32cd(static_cast(crc), x.items[1]); + crc = __crc32cd(static_cast(crc), x.items[2]); + crc = __crc32cd(static_cast(crc), x.items[3]); return crc; } }; @@ -423,7 +426,7 @@ inline DB::UInt32 intHash32(DB::UInt64 key) key = key + (key << 6); key = key ^ ((key >> 22) | (key << 42)); - return key; + return static_cast(key); } diff --git a/src/Common/HashTable/TwoLevelHashTable.h b/src/Common/HashTable/TwoLevelHashTable.h index b8d5eedd430..5acc8b19195 100644 --- 
a/src/Common/HashTable/TwoLevelHashTable.h +++ b/src/Common/HashTable/TwoLevelHashTable.h @@ -44,8 +44,8 @@ protected: public: using Impl = ImplTable; - static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET; - static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1; + static constexpr UInt32 NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET; + static constexpr UInt32 MAX_BUCKET = NUM_BUCKETS - 1; size_t hash(const Key & x) const { return Hash::operator()(x); } @@ -286,13 +286,13 @@ public: void write(DB::WriteBuffer & wb) const { - for (size_t i = 0; i < NUM_BUCKETS; ++i) + for (UInt32 i = 0; i < NUM_BUCKETS; ++i) impls[i].write(wb); } void writeText(DB::WriteBuffer & wb) const { - for (size_t i = 0; i < NUM_BUCKETS; ++i) + for (UInt32 i = 0; i < NUM_BUCKETS; ++i) { if (i != 0) DB::writeChar(',', wb); @@ -302,13 +302,13 @@ public: void read(DB::ReadBuffer & rb) { - for (size_t i = 0; i < NUM_BUCKETS; ++i) + for (UInt32 i = 0; i < NUM_BUCKETS; ++i) impls[i].read(rb); } void readText(DB::ReadBuffer & rb) { - for (size_t i = 0; i < NUM_BUCKETS; ++i) + for (UInt32 i = 0; i < NUM_BUCKETS; ++i) { if (i != 0) DB::assertChar(',', rb); @@ -320,7 +320,7 @@ public: size_t size() const { size_t res = 0; - for (size_t i = 0; i < NUM_BUCKETS; ++i) + for (UInt32 i = 0; i < NUM_BUCKETS; ++i) res += impls[i].size(); return res; @@ -328,7 +328,7 @@ public: bool empty() const { - for (size_t i = 0; i < NUM_BUCKETS; ++i) + for (UInt32 i = 0; i < NUM_BUCKETS; ++i) if (!impls[i].empty()) return false; @@ -338,7 +338,7 @@ public: size_t getBufferSizeInBytes() const { size_t res = 0; - for (size_t i = 0; i < NUM_BUCKETS; ++i) + for (UInt32 i = 0; i < NUM_BUCKETS; ++i) res += impls[i].getBufferSizeInBytes(); return res; diff --git a/src/Common/HashTable/TwoLevelStringHashTable.h b/src/Common/HashTable/TwoLevelStringHashTable.h index 77acca5e707..ea1914348b2 100644 --- a/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/src/Common/HashTable/TwoLevelStringHashTable.h @@ -13,8 +13,8 @@ public: using Key = StringRef; using Impl = ImplTable; - static constexpr size_t NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET; - static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1; + static constexpr UInt32 NUM_BUCKETS = 1ULL << BITS_FOR_BUCKET; + static constexpr UInt32 MAX_BUCKET = NUM_BUCKETS - 1; // TODO: currently hashing contains redundant computations when doing distributed or external aggregations size_t hash(const Key & x) const @@ -175,13 +175,13 @@ public: void write(DB::WriteBuffer & wb) const { - for (size_t i = 0; i < NUM_BUCKETS; ++i) + for (UInt32 i = 0; i < NUM_BUCKETS; ++i) impls[i].write(wb); } void writeText(DB::WriteBuffer & wb) const { - for (size_t i = 0; i < NUM_BUCKETS; ++i) + for (UInt32 i = 0; i < NUM_BUCKETS; ++i) { if (i != 0) DB::writeChar(',', wb); @@ -191,13 +191,13 @@ public: void read(DB::ReadBuffer & rb) { - for (size_t i = 0; i < NUM_BUCKETS; ++i) + for (UInt32 i = 0; i < NUM_BUCKETS; ++i) impls[i].read(rb); } void readText(DB::ReadBuffer & rb) { - for (size_t i = 0; i < NUM_BUCKETS; ++i) + for (UInt32 i = 0; i < NUM_BUCKETS; ++i) { if (i != 0) DB::assertChar(',', rb); @@ -208,7 +208,7 @@ public: size_t size() const { size_t res = 0; - for (size_t i = 0; i < NUM_BUCKETS; ++i) + for (UInt32 i = 0; i < NUM_BUCKETS; ++i) res += impls[i].size(); return res; @@ -216,7 +216,7 @@ public: bool empty() const { - for (size_t i = 0; i < NUM_BUCKETS; ++i) + for (UInt32 i = 0; i < NUM_BUCKETS; ++i) if (!impls[i].empty()) return false; @@ -226,7 +226,7 @@ public: size_t getBufferSizeInBytes() const { size_t res = 0; - for 
(size_t i = 0; i < NUM_BUCKETS; ++i) + for (UInt32 i = 0; i < NUM_BUCKETS; ++i) res += impls[i].getBufferSizeInBytes(); return res; diff --git a/src/Common/HyperLogLogCounter.h b/src/Common/HyperLogLogCounter.h index 36db00a5982..32c04d85d57 100644 --- a/src/Common/HyperLogLogCounter.h +++ b/src/Common/HyperLogLogCounter.h @@ -264,7 +264,8 @@ enum class HyperLogLogMode /// of Algorithms). template < UInt8 precision, - typename Hash = IntHash32, + typename Key = UInt64, + typename Hash = IntHash32, typename HashValueType = UInt32, typename DenominatorType = double, typename BiasEstimator = TrivialBiasEstimator, @@ -409,7 +410,9 @@ private: inline HashValueType getHash(Value key) const { - return Hash::operator()(key); + /// NOTE: this should be OK, since value is the same as key for HLL. + return static_cast( + Hash::operator()(static_cast(key))); } /// Update maximum rank for current bucket. @@ -532,6 +535,7 @@ private: template < UInt8 precision, + typename Key, typename Hash, typename HashValueType, typename DenominatorType, @@ -542,6 +546,7 @@ template details::LogLUT HyperLogLogCounter < precision, + Key, Hash, HashValueType, DenominatorType, @@ -555,6 +560,7 @@ details::LogLUT HyperLogLogCounter /// Serialization format must not be changed. using HLL12 = HyperLogLogCounter< 12, + UInt64, IntHash32, UInt32, double, diff --git a/src/Common/HyperLogLogWithSmallSetOptimization.h b/src/Common/HyperLogLogWithSmallSetOptimization.h index 39c00660ebe..1d2408186de 100644 --- a/src/Common/HyperLogLogWithSmallSetOptimization.h +++ b/src/Common/HyperLogLogWithSmallSetOptimization.h @@ -26,7 +26,7 @@ class HyperLogLogWithSmallSetOptimization : private boost::noncopyable { private: using Small = SmallSet; - using Large = HyperLogLogCounter; + using Large = HyperLogLogCounter; using LargeValueType = typename Large::value_type; Small small; diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index 60efab69433..e95bc42a1ea 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -506,8 +506,16 @@ unsigned OptimizedRegularExpressionImpl::match(const char * subject DB::PODArrayWithStackMemory pieces(limit); - if (!re2->Match(StringPieceType(subject, subject_size), 0, subject_size, RegexType::UNANCHORED, pieces.data(), pieces.size())) + if (!re2->Match( + StringPieceType(subject, subject_size), + 0, + subject_size, + RegexType::UNANCHORED, + pieces.data(), + static_cast(pieces.size()))) + { return 0; + } else { matches.resize(limit); diff --git a/src/Common/PoolWithFailoverBase.h b/src/Common/PoolWithFailoverBase.h index 42b5b3d0990..f26648bd213 100644 --- a/src/Common/PoolWithFailoverBase.h +++ b/src/Common/PoolWithFailoverBase.h @@ -339,7 +339,7 @@ struct PoolWithFailoverBase::PoolState Int64 config_priority = 1; /// Priority from the GetPriorityFunc. 
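The PoolState hunk continuing just below widens the random member from UInt32 to UInt64 (the same change is applied to ZooKeeper's ShuffleHost later in this patch). A plausible reading, since randomize() itself is not shown in the hunk, is that the member stores the raw output of a 64-bit engine, and the narrower field silently truncated half of it. A hypothetical sketch of that pattern:

#include <cstdint>
#include <random>

struct PoolStateSketch
{
    int64_t config_priority = 1;
    int64_t priority = 0;
    uint64_t random = 0; /// was uint32_t: half of the entropy was dropped

    void randomize()
    {
        /// std::mt19937_64::result_type is uint64_t, so the member type now
        /// matches the generator and the assignment is lossless.
        thread_local std::mt19937_64 rng{std::random_device{}()};
        random = rng();
    }
};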
Int64 priority = 0; - UInt32 random = 0; + UInt64 random = 0; void randomize() { diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 7266b9b9553..b50e0c0ab49 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -132,11 +132,11 @@ QueryProfilerBase::QueryProfilerBase(UInt64 thread_id, int clock_t sev.sigev_signo = pause_signal; #if defined(OS_FREEBSD) - sev._sigev_un._threadid = thread_id; + sev._sigev_un._threadid = static_cast(thread_id); #elif defined(USE_MUSL) - sev.sigev_notify_thread_id = thread_id; + sev.sigev_notify_thread_id = static_cast(thread_id); #else - sev._sigev_un._tid = thread_id; + sev._sigev_un._tid = static_cast(thread_id); #endif timer_t local_timer_id; if (timer_create(clock_type, &sev, &local_timer_id)) diff --git a/src/Common/RadixSort.h b/src/Common/RadixSort.h index 9ca43bee30c..739bec8d9dd 100644 --- a/src/Common/RadixSort.h +++ b/src/Common/RadixSort.h @@ -273,13 +273,13 @@ private: { /// Replace the histograms with the accumulated sums: the value in position i is the sum of the previous positions minus one. - size_t sums[NUM_PASSES] = {0}; + CountType sums[NUM_PASSES] = {0}; for (size_t i = 0; i < HISTOGRAM_SIZE; ++i) { for (size_t pass = 0; pass < NUM_PASSES; ++pass) { - size_t tmp = histograms[pass * HISTOGRAM_SIZE + i] + sums[pass]; + CountType tmp = histograms[pass * HISTOGRAM_SIZE + i] + sums[pass]; histograms[pass * HISTOGRAM_SIZE + i] = sums[pass] - 1; sums[pass] = tmp; } diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index b8f8a9d3a88..048e7a1f34c 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -40,7 +40,7 @@ public: #ifdef __SSE2__ protected: static constexpr auto n = sizeof(__m128i); - const int page_size = ::getPageSize(); + const Int64 page_size = ::getPageSize(); bool pageSafe(const void * const ptr) const { diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 0b89139fa53..b70b1fc5e60 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -87,7 +87,7 @@ void ThreadPoolImpl::setQueueSize(size_t value) template template -ReturnType ThreadPoolImpl::scheduleImpl(Job job, int priority, std::optional wait_microseconds, bool propagate_opentelemetry_tracing_context) +ReturnType ThreadPoolImpl::scheduleImpl(Job job, ssize_t priority, std::optional wait_microseconds, bool propagate_opentelemetry_tracing_context) { auto on_error = [&](const std::string & reason) { @@ -163,19 +163,19 @@ ReturnType ThreadPoolImpl::scheduleImpl(Job job, int priority, std::opti } template -void ThreadPoolImpl::scheduleOrThrowOnError(Job job, int priority) +void ThreadPoolImpl::scheduleOrThrowOnError(Job job, ssize_t priority) { scheduleImpl(std::move(job), priority, std::nullopt); } template -bool ThreadPoolImpl::trySchedule(Job job, int priority, uint64_t wait_microseconds) noexcept +bool ThreadPoolImpl::trySchedule(Job job, ssize_t priority, uint64_t wait_microseconds) noexcept { return scheduleImpl(std::move(job), priority, wait_microseconds); } template -void ThreadPoolImpl::scheduleOrThrow(Job job, int priority, uint64_t wait_microseconds, bool propagate_opentelemetry_tracing_context) +void ThreadPoolImpl::scheduleOrThrow(Job job, ssize_t priority, uint64_t wait_microseconds, bool propagate_opentelemetry_tracing_context) { scheduleImpl(std::move(job), priority, wait_microseconds, propagate_opentelemetry_tracing_context); } diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 76ada9e0d75..6742a554a85 100644 --- 
a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -50,13 +50,13 @@ public: /// NOTE: Probably you should call wait() if exception was thrown. If some previously scheduled jobs are using some objects, /// located on stack of current thread, the stack must not be unwinded until all jobs finished. However, /// if ThreadPool is a local object, it will wait for all scheduled jobs in own destructor. - void scheduleOrThrowOnError(Job job, int priority = 0); + void scheduleOrThrowOnError(Job job, ssize_t priority = 0); /// Similar to scheduleOrThrowOnError(...). Wait for specified amount of time and schedule a job or return false. - bool trySchedule(Job job, int priority = 0, uint64_t wait_microseconds = 0) noexcept; + bool trySchedule(Job job, ssize_t priority = 0, uint64_t wait_microseconds = 0) noexcept; /// Similar to scheduleOrThrowOnError(...). Wait for specified amount of time and schedule a job or throw an exception. - void scheduleOrThrow(Job job, int priority = 0, uint64_t wait_microseconds = 0, bool propagate_opentelemetry_tracing_context = true); + void scheduleOrThrow(Job job, ssize_t priority = 0, uint64_t wait_microseconds = 0, bool propagate_opentelemetry_tracing_context = true); /// Wait for all currently active jobs to be done. /// You may call schedule and wait many times in arbitrary order. @@ -96,10 +96,10 @@ private: struct JobWithPriority { Job job; - int priority; + ssize_t priority; DB::OpenTelemetry::TracingContextOnThread thread_trace_context; - JobWithPriority(Job job_, int priority_, const DB::OpenTelemetry::TracingContextOnThread& thread_trace_context_) + JobWithPriority(Job job_, ssize_t priority_, const DB::OpenTelemetry::TracingContextOnThread& thread_trace_context_) : job(job_), priority(priority_), thread_trace_context(thread_trace_context_) {} bool operator< (const JobWithPriority & rhs) const @@ -113,7 +113,7 @@ private: std::exception_ptr first_exception; template - ReturnType scheduleImpl(Job job, int priority, std::optional wait_microseconds, bool propagate_opentelemetry_tracing_context = true); + ReturnType scheduleImpl(Job job, ssize_t priority, std::optional wait_microseconds, bool propagate_opentelemetry_tracing_context = true); void worker(typename std::list::iterator thread_it); diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index baa77468a13..76a4d8b1adf 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -121,7 +121,7 @@ TasksStatsCounters::TasksStatsCounters(const UInt64 tid, const MetricsProvider p stats_getter = [metrics_provider = std::make_shared(), tid]() { ::taskstats result{}; - metrics_provider->getStat(result, tid); + metrics_provider->getStat(result, static_cast(tid)); return result; }; break; @@ -526,7 +526,7 @@ void PerfEventsCounters::finalizeProfileEvents(ProfileEvents::Counters & profile continue; constexpr ssize_t bytes_to_read = sizeof(current_values[0]); - const int bytes_read = read(fd, ¤t_values[i], bytes_to_read); + const ssize_t bytes_read = read(fd, ¤t_values[i], bytes_to_read); if (bytes_read != bytes_to_read) { diff --git a/src/Common/UTF8Helpers.h b/src/Common/UTF8Helpers.h index ce90af3d5ce..623a62a6f79 100644 --- a/src/Common/UTF8Helpers.h +++ b/src/Common/UTF8Helpers.h @@ -99,7 +99,10 @@ requires (sizeof(CharT) == 1) size_t convertCodePointToUTF8(int code_point, CharT * out_bytes, size_t out_length) { static const Poco::UTF8Encoding utf8; - int res = utf8.convert(code_point, reinterpret_cast(out_bytes), out_length); + int res = 
utf8.convert( + code_point, + reinterpret_cast(out_bytes), + static_cast(out_length)); assert(res >= 0); return res; } @@ -109,7 +112,9 @@ requires (sizeof(CharT) == 1) std::optional convertUTF8ToCodePoint(const CharT * in_bytes, size_t in_length) { static const Poco::UTF8Encoding utf8; - int res = utf8.queryConvert(reinterpret_cast(in_bytes), in_length); + int res = utf8.queryConvert( + reinterpret_cast(in_bytes), + static_cast(in_length)); if (res >= 0) return res; diff --git a/src/Common/Volnitsky.h b/src/Common/Volnitsky.h index 6f5948b6564..a27fd36f704 100644 --- a/src/Common/Volnitsky.h +++ b/src/Common/Volnitsky.h @@ -404,7 +404,8 @@ public: /// And also adding from the end guarantees that we will find first occurrence because we will lookup bigger offsets first. for (auto i = static_cast(needle_size - sizeof(VolnitskyTraits::Ngram)); i >= 0; --i) { - bool ok = VolnitskyTraits::putNGram(needle + i, i + 1, needle, needle_size, callback); + bool ok = VolnitskyTraits::putNGram( + needle + i, static_cast(i + 1), needle, needle_size, callback); /** `putNGramUTF8CaseInsensitive` does not work if characters with lower and upper cases * are represented by different number of bytes or code points. diff --git a/src/Common/ZooKeeper/TestKeeper.cpp b/src/Common/ZooKeeper/TestKeeper.cpp index 098dc522eeb..134374f98d0 100644 --- a/src/Common/ZooKeeper/TestKeeper.cpp +++ b/src/Common/ZooKeeper/TestKeeper.cpp @@ -218,7 +218,7 @@ std::pair TestKeeperCreateRequest::process(TestKeeper::Contai created_node.stat.ctime = std::chrono::system_clock::now().time_since_epoch() / std::chrono::milliseconds(1); created_node.stat.mtime = created_node.stat.ctime; created_node.stat.numChildren = 0; - created_node.stat.dataLength = data.length(); + created_node.stat.dataLength = static_cast(data.length()); created_node.data = data; created_node.is_ephemeral = is_ephemeral; created_node.is_sequental = is_sequential; diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 5098788fb2e..f7d5bccc0e0 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -45,7 +45,7 @@ struct ShuffleHost { String host; Int64 priority = 0; - UInt32 random = 0; + UInt64 random = 0; void randomize() { diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index a565a322790..f555ebb132e 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -724,7 +724,7 @@ void ZooKeeperRequest::createLogElements(LogElements & elems) const elem.has_watch = has_watch; elem.op_num = static_cast(getOpNum()); elem.path = getPath(); - elem.request_idx = elems.size() - 1; + elem.request_idx = static_cast(elems.size()) - 1; } @@ -762,7 +762,7 @@ void ZooKeeperCheckRequest::createLogElements(LogElements & elems) const void ZooKeeperMultiRequest::createLogElements(LogElements & elems) const { ZooKeeperRequest::createLogElements(elems); - elems.back().requests_size = requests.size(); + elems.back().requests_size = static_cast(requests.size()); for (const auto & request : requests) { auto & req = dynamic_cast(*request); diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index f70dac74a6a..ebab18b5ed7 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -546,7 +546,7 @@ void ZooKeeper::sendAuth(const String & scheme, const String & data) if (read_xid != AUTH_XID) throw Exception(Error::ZMARSHALLINGERROR, "Unexpected event received 
in reply to auth request: {}", read_xid); - int32_t actual_length = in->count() - count_before_event; + int32_t actual_length = static_cast(in->count() - count_before_event); if (length != actual_length) throw Exception(Error::ZMARSHALLINGERROR, "Response length doesn't match. Expected: {}, actual: {}", length, actual_length); @@ -821,7 +821,7 @@ void ZooKeeper::receiveEvent() } } - int32_t actual_length = in->count() - count_before_event; + int32_t actual_length = static_cast(in->count() - count_before_event); if (length != actual_length) throw Exception(Error::ZMARSHALLINGERROR, "Response length doesn't match. Expected: {}, actual: {}", length, actual_length); diff --git a/src/Common/examples/parallel_aggregation.cpp b/src/Common/examples/parallel_aggregation.cpp index f54c4cee12c..bd252b330f3 100644 --- a/src/Common/examples/parallel_aggregation.cpp +++ b/src/Common/examples/parallel_aggregation.cpp @@ -492,7 +492,7 @@ int main(int argc, char ** argv) watch.restart(); - for (size_t i = 0; i < MapTwoLevel::NUM_BUCKETS; ++i) + for (unsigned i = 0; i < MapTwoLevel::NUM_BUCKETS; ++i) pool.scheduleOrThrowOnError([&] { merge2(maps.data(), num_threads, i); }); pool.wait(); @@ -545,7 +545,7 @@ int main(int argc, char ** argv) watch.restart(); - for (size_t i = 0; i < MapTwoLevel::NUM_BUCKETS; ++i) + for (unsigned i = 0; i < MapTwoLevel::NUM_BUCKETS; ++i) pool.scheduleOrThrowOnError([&] { merge2(maps.data(), num_threads, i); }); pool.wait(); diff --git a/src/Common/filesystemHelpers.cpp b/src/Common/filesystemHelpers.cpp index 4c60a6ddac0..07a08dc7fbc 100644 --- a/src/Common/filesystemHelpers.cpp +++ b/src/Common/filesystemHelpers.cpp @@ -258,7 +258,7 @@ size_t getSizeFromFileDescriptor(int fd, const String & file_name) return buf.st_size; } -int getINodeNumberFromPath(const String & path) +Int64 getINodeNumberFromPath(const String & path) { struct stat file_stat; if (stat(path.data(), &file_stat)) diff --git a/src/Common/filesystemHelpers.h b/src/Common/filesystemHelpers.h index 9faaabb42f2..0e6e16941bb 100644 --- a/src/Common/filesystemHelpers.h +++ b/src/Common/filesystemHelpers.h @@ -74,7 +74,7 @@ std::optional tryGetSizeFromFilePath(const String & path); /// Get inode number for a file path. /// Will not work correctly if filesystem does not support inodes. -int getINodeNumberFromPath(const String & path); +Int64 getINodeNumberFromPath(const String & path); } diff --git a/src/Common/formatIPv6.cpp b/src/Common/formatIPv6.cpp index 2e08828f724..bc1878c0bc6 100644 --- a/src/Common/formatIPv6.cpp +++ b/src/Common/formatIPv6.cpp @@ -80,7 +80,7 @@ static void printInteger(char *& out, T value) void formatIPv6(const unsigned char * src, char *& dst, uint8_t zeroed_tail_bytes_count) { - struct { int base, len; } best{-1, 0}, cur{-1, 0}; + struct { Int64 base, len; } best{-1, 0}, cur{-1, 0}; std::array words{}; /** Preprocess: @@ -122,14 +122,18 @@ void formatIPv6(const unsigned char * src, char *& dst, uint8_t zeroed_tail_byte best.base = -1; /// Format the result. - for (const int i : collections::range(0, words.size())) + for (const size_t i : collections::range(0, words.size())) { /// Are we inside the best run of 0x00's? - if (best.base != -1 && i >= best.base && i < (best.base + best.len)) + if (best.base != -1) { - if (i == best.base) - *dst++ = ':'; - continue; + size_t best_base = static_cast(best.base); + if (i >= best_base && i < (best_base + best.len)) + { + if (i == best_base) + *dst++ = ':'; + continue; + } } /// Are we following an initial run of 0x00s or any real hex? 
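The formatIPv6 change just above deserves a standalone illustration: the function tracks the best run of zero words with base = -1 as a "no run yet" sentinel, so comparing base directly against an unsigned loop index mixes signednesses. The sketch below is a simplified stand-in, not the patched function, showing the same guard-then-cast structure used for the RFC 5952 "::" compression:

#include <array>
#include <cstddef>
#include <cstdint>

/// A run of zero 16-bit words; base == -1 means "no run found".
struct Run { int64_t base = -1; int64_t len = 0; };

Run longestZeroRun(const std::array<uint16_t, 8> & words)
{
    Run best, cur;
    for (size_t i = 0; i < words.size(); ++i)
    {
        if (words[i] == 0)
        {
            if (cur.base == -1) { cur.base = static_cast<int64_t>(i); cur.len = 1; }
            else ++cur.len;
            if (cur.len > best.len) best = cur;
        }
        else
            cur.base = -1;
    }
    return best; /// RFC 5952 additionally requires len >= 2 before compressing
}

/// At the formatting site: test the sentinel first, cast only afterwards,
/// which is exactly the shape the patch gives the loop body.
bool insideBestRun(const Run & best, size_t i)
{
    if (best.base == -1)
        return false;
    size_t base = static_cast<size_t>(best.base); /// non-negative here
    return i >= base && i < base + static_cast<size_t>(best.len);
}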
diff --git a/src/Common/getCurrentProcessFDCount.cpp b/src/Common/getCurrentProcessFDCount.cpp index 4c0f328c853..6217d92fbc1 100644 --- a/src/Common/getCurrentProcessFDCount.cpp +++ b/src/Common/getCurrentProcessFDCount.cpp @@ -7,10 +7,10 @@ #include -int getCurrentProcessFDCount() +Int64 getCurrentProcessFDCount() { namespace fs = std::filesystem; - int result = -1; + Int64 result = -1; #if defined(OS_LINUX) || defined(OS_DARWIN) using namespace DB; diff --git a/src/Common/getCurrentProcessFDCount.h b/src/Common/getCurrentProcessFDCount.h index 583b99f6c13..f6273580df7 100644 --- a/src/Common/getCurrentProcessFDCount.h +++ b/src/Common/getCurrentProcessFDCount.h @@ -1,5 +1,7 @@ #pragma once +#include + /// Get current process file descriptor count /// @return -1 os doesn't support "lsof" command or some error occurs. -int getCurrentProcessFDCount(); +Int64 getCurrentProcessFDCount(); diff --git a/src/Common/intExp.h b/src/Common/intExp.h index 3529990ef3b..4d39a56670d 100644 --- a/src/Common/intExp.h +++ b/src/Common/intExp.h @@ -232,7 +232,7 @@ template constexpr inline T intExp10OfSize(int x) { if constexpr (sizeof(T) <= 8) - return intExp10(x); + return static_cast(intExp10(x)); else if constexpr (sizeof(T) <= 16) return common::exp10_i128(x); else diff --git a/src/Common/mysqlxx/Value.cpp b/src/Common/mysqlxx/Value.cpp index 85b63b722a2..6954080f864 100644 --- a/src/Common/mysqlxx/Value.cpp +++ b/src/Common/mysqlxx/Value.cpp @@ -124,7 +124,7 @@ double Value::readFloatText(const char * buf, size_t length) const case 'E': { ++buf; - Int32 exponent = readIntText(buf, end - buf); + Int32 exponent = static_cast(readIntText(buf, end - buf)); x *= preciseExp10(exponent); if (negative) x = -x; diff --git a/src/Common/mysqlxx/mysqlxx/Row.h b/src/Common/mysqlxx/mysqlxx/Row.h index 6ef40ff2060..5690389d1cf 100644 --- a/src/Common/mysqlxx/mysqlxx/Row.h +++ b/src/Common/mysqlxx/mysqlxx/Row.h @@ -44,9 +44,9 @@ public: /** Получить значение по индекÑу. * ЗдеÑÑŒ иÑпользуетÑÑ int, а не unsigned, чтобы не было неоднозначноÑти Ñ Ñ‚ÐµÐ¼ же методом, принимающим const char *. 
*/ - Value operator[] (int n) const + Value operator[] (size_t n) const { - if (unlikely(static_cast(n) >= res->getNumFields())) + if (unlikely(n >= res->getNumFields())) throw Exception("Index of column is out of range."); return Value(row[n], lengths[n], res); } diff --git a/src/Common/mysqlxx/mysqlxx/Value.h b/src/Common/mysqlxx/mysqlxx/Value.h index 46fcee0753f..892e5a19d93 100644 --- a/src/Common/mysqlxx/mysqlxx/Value.h +++ b/src/Common/mysqlxx/mysqlxx/Value.h @@ -242,8 +242,8 @@ template <> inline unsigned char Value::get() cons template <> inline char8_t Value::get() const { return getUInt(); } template <> inline short Value::get() const { return getInt(); } /// NOLINT template <> inline unsigned short Value::get() const { return getUInt(); } /// NOLINT -template <> inline int Value::get() const { return getInt(); } -template <> inline unsigned int Value::get() const { return getUInt(); } +template <> inline int Value::get() const { return static_cast(getInt()); } +template <> inline unsigned int Value::get() const { return static_cast(getUInt()); } template <> inline long Value::get() const { return getInt(); } /// NOLINT template <> inline unsigned long Value::get() const { return getUInt(); } /// NOLINT template <> inline long long Value::get() const { return getInt(); } /// NOLINT diff --git a/src/Common/parseGlobs.cpp b/src/Common/parseGlobs.cpp index 8e9195f9842..439852907a1 100644 --- a/src/Common/parseGlobs.cpp +++ b/src/Common/parseGlobs.cpp @@ -68,14 +68,14 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob output_width = std::max(range_begin_width, range_end_width); if (leading_zeros) - oss_for_replacing << std::setfill('0') << std::setw(output_width); + oss_for_replacing << std::setfill('0') << std::setw(static_cast(output_width)); oss_for_replacing << range_begin; for (size_t i = range_begin + 1; i <= range_end; ++i) { oss_for_replacing << '|'; if (leading_zeros) - oss_for_replacing << std::setfill('0') << std::setw(output_width); + oss_for_replacing << std::setfill('0') << std::setw(static_cast(output_width)); oss_for_replacing << i; } } diff --git a/src/Common/parseRemoteDescription.cpp b/src/Common/parseRemoteDescription.cpp index 1f614945491..c8434b0993e 100644 --- a/src/Common/parseRemoteDescription.cpp +++ b/src/Common/parseRemoteDescription.cpp @@ -82,8 +82,8 @@ std::vector parseRemoteDescription(const String & description, size_t l, /// Either the numeric interval (8..10) or equivalent expression in brackets if (description[i] == '{') { - int cnt = 1; - int last_dot = -1; /// The rightmost pair of points, remember the index of the right of the two + ssize_t cnt = 1; + ssize_t last_dot = -1; /// The rightmost pair of points, remember the index of the right of the two size_t m; std::vector buffer; bool have_splitter = false; diff --git a/src/Common/safe_cast.h b/src/Common/safe_cast.h new file mode 100644 index 00000000000..133808ca259 --- /dev/null +++ b/src/Common/safe_cast.h @@ -0,0 +1,22 @@ +#pragma once + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +template +To safe_cast(From from) +{ + constexpr auto max = std::numeric_limits::max(); + if (from > max) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Overflow ({} > {})", from, max); + return static_cast(from); +} + +} diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index aca17ae4f93..6ae934b2296 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ 
b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -58,7 +58,7 @@ FailuresCount countFailures(const ::testing::TestResult & test_result) const size_t count = test_result.total_part_count(); for (size_t i = 0; i < count; ++i) { - const auto & part = test_result.GetTestPartResult(i); + const auto & part = test_result.GetTestPartResult(static_cast(i)); if (part.nonfatally_failed()) { ++failures.non_fatal; diff --git a/src/Common/tests/gtest_hash_table.cpp b/src/Common/tests/gtest_hash_table.cpp index b06ee5a666e..fd0b2495fde 100644 --- a/src/Common/tests/gtest_hash_table.cpp +++ b/src/Common/tests/gtest_hash_table.cpp @@ -216,27 +216,27 @@ TEST(HashTable, Erase) using Cont = HashSet, HashTableGrowerWithPrecalculation<1>>; Cont cont; - for (size_t i = 0; i < 5000; ++i) + for (int i = 0; i < 5000; ++i) { cont.insert(i); } - for (size_t i = 0; i < 2500; ++i) + for (int i = 0; i < 2500; ++i) { cont.erase(i); } - for (size_t i = 5000; i < 10000; ++i) + for (int i = 5000; i < 10000; ++i) { cont.insert(i); } - for (size_t i = 5000; i < 10000; ++i) + for (int i = 5000; i < 10000; ++i) { cont.erase(i); } - for (size_t i = 2500; i < 5000; ++i) + for (int i = 2500; i < 5000; ++i) { cont.erase(i); } diff --git a/src/Common/tests/gtest_lru_hash_map.cpp b/src/Common/tests/gtest_lru_hash_map.cpp index 562ee667b7b..f45a503be43 100644 --- a/src/Common/tests/gtest_lru_hash_map.cpp +++ b/src/Common/tests/gtest_lru_hash_map.cpp @@ -26,7 +26,7 @@ void testInsert(size_t elements_to_insert_size, size_t map_size) std::vector expected; for (size_t i = 0; i < elements_to_insert_size; ++i) - map.insert(i, i); + map.insert(static_cast(i), static_cast(i)); for (size_t i = elements_to_insert_size - map_size; i < elements_to_insert_size; ++i) expected.emplace_back(i); diff --git a/src/Common/waitForPid.cpp b/src/Common/waitForPid.cpp index 73d88c68adb..2cf80de644d 100644 --- a/src/Common/waitForPid.cpp +++ b/src/Common/waitForPid.cpp @@ -54,7 +54,7 @@ namespace DB static int syscall_pidfd_open(pid_t pid) { - return syscall(SYS_pidfd_open, pid, 0); + return static_cast(syscall(SYS_pidfd_open, pid, 0)); } static bool supportsPidFdOpen() @@ -170,7 +170,8 @@ bool waitForPid(pid_t pid, size_t timeout_in_seconds) /// If timeout is positive try waitpid without block in loop until /// process is normally terminated or waitpid return error - int timeout_in_ms = timeout_in_seconds * 1000; + /// NOTE: timeout casted to int, since poll() accept int for timeout + int timeout_in_ms = static_cast(timeout_in_seconds * 1000); while (timeout_in_ms > 0) { int waitpid_res = HANDLE_EINTR(waitpid(pid, &status, WNOHANG)); diff --git a/src/Compression/CompressedReadBufferBase.cpp b/src/Compression/CompressedReadBufferBase.cpp index 9101caf568e..0492b7faec5 100644 --- a/src/Compression/CompressedReadBufferBase.cpp +++ b/src/Compression/CompressedReadBufferBase.cpp @@ -279,7 +279,7 @@ static void readHeaderAndGetCodec(const char * compressed_buffer, size_t size_de void CompressedReadBufferBase::decompressTo(char * to, size_t size_decompressed, size_t size_compressed_without_checksum) { readHeaderAndGetCodec(compressed_buffer, size_decompressed, codec, allow_different_codecs); - codec->decompress(compressed_buffer, size_compressed_without_checksum, to); + codec->decompress(compressed_buffer, static_cast(size_compressed_without_checksum), to); } void CompressedReadBufferBase::decompress(BufferBase::Buffer & to, size_t size_decompressed, size_t size_compressed_without_checksum) @@ -300,7 +300,7 @@ void CompressedReadBufferBase::decompress(BufferBase::Buffer 
& to, size_t size_d to = BufferBase::Buffer(compressed_buffer + header_size, compressed_buffer + size_compressed_without_checksum); } else - codec->decompress(compressed_buffer, size_compressed_without_checksum, to.begin()); + codec->decompress(compressed_buffer, static_cast(size_compressed_without_checksum), to.begin()); } void CompressedReadBufferBase::flushAsynchronousDecompressRequests() const diff --git a/src/Compression/CompressedWriteBuffer.cpp b/src/Compression/CompressedWriteBuffer.cpp index 6c1dbd9e00c..82beeea37cd 100644 --- a/src/Compression/CompressedWriteBuffer.cpp +++ b/src/Compression/CompressedWriteBuffer.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include "CompressedWriteBuffer.h" @@ -22,7 +23,8 @@ void CompressedWriteBuffer::nextImpl() if (!offset()) return; - size_t decompressed_size = offset(); + chassert(offset() <= INT_MAX); + UInt32 decompressed_size = static_cast(offset()); UInt32 compressed_reserve_size = codec->getCompressedReserveSize(decompressed_size); /** During compression we need buffer with capacity >= compressed_reserve_size + CHECKSUM_SIZE. diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index 816f242672a..c1278cb88de 100644 --- a/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -353,7 +353,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest) writer.flush(); - return (dest - dest_start) + (writer.count() + 7) / 8; + return static_cast((dest - dest_start) + (writer.count() + 7) / 8); } template @@ -414,7 +414,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest, if (write_spec.data_bits != 0) { const UInt8 sign = reader.readBit(); - double_delta = reader.readBits(write_spec.data_bits - 1) + 1; + double_delta = static_cast(reader.readBits(write_spec.data_bits - 1) + 1); if (sign) { /// It's well defined for unsigned data types. diff --git a/src/Compression/CompressionCodecEncrypted.cpp b/src/Compression/CompressionCodecEncrypted.cpp index a9550c9e28d..02b640ae402 100644 --- a/src/Compression/CompressionCodecEncrypted.cpp +++ b/src/Compression/CompressionCodecEncrypted.cpp @@ -7,6 +7,7 @@ #include #include #include +#include // This depends on BoringSSL-specific API, notably . 
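The hunk that resumes below is the first user of the Common/safe_cast.h header introduced earlier in this patch: instead of a bare static_cast from size_t to the UInt32 that the codec interface returns, safe_cast throws LOGICAL_ERROR if the value does not fit. A freestanding sketch of the same idea, with std::overflow_error standing in for the DB::Exception the real header uses:

#include <cstdint>
#include <limits>
#include <stdexcept>

template <typename To, typename From>
To safeCastSketch(From from)
{
    /// Only the upper bound is checked, matching the header added by this
    /// patch; it is intended for unsigned-to-narrower-unsigned conversions.
    constexpr auto max = std::numeric_limits<To>::max();
    if (from > max)
        throw std::overflow_error("safe_cast overflow");
    return static_cast<To>(from);
}

/// Usage in the spirit of doCompressData() below: a size_t byte count
/// narrowed to the UInt32 required by the compression codec interface.
inline uint32_t compressedSizeSketch(size_t payload, size_t header_bytes)
{
    return safeCastSketch<uint32_t>(payload + header_bytes);
}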
#if USE_SSL @@ -480,7 +481,8 @@ UInt32 CompressionCodecEncrypted::doCompressData(const char * source, UInt32 sou if (out_len != source_size + tag_size) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't encrypt data, length after encryption {} is wrong, expected {}", out_len, source_size + tag_size); - return out_len + keyid_size + nonce_size; + size_t out_size = out_len + keyid_size + nonce_size; + return safe_cast(out_size); } void CompressionCodecEncrypted::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const diff --git a/src/Compression/CompressionCodecFPC.cpp b/src/Compression/CompressionCodecFPC.cpp index 247eb73b65b..48eba210b60 100644 --- a/src/Compression/CompressionCodecFPC.cpp +++ b/src/Compression/CompressionCodecFPC.cpp @@ -453,9 +453,9 @@ UInt32 CompressionCodecFPC::doCompressData(const char * source, UInt32 source_si switch (float_width) { case sizeof(Float64): - return HEADER_SIZE + FPCOperation(destination, level).encode(src); + return static_cast(HEADER_SIZE + FPCOperation(destination, level).encode(src)); case sizeof(Float32): - return HEADER_SIZE + FPCOperation(destination, level).encode(src); + return static_cast(HEADER_SIZE + FPCOperation(destination, level).encode(src)); default: break; } diff --git a/src/Compression/CompressionCodecGorilla.cpp b/src/Compression/CompressionCodecGorilla.cpp index 0ca3e5660e0..0da6ff46dbc 100644 --- a/src/Compression/CompressionCodecGorilla.cpp +++ b/src/Compression/CompressionCodecGorilla.cpp @@ -259,7 +259,7 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest, writer.flush(); - return (dest - dest_start) + (writer.count() + 7) / 8; + return static_cast((dest - dest_start) + (writer.count() + 7) / 8); } template @@ -320,7 +320,7 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest) ErrorCodes::CANNOT_DECOMPRESS); } - xored_data = reader.readBits(curr_xored_info.data_bits); + xored_data = static_cast(reader.readBits(curr_xored_info.data_bits)); xored_data <<= curr_xored_info.trailing_zero_bits; curr_value = prev_value ^ xored_data; } diff --git a/src/Compression/CompressionCodecLZ4.cpp b/src/Compression/CompressionCodecLZ4.cpp index 12f138dc95a..5b93e7ef60f 100644 --- a/src/Compression/CompressionCodecLZ4.cpp +++ b/src/Compression/CompressionCodecLZ4.cpp @@ -134,7 +134,7 @@ void registerCodecLZ4HC(CompressionCodecFactory & factory) if (!literal) throw Exception("LZ4HC codec argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER); - level = literal->value.safeGet(); + level = static_cast(literal->value.safeGet()); } return std::make_shared(level); diff --git a/src/Compression/CompressionCodecMultiple.cpp b/src/Compression/CompressionCodecMultiple.cpp index 8ad054673e1..628c2d97d86 100644 --- a/src/Compression/CompressionCodecMultiple.cpp +++ b/src/Compression/CompressionCodecMultiple.cpp @@ -48,7 +48,7 @@ UInt32 CompressionCodecMultiple::getMaxCompressedDataSize(UInt32 uncompressed_si compressed_size = codec->getCompressedReserveSize(compressed_size); /// TotalCodecs ByteForEachCodec data - return sizeof(UInt8) + codecs.size() + compressed_size; + return static_cast(sizeof(UInt8) + codecs.size() + compressed_size); } UInt32 CompressionCodecMultiple::doCompressData(const char * source, UInt32 source_size, char * dest) const @@ -73,7 +73,7 @@ UInt32 CompressionCodecMultiple::doCompressData(const char * source, UInt32 sour memcpy(&dest[1 + codecs.size()], uncompressed_buf.data(), source_size); - return 1 + codecs.size() + 
source_size; + return static_cast(1 + codecs.size() + source_size); } void CompressionCodecMultiple::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 decompressed_size) const diff --git a/src/Compression/CompressionCodecT64.cpp b/src/Compression/CompressionCodecT64.cpp index bfcebad9676..cc8ce24476f 100644 --- a/src/Compression/CompressionCodecT64.cpp +++ b/src/Compression/CompressionCodecT64.cpp @@ -550,7 +550,7 @@ void decompressData(const char * src, UInt32 bytes_size, char * dst, UInt32 unco UInt32 num_bits = getValuableBitsNumber(min, max); if (!num_bits) { - T min_value = min; + T min_value = static_cast(min); for (UInt32 i = 0; i < num_elements; ++i, dst += sizeof(T)) unalignedStore(dst, min_value); return; @@ -572,14 +572,14 @@ void decompressData(const char * src, UInt32 bytes_size, char * dst, UInt32 unco T upper_max [[maybe_unused]] = 0; T sign_bit [[maybe_unused]] = 0; if (num_bits < 64) - upper_min = static_cast(min) >> num_bits << num_bits; + upper_min = static_cast(static_cast(min) >> num_bits << num_bits); if constexpr (is_signed_v) { if (min < 0 && max >= 0 && num_bits < 64) { - sign_bit = 1ull << (num_bits - 1); - upper_max = static_cast(max) >> num_bits << num_bits; + sign_bit = static_cast(1ull << (num_bits - 1)); + upper_max = static_cast(static_cast(max) >> num_bits << num_bits); } } diff --git a/src/Compression/CompressionCodecZSTD.cpp b/src/Compression/CompressionCodecZSTD.cpp index b47c8c4b080..f1c50840e54 100644 --- a/src/Compression/CompressionCodecZSTD.cpp +++ b/src/Compression/CompressionCodecZSTD.cpp @@ -65,7 +65,7 @@ void CompressionCodecZSTD::updateHash(SipHash & hash) const UInt32 CompressionCodecZSTD::getMaxCompressedDataSize(UInt32 uncompressed_size) const { - return ZSTD_compressBound(uncompressed_size); + return static_cast(ZSTD_compressBound(uncompressed_size)); } @@ -84,7 +84,7 @@ UInt32 CompressionCodecZSTD::doCompressData(const char * source, UInt32 source_s if (ZSTD_isError(compressed_size)) throw Exception("Cannot compress block with ZSTD: " + std::string(ZSTD_getErrorName(compressed_size)), ErrorCodes::CANNOT_COMPRESS); - return compressed_size; + return static_cast(compressed_size); } @@ -124,18 +124,20 @@ void registerCodecZSTD(CompressionCodecFactory & factory) if (!literal) throw Exception("ZSTD codec argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER); - level = literal->value.safeGet(); + level = static_cast(literal->value.safeGet()); if (level > ZSTD_maxCLevel()) - throw Exception( - "ZSTD codec can't have level more than " + toString(ZSTD_maxCLevel()) + ", given " + toString(level), - ErrorCodes::ILLEGAL_CODEC_PARAMETER); + { + throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, + "ZSTD codec can't have level more than {}, given {}", + ZSTD_maxCLevel(), level); + } if (arguments->children.size() > 1) { const auto * window_literal = children[1]->as(); if (!window_literal) throw Exception("ZSTD codec second argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER); - const int window_log = window_literal->value.safeGet(); + const int window_log = static_cast(window_literal->value.safeGet()); ZSTD_bounds window_log_bounds = ZSTD_cParam_getBounds(ZSTD_c_windowLog); if (ZSTD_isError(window_log_bounds.error)) diff --git a/src/Compression/tests/gtest_compressionCodec.cpp b/src/Compression/tests/gtest_compressionCodec.cpp index 2df3edb23ad..e5e50cd5320 100644 --- a/src/Compression/tests/gtest_compressionCodec.cpp +++ b/src/Compression/tests/gtest_compressionCodec.cpp @@ -391,7 +391,7 @@ 
CodecTestSequence generateSeq(Generator gen, const char* gen_name, B Begin = 0, for (auto i = Begin; i < End; i += direction) { - const T v = gen(static_cast(i)); + const T v = static_cast(gen(i)); unalignedStoreLE(write_pos, v); write_pos += sizeof(v); @@ -464,13 +464,15 @@ void testTranscoding(Timer & timer, ICompressionCodec & codec, const CodecTestSe { const auto & source_data = test_sequence.serialized_data; - const UInt32 encoded_max_size = codec.getCompressedReserveSize(source_data.size()); + const UInt32 encoded_max_size = codec.getCompressedReserveSize( + static_cast(source_data.size())); PODArray encoded(encoded_max_size); timer.start(); assert(source_data.data() != nullptr); // Codec assumes that source buffer is not null. - const UInt32 encoded_size = codec.compress(source_data.data(), source_data.size(), encoded.data()); + const UInt32 encoded_size = codec.compress( + source_data.data(), static_cast(source_data.size()), encoded.data()); timer.report("encoding"); encoded.resize(encoded_size); @@ -478,7 +480,8 @@ void testTranscoding(Timer & timer, ICompressionCodec & codec, const CodecTestSe PODArray decoded(source_data.size()); timer.start(); - const UInt32 decoded_size = codec.decompress(encoded.data(), encoded.size(), decoded.data()); + const UInt32 decoded_size = codec.decompress( + encoded.data(), static_cast(encoded.size()), decoded.data()); timer.report("decoding"); decoded.resize(decoded_size); @@ -542,10 +545,12 @@ TEST_P(CodecTestCompatibility, Encoding) const auto & source_data = data_sequence.serialized_data; // Just encode the data with codec - const UInt32 encoded_max_size = codec->getCompressedReserveSize(source_data.size()); + const UInt32 encoded_max_size = codec->getCompressedReserveSize( + static_cast(source_data.size())); PODArray encoded(encoded_max_size); - const UInt32 encoded_size = codec->compress(source_data.data(), source_data.size(), encoded.data()); + const UInt32 encoded_size = codec->compress( + source_data.data(), static_cast(source_data.size()), encoded.data()); encoded.resize(encoded_size); SCOPED_TRACE(::testing::Message("encoded: ") << AsHexString(encoded)); @@ -560,7 +565,8 @@ TEST_P(CodecTestCompatibility, Decoding) const auto codec = makeCodec(codec_spec.codec_statement, expected.data_type); PODArray decoded(expected.serialized_data.size()); - const UInt32 decoded_size = codec->decompress(encoded_data.c_str(), encoded_data.size(), decoded.data()); + const UInt32 decoded_size = codec->decompress( + encoded_data.c_str(), static_cast(encoded_data.size()), decoded.data()); decoded.resize(decoded_size); ASSERT_TRUE(EqualByteContainers(expected.data_type->getSizeOfValueInMemory(), expected.serialized_data, decoded)); diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 1c8959379da..a097cb57bc6 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -284,8 +284,9 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co params.client_req_timeout_ = getValueOrMaxInt32AndLogWarning(coordination_settings->operation_timeout_ms.totalMilliseconds(), "operation_timeout_ms", log); params.auto_forwarding_ = coordination_settings->auto_forwarding; - params.auto_forwarding_req_timeout_ - = std::max(coordination_settings->operation_timeout_ms.totalMilliseconds() * 2, std::numeric_limits::max()); + params.auto_forwarding_req_timeout_ = std::max( + static_cast(coordination_settings->operation_timeout_ms.totalMilliseconds() * 2), + std::numeric_limits::max()); 
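A note on the KeeperServer hunk in progress here: std::max against std::numeric_limits<int32_t>::max() can only ever yield the limit itself, so the clamping helper on the line that resumes below, getValueOrMaxInt32AndLogWarning, is what actually bounds the timeout. The helper's body is not shown in this patch, so the sketch below is illustrative only (stderr standing in for the Poco logger):

#include <cstdint>
#include <cstdio>
#include <limits>

/// Saturating narrow: return the value if it fits in int32_t, otherwise
/// clamp to INT32_MAX and warn instead of throwing.
int32_t valueOrMaxInt32Sketch(int64_t value, const char * name)
{
    constexpr int64_t max = std::numeric_limits<int32_t>::max();
    if (value > max)
    {
        std::fprintf(stderr, "%s (%lld) exceeds int32 range, clamping\n",
                     name, static_cast<long long>(value));
        return static_cast<int32_t>(max);
    }
    return static_cast<int32_t>(value);
}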
params.auto_forwarding_req_timeout_ = getValueOrMaxInt32AndLogWarning(coordination_settings->operation_timeout_ms.totalMilliseconds() * 2, "operation_timeout_ms", log); params.max_append_size_ diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index a30a32b5735..d6b75baa90e 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -922,7 +922,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr stat.version = 0; stat.aversion = 0; stat.cversion = 0; - stat.dataLength = request.data.length(); + stat.dataLength = static_cast(request.data.length()); stat.ephemeralOwner = request.is_ephemeral ? session_id : 0; new_deltas.emplace_back( @@ -1222,7 +1222,7 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce value.stat.version++; value.stat.mzxid = zxid; value.stat.mtime = time; - value.stat.dataLength = data.length(); + value.stat.dataLength = static_cast(data.length()); value.setData(data); }, request.version}); diff --git a/src/Coordination/SummingStateMachine.cpp b/src/Coordination/SummingStateMachine.cpp index ae3d2b06d75..4151b727744 100644 --- a/src/Coordination/SummingStateMachine.cpp +++ b/src/Coordination/SummingStateMachine.cpp @@ -71,10 +71,10 @@ void SummingStateMachine::createSnapshotInternal(nuraft::snapshot & s) snapshots[s.get_last_log_idx()] = ctx; // Maintain last 3 snapshots only. - int num = snapshots.size(); + ssize_t num = snapshots.size(); auto entry = snapshots.begin(); - for (int ii = 0; ii < num - MAX_SNAPSHOTS; ++ii) + for (ssize_t ii = 0; ii < num - MAX_SNAPSHOTS; ++ii) { if (entry == snapshots.end()) break; diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index 6702c4cc718..82fce5297a1 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -119,7 +119,7 @@ int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in, Poco::L Coordination::read(node.stat.pzxid, in); if (!path.empty()) { - node.stat.dataLength = node.getData().length(); + node.stat.dataLength = static_cast(node.getData().length()); node.seq_num = node.stat.cversion; storage.container.insertOrReplace(path, node); diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index b1d27d4541d..fa4c42dd82a 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -941,7 +941,7 @@ TEST_P(CoordinationTest, SnapshotableHashMapTrySnapshot) EXPECT_EQ(itr->active_in_map, true); itr = std::next(itr); EXPECT_EQ(itr, map_snp.end()); - for (size_t i = 0; i < 5; ++i) + for (int i = 0; i < 5; ++i) { EXPECT_TRUE(map_snp.insert("/hello" + std::to_string(i), i).second); } @@ -1982,7 +1982,7 @@ TEST_P(CoordinationTest, TestListRequestTypes) KeeperStorage storage{500, "", keeper_context}; - int64_t zxid = 0; + int32_t zxid = 0; static constexpr std::string_view test_path = "/list_request_type/node"; diff --git a/src/Core/DecimalComparison.h b/src/Core/DecimalComparison.h index 530722a2519..953c98f80b4 100644 --- a/src/Core/DecimalComparison.h +++ b/src/Core/DecimalComparison.h @@ -230,7 +230,7 @@ private: if constexpr (is_decimal) y = b.value; else - y = b; + y = static_cast(b); if constexpr (_check_overflow) { diff --git a/src/Core/DecimalFunctions.h b/src/Core/DecimalFunctions.h index 0f2158fb83b..263f78af5cc 100644 --- a/src/Core/DecimalFunctions.h +++ b/src/Core/DecimalFunctions.h @@ 
diff --git a/src/Core/DecimalFunctions.h b/src/Core/DecimalFunctions.h
index 0f2158fb83b..263f78af5cc 100644
--- a/src/Core/DecimalFunctions.h
+++ b/src/Core/DecimalFunctions.h
@@ -241,7 +241,7 @@ inline DecimalComponents split(const DecimalType & decimal, UInt32
  * If scale is to big, result is undefined.
  */
 template
-inline typename DecimalType::NativeType getWholePart(const DecimalType & decimal, size_t scale)
+inline typename DecimalType::NativeType getWholePart(const DecimalType & decimal, UInt32 scale)
 {
     if (scale == 0)
         return decimal.value;
@@ -273,7 +273,7 @@ inline typename DecimalType::NativeType getFractionalPartWithScaleMultiplier(
  * If scale is to big, result is undefined.
  */
 template
-inline typename DecimalType::NativeType getFractionalPart(const DecimalType & decimal, size_t scale)
+inline typename DecimalType::NativeType getFractionalPart(const DecimalType & decimal, UInt32 scale)
 {
     if (scale == 0)
         return 0;
@@ -283,7 +283,7 @@ inline typename DecimalType::NativeType getFractionalPart(const DecimalType & de

 /// Decimal to integer/float conversion
 template
-ReturnType convertToImpl(const DecimalType & decimal, size_t scale, To & result)
+ReturnType convertToImpl(const DecimalType & decimal, UInt32 scale, To & result)
 {
     using DecimalNativeType = typename DecimalType::NativeType;
     static constexpr bool throw_exception = std::is_void_v;
@@ -334,7 +334,7 @@ ReturnType convertToImpl(const DecimalType & decimal, size_t scale, To & result)

 template
-To convertTo(const DecimalType & decimal, size_t scale)
+To convertTo(const DecimalType & decimal, UInt32 scale)
 {
     To result;
     convertToImpl(decimal, scale, result);
@@ -342,7 +342,7 @@ To convertTo(const DecimalType & decimal, size_t scale)
 }

 template
-bool tryConvertTo(const DecimalType & decimal, size_t scale, To & result)
+bool tryConvertTo(const DecimalType & decimal, UInt32 scale, To & result)
 {
     return convertToImpl(decimal, scale, result);
 }
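In the DecimalFunctions.h hunks the scale parameter moves from size_t to UInt32: a Decimal scale is a small bounded number (at most 76 digits), so a 32-bit unsigned parameter matches what callers actually pass and removes a sign/width conversion at every call site. A rough sketch of the whole-part computation under that assumption (scaleMultiplier here stands in for the real ClickHouse helper):

    #include <cstdint>

    using NativeType = int64_t;

    NativeType scaleMultiplier(uint32_t scale)
    {
        NativeType result = 1;
        for (uint32_t i = 0; i < scale; ++i)  // bounded: scale <= 76 digits
            result *= 10;
        return result;
    }

    NativeType getWholePart(NativeType value, uint32_t scale)
    {
        return scale == 0 ? value : value / scaleMultiplier(scale);
    }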
diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp
index 87566eade54..6f3ac1b40e9 100644
--- a/src/Core/MySQL/MySQLReplication.cpp
+++ b/src/Core/MySQL/MySQLReplication.cpp
@@ -161,7 +161,7 @@ namespace MySQLReplication
     /// https://dev.mysql.com/doc/internals/en/table-map-event.html
     void TableMapEvent::parseImpl(ReadBuffer & payload)
     {
-        column_count = readLengthEncodedNumber(payload);
+        column_count = static_cast(readLengthEncodedNumber(payload));
         for (auto i = 0U; i < column_count; ++i)
         {
             UInt8 v = 0x00;
@@ -283,7 +283,7 @@ namespace MySQLReplication

     void RowsEvent::parseImpl(ReadBuffer & payload)
     {
-        number_columns = readLengthEncodedNumber(payload);
+        number_columns = static_cast(readLengthEncodedNumber(payload));
         size_t columns_bitmap_size = (number_columns + 7) / 8;
         switch (header.type)
         {
@@ -494,7 +494,7 @@ namespace MySQLReplication
                         readBigEndianStrict(payload, reinterpret_cast(&uintpart), 6);
                         intpart = uintpart - 0x800000000000L;
                         ltime = intpart;
-                        frac = std::abs(intpart % (1L << 24));
+                        frac = static_cast(std::abs(intpart % (1L << 24)));
                         break;
                     }
                     default:
@@ -536,7 +536,7 @@ namespace MySQLReplication
                     readBigEndianStrict(payload, reinterpret_cast(&val), 5);
                     readTimeFractionalPart(payload, fsp, meta);

-                    UInt32 year_month = readBits(val, 1, 17, 40);
+                    UInt32 year_month = static_cast(readBits(val, 1, 17, 40));
                     time_t date_time = DateLUT::instance().makeDateTime(
                         year_month / 13, year_month % 13, readBits(val, 18, 5, 40)
                         , readBits(val, 23, 5, 40), readBits(val, 28, 6, 40), readBits(val, 34, 6, 40)
@@ -625,7 +625,7 @@ namespace MySQLReplication
                     {
                         UInt32 val = 0;
                         readBigEndianStrict(payload, reinterpret_cast(&val), 4);
-                        res *= intExp10OfSize(digits_per_integer);
+                        res *= intExp10OfSize(static_cast(digits_per_integer));
                         res += (val ^ mask);
                     }
                 }
@@ -638,7 +638,7 @@ namespace MySQLReplication
                 {
                     UInt32 val = 0;
                     readBigEndianStrict(payload, reinterpret_cast(&val), 4);
-                    res *= intExp10OfSize(digits_per_integer);
+                    res *= intExp10OfSize(static_cast(digits_per_integer));
                     res += (val ^ mask);
                 }

@@ -651,7 +651,7 @@ namespace MySQLReplication
                 if (to_read) //-V547
                 {
                     readBigEndianStrict(payload, reinterpret_cast(&val), to_read);
-                    res *= intExp10OfSize(compressed_decimals);
+                    res *= intExp10OfSize(static_cast(compressed_decimals));
                     res += (val ^ (mask & compressed_integer_align_numbers[compressed_decimals]));
                 }
             }
diff --git a/src/Core/MySQL/PacketsReplication.cpp b/src/Core/MySQL/PacketsReplication.cpp
index ec5e8868cc5..74c6ca2d81f 100644
--- a/src/Core/MySQL/PacketsReplication.cpp
+++ b/src/Core/MySQL/PacketsReplication.cpp
@@ -57,7 +57,7 @@ void BinlogDumpGTID::writePayloadImpl(WriteBuffer & buffer) const
     const UInt64 position = 4;
     buffer.write(reinterpret_cast(&position), 8);

-    UInt32 gtid_size = gtid_datas.size();
+    UInt32 gtid_size = static_cast(gtid_datas.size());
     buffer.write(reinterpret_cast(&gtid_size), 4);
     buffer.write(gtid_datas.data(), gtid_datas.size());
 }
diff --git a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp
index 61a0a91ab2e..e5f3a0f91c3 100644
--- a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp
+++ b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp
@@ -100,7 +100,7 @@ void insertPostgreSQLValue(
             readDateTimeText(time, in, assert_cast(data_type.get())->getTimeZone());
             if (time < 0)
                 time = 0;
-            assert_cast(column).insertValue(time);
+            assert_cast(column).insertValue(static_cast(time));
             break;
         }
         case ExternalResultDescription::ValueType::vtDateTime64:
diff --git a/src/Core/PostgreSQLProtocol.h b/src/Core/PostgreSQLProtocol.h
index 908518eff5e..994494fc92f 100644
--- a/src/Core/PostgreSQLProtocol.h
+++ b/src/Core/PostgreSQLProtocol.h
@@ -336,7 +336,12 @@ public:
     Int32 size() const override
     {
         // message length part + (1 + sizes of other fields + 1) + null byte in the end of the message
-        return 4 + (1 + enum_to_string[severity].size() + 1) + (1 + sql_state.size() + 1) + (1 + message.size() + 1) + 1;
+        return static_cast(
+            4
+            + (1 + enum_to_string[severity].size() + 1)
+            + (1 + sql_state.size() + 1)
+            + (1 + message.size() + 1)
+            + 1);
     }

     MessageType getMessageType() const override
@@ -518,7 +523,7 @@ public:
     Int32 size() const override
     {
-        return 4 + name.size() + 1 + value.size() + 1;
+        return static_cast(4 + name.size() + 1 + value.size() + 1);
     }

     MessageType getMessageType() const override
@@ -633,7 +638,7 @@ public:
         // + object ID of the table (Int32 and always zero) + attribute number of the column (Int16 and always zero)
         // + type object id (Int32) + data type size (Int16)
         // + type modifier (Int32 and always -1) + format code (Int16)
-        return (name.size() + 1) + 4 + 2 + 4 + 2 + 4 + 2;
+        return static_cast((name.size() + 1) + 4 + 2 + 4 + 2 + 4 + 2);
     }
 };

@@ -682,7 +687,7 @@ public:
     Int32 size() const override
     {
-        return str.size();
+        return static_cast(str.size());
     }
 };

@@ -762,7 +767,7 @@ public:
     Int32 size() const override
     {
-        return 4 + value.size() + 1;
+        return static_cast(4 + value.size() + 1);
     }

     MessageType getMessageType() const override
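The PostgreSQLProtocol.h hunks all share one shape: the wire format stores message lengths as Int32, the byte counts are accumulated from std::string::size() (a size_t), and the narrowing now happens once, at the return. A self-contained sketch of the ErrorResponse-style computation, with hypothetical field names:

    #include <cstdint>
    #include <string>

    int32_t errorOrNoticeSize(const std::string & severity, const std::string & sql_state, const std::string & message)
    {
        // 4-byte length word, three fields each framed by a 1-byte tag and a
        // trailing '\0', plus the final terminator byte of the message.
        return static_cast<int32_t>(
            4
            + (1 + severity.size() + 1)
            + (1 + sql_state.size() + 1)
            + (1 + message.size() + 1)
            + 1);
    }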
diff --git a/src/Core/tests/gtest_DecimalFunctions.cpp b/src/Core/tests/gtest_DecimalFunctions.cpp
index 1712785488e..aeaf3a1fac9 100644
--- a/src/Core/tests/gtest_DecimalFunctions.cpp
+++ b/src/Core/tests/gtest_DecimalFunctions.cpp
@@ -28,7 +28,7 @@ class DecimalUtilsSplitAndCombineTest : public ::testing::TestWithParam
 void testSplit(const DecimalUtilsSplitAndCombineTestParam & param)
 {
-    const DecimalType decimal_value = param.decimal_value;
+    const DecimalType decimal_value(static_cast(param.decimal_value));

     const auto & actual_components = DecimalUtils::split(decimal_value, param.scale);
     EXPECT_EQ(param.components.whole, actual_components.whole);
@@ -39,21 +39,28 @@ template
 void testDecimalFromComponents(const DecimalUtilsSplitAndCombineTestParam & param)
 {
     EXPECT_EQ(param.decimal_value,
-              DecimalUtils::decimalFromComponents(param.components.whole, param.components.fractional, param.scale));
+              DecimalUtils::decimalFromComponents(
+                  static_cast(param.components.whole),
+                  static_cast(param.components.fractional),
+                  param.scale));
 }

 template
 void testGetWhole(const DecimalUtilsSplitAndCombineTestParam & param)
 {
     EXPECT_EQ(param.components.whole,
-              DecimalUtils::getWholePart(DecimalType{param.decimal_value}, param.scale));
+              DecimalUtils::getWholePart(
+                  DecimalType{static_cast(param.decimal_value)},
+                  param.scale));
 }

 template
 void testGetFractional(const DecimalUtilsSplitAndCombineTestParam & param)
 {
     EXPECT_EQ(param.components.fractional,
-              DecimalUtils::getFractionalPart(DecimalType{param.decimal_value}, param.scale));
+              DecimalUtils::getFractionalPart(
+                  DecimalType{static_cast(param.decimal_value)},
+                  param.scale));
 }

 // Unfortunately typed parametrized tests () are not supported in this version of gtest, so I have to emulate by hand.
diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp
index 2dddfd6874a..758f85e688f 100644
--- a/src/Daemon/BaseDaemon.cpp
+++ b/src/Daemon/BaseDaemon.cpp
@@ -575,7 +575,8 @@ void BaseDaemon::closeFDs()
 {
     int max_fd = -1;
 #if defined(_SC_OPEN_MAX)
-    max_fd = sysconf(_SC_OPEN_MAX);
+    // fd cannot be > INT_MAX
+    max_fd = static_cast(sysconf(_SC_OPEN_MAX));
     if (max_fd == -1)
 #endif
         max_fd = 256; /// bad fallback
diff --git a/src/Daemon/SentryWriter.cpp b/src/Daemon/SentryWriter.cpp
index a6033af6fe3..bb330162818 100644
--- a/src/Daemon/SentryWriter.cpp
+++ b/src/Daemon/SentryWriter.cpp
@@ -189,7 +189,7 @@ void SentryWriter::onFault(int sig, const std::string & error_message, const Sta
             sentry_value_set_by_key(sentry_frame, "filename", sentry_value_new_string(current_frame.file.value().c_str()));

         if (current_frame.line.has_value())
-            sentry_value_set_by_key(sentry_frame, "lineno", sentry_value_new_int32(current_frame.line.value()));
+            sentry_value_set_by_key(sentry_frame, "lineno", sentry_value_new_int32(static_cast(current_frame.line.value())));

         sentry_value_append(sentry_frames, sentry_frame);
     }
diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp
index c58e186b980..aab328eaa33 100644
--- a/src/DataTypes/DataTypeEnum.cpp
+++ b/src/DataTypes/DataTypeEnum.cpp
@@ -102,7 +102,7 @@ bool DataTypeEnum::textCanContainOnlyValidUTF8() const
             if (pos + length > end)
                 return false;

-            if (Poco::UTF8Encoding::isLegal(reinterpret_cast(pos), length))
+            if (Poco::UTF8Encoding::isLegal(reinterpret_cast(pos), static_cast(length)))
                 pos += length;
             else
                 return false;
diff --git a/src/DataTypes/Native.h b/src/DataTypes/Native.h
index 40086b14a0c..9782c5d64e9 100644
--- a/src/DataTypes/Native.h
+++ b/src/DataTypes/Native.h
@@ -224,7 +224,7 @@ static inline std::pair nativeCastToCommon(llvm::I
         size_t rhs_bit_width = rhs->getType()->getIntegerBitWidth() + (!rhs_is_signed && lhs_is_signed);

         size_t max_bit_width = std::max(lhs_bit_width, rhs_bit_width);
-        common = b.getIntNTy(max_bit_width);
+        common = b.getIntNTy(static_cast(max_bit_width));
     }
     else
     {
diff --git a/src/DataTypes/Serializations/SerializationDateTime.cpp b/src/DataTypes/Serializations/SerializationDateTime.cpp
index fd56c1baebd..7238d3ce190 100644
--- a/src/DataTypes/Serializations/SerializationDateTime.cpp
+++ b/src/DataTypes/Serializations/SerializationDateTime.cpp
@@ -75,7 +75,7 @@ void SerializationDateTime::deserializeTextEscaped(IColumn & column, ReadBuffer
     readText(x, istr, settings, time_zone, utc_time_zone);
     if (x < 0)
         x = 0;
-    assert_cast(column).getData().push_back(x);
+    assert_cast(column).getData().push_back(static_cast(x));
 }

 void SerializationDateTime::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
@@ -99,7 +99,9 @@ void SerializationDateTime::deserializeTextQuoted(IColumn & column, ReadBuffer &
     }
     if (x < 0)
         x = 0;
-    assert_cast(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
+
+    /// It's important to do this at the end - for exception safety.
+    assert_cast(column).getData().push_back(static_cast(x));
 }

 void SerializationDateTime::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
@@ -123,7 +125,7 @@ void SerializationDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & i
     }
     if (x < 0)
         x = 0;
-    assert_cast(column).getData().push_back(x);
+    assert_cast(column).getData().push_back(static_cast(x));
 }

 void SerializationDateTime::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
@@ -153,7 +155,7 @@ void SerializationDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & is

     if (x < 0)
         x = 0;

-    assert_cast(column).getData().push_back(x);
+    assert_cast(column).getData().push_back(static_cast(x));
 }

 }
diff --git a/src/DataTypes/Serializations/SerializationInfoTuple.cpp b/src/DataTypes/Serializations/SerializationInfoTuple.cpp
index d0fa5572a48..6c326743e8a 100644
--- a/src/DataTypes/Serializations/SerializationInfoTuple.cpp
+++ b/src/DataTypes/Serializations/SerializationInfoTuple.cpp
@@ -124,7 +124,7 @@ void SerializationInfoTuple::fromJSON(const Poco::JSON::Object & object)
             "Expected: {}, got: {}", elems.size(), subcolumns->size());

     for (size_t i = 0; i < elems.size(); ++i)
-        elems[i]->fromJSON(*subcolumns->getObject(i));
+        elems[i]->fromJSON(*subcolumns->getObject(static_cast(i)));
 }

 }
diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp
index dfe0188c8e7..761adf3b765 100644
--- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp
+++ b/src/DataTypes/Serializations/SerializationLowCardinality.cpp
@@ -385,13 +385,13 @@ namespace
             }
             else if (map[val] == 0 && val != zero_pos_value)
             {
-                map[val] = cur_pos;
+                map[val] = static_cast(cur_pos);
                 ++cur_pos;
             }
         }
         else
         {
-            T shifted_val = val - dict_size;
+            T shifted_val = static_cast(val - dict_size);
             if (cur_overflowed_pos == 0)
             {
                 zero_pos_overflowed_value = shifted_val;
@@ -399,7 +399,7 @@ namespace
             }
             else if (overflow_map[shifted_val] == 0 && shifted_val != zero_pos_overflowed_value)
             {
-                overflow_map[shifted_val] = cur_overflowed_pos;
+                overflow_map[shifted_val] = static_cast(cur_overflowed_pos);
                 ++cur_overflowed_pos;
             }
         }
@@ -429,7 +429,7 @@ namespace
             if (val < dict_size)
                 val = map[val];
             else
-                val = overflow_map[val - dict_size] + cur_pos;
+                val = overflow_map[val - dict_size] + static_cast(cur_pos);
         }

         return {std::move(dictionary_map), std::move(additional_keys_map)};
diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp
index fee3cf1553e..87c6055c35a 100644
--- a/src/DataTypes/getLeastSupertype.cpp
+++ b/src/DataTypes/getLeastSupertype.cpp
@@ -447,8 +447,8 @@ DataTypePtr getLeastSupertype(const DataTypes & types)
     /// For String and FixedString, or for different FixedStrings, the common type is String.
     /// No other types are compatible with Strings. TODO Enums?
     {
-        UInt32 have_string = type_ids.count(TypeIndex::String);
-        UInt32 have_fixed_string = type_ids.count(TypeIndex::FixedString);
+        size_t have_string = type_ids.count(TypeIndex::String);
+        size_t have_fixed_string = type_ids.count(TypeIndex::FixedString);

         if (have_string || have_fixed_string)
         {
@@ -462,10 +462,10 @@ DataTypePtr getLeastSupertype(const DataTypes & types)

     /// For Date and DateTime/DateTime64, the common type is DateTime/DateTime64. No other types are compatible.
     {
-        UInt32 have_date = type_ids.count(TypeIndex::Date);
-        UInt32 have_date32 = type_ids.count(TypeIndex::Date32);
-        UInt32 have_datetime = type_ids.count(TypeIndex::DateTime);
-        UInt32 have_datetime64 = type_ids.count(TypeIndex::DateTime64);
+        size_t have_date = type_ids.count(TypeIndex::Date);
+        size_t have_date32 = type_ids.count(TypeIndex::Date32);
+        size_t have_datetime = type_ids.count(TypeIndex::DateTime);
+        size_t have_datetime64 = type_ids.count(TypeIndex::DateTime64);

         if (have_date || have_date32 || have_datetime || have_datetime64)
         {
@@ -526,23 +526,21 @@ DataTypePtr getLeastSupertype(const DataTypes & types)

     /// Decimals
     {
-        UInt32 have_decimal32 = type_ids.count(TypeIndex::Decimal32);
-        UInt32 have_decimal64 = type_ids.count(TypeIndex::Decimal64);
-        UInt32 have_decimal128 = type_ids.count(TypeIndex::Decimal128);
+        size_t have_decimal32 = type_ids.count(TypeIndex::Decimal32);
+        size_t have_decimal64 = type_ids.count(TypeIndex::Decimal64);
+        size_t have_decimal128 = type_ids.count(TypeIndex::Decimal128);

         if (have_decimal32 || have_decimal64 || have_decimal128)
         {
-            UInt32 num_supported = have_decimal32 + have_decimal64 + have_decimal128;
+            size_t num_supported = have_decimal32 + have_decimal64 + have_decimal128;

             std::vector int_ids = {TypeIndex::Int8, TypeIndex::UInt8, TypeIndex::Int16, TypeIndex::UInt16,
-                                   TypeIndex::Int32, TypeIndex::UInt32, TypeIndex::Int64, TypeIndex::UInt64};
-            std::vector num_ints(int_ids.size(), 0);
+                                   TypeIndex::Int32, TypeIndex::UInt32, TypeIndex::Int64, TypeIndex::UInt64};

             TypeIndex max_int = TypeIndex::Nothing;
             for (size_t i = 0; i < int_ids.size(); ++i)
             {
-                UInt32 num = type_ids.count(int_ids[i]);
-                num_ints[i] = num;
+                size_t num = type_ids.count(int_ids[i]);
                 num_supported += num;
                 if (num)
                     max_int = int_ids[i];
diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp
index 796142884a3..88aa086fe65 100644
--- a/src/Databases/DatabaseOnDisk.cpp
+++ b/src/Databases/DatabaseOnDisk.cpp
@@ -716,8 +716,12 @@ ASTPtr DatabaseOnDisk::getCreateQueryFromStorage(const String & table_name, cons
     auto ast_storage = std::make_shared();
     ast_storage->set(ast_storage->engine, ast_engine);

-    auto create_table_query = DB::getCreateQueryFromStorage(storage, ast_storage, false,
-                                                            getContext()->getSettingsRef().max_parser_depth, throw_on_error);
+    unsigned max_parser_depth = static_cast(getContext()->getSettingsRef().max_parser_depth);
+    auto create_table_query = DB::getCreateQueryFromStorage(storage,
+                                                            ast_storage,
+                                                            false,
+                                                            max_parser_depth,
+                                                            throw_on_error);

     create_table_query->set(create_table_query->as()->comment,
                             std::make_shared("SYSTEM TABLE is built on the fly."));
diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp
index 80301732ff8..5f59f6497e2 100644
--- a/src/Databases/MySQL/DatabaseMySQL.cpp
+++ b/src/Databases/MySQL/DatabaseMySQL.cpp
@@ -164,8 +164,13 @@ ASTPtr DatabaseMySQL::getCreateTableQueryImpl(const String & table_name, Context
         std::erase_if(storage_children, [&](const ASTPtr & element) { return element.get() == ast_storage->settings; });
         ast_storage->settings = nullptr;
     }
-    auto create_table_query = DB::getCreateQueryFromStorage(storage, table_storage_define, true,
-                                                            getContext()->getSettingsRef().max_parser_depth, throw_on_error);
+
+    unsigned max_parser_depth = static_cast(getContext()->getSettingsRef().max_parser_depth);
+    auto create_table_query = DB::getCreateQueryFromStorage(storage,
+                                                            table_storage_define,
+                                                            true,
+                                                            max_parser_depth,
+                                                            throw_on_error);

     return create_table_query;
 }
diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp
index 604dc220fed..ed9199a359f 100644
--- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp
+++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp
@@ -478,8 +478,9 @@ static inline UInt32 randomNumber()
 {
     std::mt19937 rng;
     rng.seed(std::random_device()());
-    std::uniform_int_distribution dist6(std::numeric_limits::min(), std::numeric_limits::max());
-    return dist6(rng);
+    std::uniform_int_distribution dist6(
+        std::numeric_limits::min(), std::numeric_limits::max());
+    return static_cast(dist6(rng));
 }

 bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & metadata)
@@ -679,11 +680,11 @@ static void writeFieldsToColumn(
                 if (write_data_to_null_map(value, index))
                 {
                     if (value.getType() == Field::Types::UInt64)
-                        casted_int32_column->insertValue(value.get());
+                        casted_int32_column->insertValue(static_cast(value.get()));
                     else if (value.getType() == Field::Types::Int64)
                     {
                         /// For MYSQL_TYPE_INT24
-                        const Int32 & num = value.get();
+                        const Int32 & num = static_cast(value.get());
                         casted_int32_column->insertValue(num & 0x800000 ? num | 0xFF000000 : num);
                     }
                     else
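randomNumber() above seeds a Mersenne Twister from std::random_device and draws through std::uniform_int_distribution; since the distribution's result type is wider than the UInt32 return, the draw is now narrowed explicitly. A standalone sketch, assuming a UInt64-typed distribution as in the patched code:

    #include <cstdint>
    #include <limits>
    #include <random>

    uint32_t randomNumber()
    {
        std::mt19937 rng;
        rng.seed(std::random_device()());
        std::uniform_int_distribution<uint64_t> dist(
            std::numeric_limits<uint64_t>::min(), std::numeric_limits<uint64_t>::max());
        return static_cast<uint32_t>(dist(rng));  // keep the low 32 bits
    }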
diff --git a/src/Databases/SQLite/DatabaseSQLite.cpp b/src/Databases/SQLite/DatabaseSQLite.cpp
index 44a392ce1f2..83c5ebe00d3 100644
--- a/src/Databases/SQLite/DatabaseSQLite.cpp
+++ b/src/Databases/SQLite/DatabaseSQLite.cpp
@@ -192,8 +192,10 @@ ASTPtr DatabaseSQLite::getCreateTableQueryImpl(const String & table_name, Contex
     /// Add table_name to engine arguments
     storage_engine_arguments->children.insert(storage_engine_arguments->children.begin() + 1, std::make_shared(table_id.table_name));

+    unsigned max_parser_depth = static_cast(getContext()->getSettingsRef().max_parser_depth);
     auto create_table_query = DB::getCreateQueryFromStorage(storage, table_storage_define, true,
-                                                            getContext()->getSettingsRef().max_parser_depth, throw_on_error);
+                                                            max_parser_depth,
+                                                            throw_on_error);

     return create_table_query;
 }
diff --git a/src/Dictionaries/CassandraSource.cpp b/src/Dictionaries/CassandraSource.cpp
index fd5982443fa..878921c53d0 100644
--- a/src/Dictionaries/CassandraSource.cpp
+++ b/src/Dictionaries/CassandraSource.cpp
@@ -32,7 +32,7 @@ CassandraSource::CassandraSource(
     , has_more_pages(cass_true)
 {
     description.init(sample_block);
-    cassandraCheck(cass_statement_set_paging_size(statement, max_block_size));
+    cassandraCheck(cass_statement_set_paging_size(statement, static_cast(max_block_size)));
 }

 void CassandraSource::insertValue(IColumn & column, ValueType type, const CassValue * cass_value)
diff --git a/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.cpp b/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.cpp
index f91bbaa12a6..68bd6142416 100644
--- a/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.cpp
+++ b/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.cpp
@@ -31,7 +31,7 @@ bool RegionsHierarchyFormatReader::readNext(RegionEntry & entry)
             UInt64 population_big = 0;
             DB::readIntText(population_big, *input);
             population = population_big > std::numeric_limits::max() ? std::numeric_limits::max()
-                                                                     : population_big;
+                                                                     : static_cast(population_big);
         }

         DB::assertChar('\n', *input);
diff --git a/src/Dictionaries/RedisSource.cpp b/src/Dictionaries/RedisSource.cpp
index 4208d5fa63b..50a73e52010 100644
--- a/src/Dictionaries/RedisSource.cpp
+++ b/src/Dictionaries/RedisSource.cpp
@@ -109,7 +109,7 @@ namespace DB
                 readDateTimeText(time, in);
                 if (time < 0)
                     time = 0;
-                assert_cast(column).insertValue(time);
+                assert_cast(column).insertValue(static_cast(time));
                 break;
             }
             case ValueType::vtUUID:
diff --git a/src/Dictionaries/SSDCacheDictionaryStorage.h b/src/Dictionaries/SSDCacheDictionaryStorage.h
index 5f73352a4c9..428b4321ffd 100644
--- a/src/Dictionaries/SSDCacheDictionaryStorage.h
+++ b/src/Dictionaries/SSDCacheDictionaryStorage.h
@@ -675,7 +675,7 @@ public:
             pointers.push_back(&requests.back());
         }

-        AIOContext aio_context(read_from_file_buffer_blocks_size);
+        AIOContext aio_context(static_cast(read_from_file_buffer_blocks_size));

         PaddedPODArray processed(requests.size(), false);
         PaddedPODArray events;
@@ -735,7 +735,8 @@ public:
                 ++to_pop;

             /// add new io tasks
-            const int new_tasks_count = std::min(read_from_file_buffer_blocks_size - (to_push - to_pop), requests.size() - to_push);
+            const int new_tasks_count = static_cast(std::min(
+                read_from_file_buffer_blocks_size - (to_push - to_pop), requests.size() - to_push));

             int pushed = 0;
             while (new_tasks_count > 0 && (pushed = io_submit(aio_context.ctx, new_tasks_count, &pointers[to_push])) <= 0)
diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h
index ba041efe24a..899d06b4ed7 100644
--- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h
+++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h
@@ -68,7 +68,7 @@ private:

     IAsynchronousReader & reader;

-    Int32 priority;
+    Int64 priority;

     std::shared_ptr impl;

diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp
index 56cc20098ba..dc4898559c0 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp
+++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.cpp
@@ -67,14 +67,6 @@ void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf)
     }
 }

-void DiskObjectStorageMetadata::createFromSingleObject(const std::string & relative_path, size_t bytes_size, size_t ref_count_, bool read_only_)
-{
-    storage_objects.emplace_back(relative_path, bytes_size);
-    total_size = bytes_size;
-    ref_count = ref_count_;
-    read_only = read_only_;
-}
-
 void DiskObjectStorageMetadata::deserializeFromString(const std::string & data)
 {
     ReadBufferFromString buf(data);
diff --git a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h
index 09e0f4ee85b..d3ea5795dd3 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h
+++ b/src/Disks/ObjectStorages/DiskObjectStorageMetadata.h
@@ -50,7 +50,6 @@ public:

     void deserialize(ReadBuffer & buf);
     void deserializeFromString(const std::string & data);
-    void createFromSingleObject(const std::string & relative_path, size_t bytes_size, size_t ref_count_, bool is_read_only_);

     void serialize(WriteBuffer & buf, bool sync) const;
     std::string serializeToString() const;
diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
index 213f744d84f..e627a807bd8 100644
--- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
@@ -482,7 +482,7 @@ void S3ObjectStorage::copyObjectMultipartImpl(
         part_request.SetBucket(dst_bucket);
         part_request.SetKey(dst_key);
         part_request.SetUploadId(multipart_upload_id);
-        part_request.SetPartNumber(part_number);
+        part_request.SetPartNumber(static_cast(part_number));
         part_request.SetCopySourceRange(fmt::format("bytes={}-{}", position, std::min(size, position + upload_part_size) - 1));

         auto outcome = client_ptr->UploadPartCopy(part_request);
@@ -515,7 +515,7 @@ void S3ObjectStorage::copyObjectMultipartImpl(
         for (size_t i = 0; i < part_tags.size(); ++i)
         {
             Aws::S3::Model::CompletedPart part;
-            multipart_upload.AddParts(part.WithETag(part_tags[i]).WithPartNumber(i + 1));
+            multipart_upload.AddParts(part.WithETag(part_tags[i]).WithPartNumber(static_cast(i) + 1));
         }

         req.SetMultipartUpload(multipart_upload);
diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp
index 1635cb5c552..e61987163d2 100644
--- a/src/Disks/ObjectStorages/S3/diskSettings.cpp
+++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp
@@ -116,7 +116,8 @@ std::unique_ptr getClient(const Poco::Util::AbstractConfigura
 {
     S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
         config.getString(config_prefix + ".region", ""),
-        context->getRemoteHostFilter(), context->getGlobalContext()->getSettingsRef().s3_max_redirects,
+        context->getRemoteHostFilter(),
+        static_cast(context->getGlobalContext()->getSettingsRef().s3_max_redirects),
         context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging,
         /* for_disk_s3 = */ true);

diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp
index b8702380aa7..a7ff065aca5 100644
--- a/src/Formats/CapnProtoUtils.cpp
+++ b/src/Formats/CapnProtoUtils.cpp
@@ -138,7 +138,7 @@ static String getCapnProtoFullTypeName(const capnp::Type & type)
             auto enum_schema = type.asEnum();
             String enum_name = "Enum(";
             auto enumerants = enum_schema.getEnumerants();
-            for (size_t i = 0; i != enumerants.size(); ++i)
+            for (unsigned i = 0; i != enumerants.size(); ++i)
             {
                 enum_name += String(enumerants[i].getProto().getName()) + " = " + std::to_string(enumerants[i].getOrdinal());
                 if (i + 1 != enumerants.size())
diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp
index 567a2a9ee98..2f56c4242e5 100644
--- a/src/Formats/ProtobufSerializer.cpp
+++ b/src/Formats/ProtobufSerializer.cpp
@@ -864,7 +864,7 @@ namespace
             case FieldTypeId::TYPE_ENUM:
             {
                 write_function = [this](std::string_view str) { writeInt(stringToProtobufEnumValue(str)); };
-                read_function = [this](PaddedPODArray & str) { protobufEnumValueToStringAppend(readInt(), str); };
+                read_function = [this](PaddedPODArray & str) { protobufEnumValueToStringAppend(static_cast(readInt()), str); };
                 default_function = [this]() -> String { return field_descriptor.default_value_enum()->name(); };
                 break;
             }
@@ -1029,7 +1029,7 @@ namespace
             case FieldTypeId::TYPE_ENUM:
             {
                 this->write_function = [this](NumberType value) { writeInt(enumDataTypeValueToProtobufEnumValue(value)); };
-                this->read_function = [this]() -> NumberType { return protobufEnumValueToEnumDataTypeValue(readInt()); };
+                this->read_function = [this]() -> NumberType { return protobufEnumValueToEnumDataTypeValue(static_cast(readInt())); };
                 this->default_function = [this]() -> NumberType { return protobufEnumValueToEnumDataTypeValue(this->field_descriptor.default_value_enum()->number()); };
                 break;
             }
@@ -1539,10 +1539,13 @@ namespace
             read_function = [this]() -> UInt32
             {
                 readStr(text_buffer);
-                return stringToDateTime(text_buffer, date_lut);
+                return static_cast(stringToDateTime(text_buffer, date_lut));
             };

-            default_function = [this]() -> UInt32 { return stringToDateTime(field_descriptor.default_value_string(), date_lut); };
+            default_function = [this]() -> UInt32
+            {
+                return static_cast(stringToDateTime(field_descriptor.default_value_string(), date_lut));
+            };
             break;
         }
diff --git a/src/Functions/CRC.cpp b/src/Functions/CRC.cpp
index 92f0130c19b..91b549873a3 100644
--- a/src/Functions/CRC.cpp
+++ b/src/Functions/CRC.cpp
@@ -15,7 +15,7 @@ struct CRCBase
     {
         for (size_t i = 0; i < 256; ++i)
         {
-            T c = i;
+            T c = static_cast(i);
             for (size_t j = 0; j < 8; ++j)
                 c = c & 1 ? polynomial ^ (c >> 1) : c >> 1;
             tab[i] = c;
@@ -58,7 +58,7 @@ struct CRC32ZLIBImpl

     static UInt32 makeCRC(const unsigned char *buf, size_t size)
     {
-        return crc32_z(0L, buf, size);
+        return static_cast(crc32_z(0L, buf, size));
     }
 };

diff --git a/src/Functions/CustomWeekTransforms.h b/src/Functions/CustomWeekTransforms.h
index b690463d456..d8745001c60 100644
--- a/src/Functions/CustomWeekTransforms.h
+++ b/src/Functions/CustomWeekTransforms.h
@@ -134,14 +134,17 @@ struct WeekTransformer
     void vector(const FromVectorType & vec_from, ToVectorType & vec_to, UInt8 week_mode, const DateLUTImpl & time_zone) const
     {
+        using ValueType = typename ToVectorType::value_type;
         size_t size = vec_from.size();
         vec_to.resize(size);

         for (size_t i = 0; i < size; ++i)
+        {
             if constexpr (is_extended_result)
-                vec_to[i] = transform.executeExtendedResult(vec_from[i], week_mode, time_zone);
+                vec_to[i] = static_cast(transform.executeExtendedResult(vec_from[i], week_mode, time_zone));
             else
-                vec_to[i] = transform.execute(vec_from[i], week_mode, time_zone);
+                vec_to[i] = static_cast(transform.execute(vec_from[i], week_mode, time_zone));
+        }
     }

 private:
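CRCBase above builds the classic 256-entry lookup table; the loop index is a size_t, so seeding each entry with it needs a cast down to the table's element type. A sketch for a 32-bit polynomial (the reflected zlib CRC-32 constant is assumed here):

    #include <cstdint>

    struct CRC32Base
    {
        uint32_t tab[256];

        explicit CRC32Base(uint32_t polynomial)  // e.g. 0xEDB88320 for zlib CRC-32
        {
            for (size_t i = 0; i < 256; ++i)
            {
                uint32_t c = static_cast<uint32_t>(i);  // the narrowing fixed above
                for (size_t j = 0; j < 8; ++j)
                    c = c & 1 ? polynomial ^ (c >> 1) : c >> 1;
                tab[i] = c;
            }
        }
    };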
diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h
index fa66cb2a891..b1d2979da85 100644
--- a/src/Functions/DateTimeTransforms.h
+++ b/src/Functions/DateTimeTransforms.h
@@ -107,11 +107,11 @@ struct ToStartOfDayImpl
         if (t.whole < 0 || (t.whole >= 0 && t.fractional < 0))
             return 0;

-        return time_zone.toDate(std::min(t.whole, Int64(0xffffffff)));
+        return static_cast(time_zone.toDate(std::min(t.whole, Int64(0xffffffff))));
     }
     static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone)
     {
-        return time_zone.toDate(t);
+        return static_cast(time_zone.toDate(t));
     }
     static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone)
     {
@@ -119,15 +119,12 @@ struct ToStartOfDayImpl
             return 0;

         auto date_time = time_zone.fromDayNum(ExtendedDayNum(d));
-        if (date_time <= 0xffffffff)
-            return date_time;
-        else
-            return time_zone.toDate(0xffffffff);
+        return static_cast(date_time <= 0xffffffff ? date_time : time_zone.toDate(0xffffffff));
     }
     static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
     {
         auto date_time = time_zone.fromDayNum(ExtendedDayNum(d));
-        return date_time < 0xffffffff ? date_time : time_zone.toDate(0xffffffff);
+        return static_cast(date_time < 0xffffffff ? date_time : time_zone.toDate(0xffffffff));
     }
     static inline DecimalUtils::DecimalComponents executeExtendedResult(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone)
     {
@@ -251,7 +248,8 @@ struct ToStartOfQuarterImpl

     static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone)
     {
-        return t < 0 ? 0 : time_zone.toFirstDayNumOfQuarter(ExtendedDayNum(std::min(Int64(time_zone.toDayNum(t)), Int64(DATE_LUT_MAX_DAY_NUM))));
+        return t < 0 ? 0 : time_zone.toFirstDayNumOfQuarter(
+            ExtendedDayNum(std::min(time_zone.toDayNum(t), DATE_LUT_MAX_DAY_NUM)));
     }
     static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone)
     {
@@ -316,11 +314,11 @@ struct ToTimeImpl

     static UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone)
     {
-        return time_zone.toTime(t.whole) + 86400;
+        return static_cast(time_zone.toTime(t.whole) + 86400);
     }
     static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone)
     {
-        return time_zone.toTime(t) + 86400;
+        return static_cast(time_zone.toTime(t) + 86400);
     }
     static inline UInt32 execute(Int32, const DateLUTImpl &)
     {
@@ -343,7 +341,7 @@ struct ToStartOfMinuteImpl
         if (t.whole < 0 || (t.whole >= 0 && t.fractional < 0))
             return 0;

-        return time_zone.toStartOfMinute(std::min(t.whole, Int64(0xffffffff)));
+        return static_cast(time_zone.toStartOfMinute(std::min(t.whole, Int64(0xffffffff))));
     }
     static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone)
     {
@@ -541,7 +539,7 @@ struct ToStartOfFiveMinutesImpl

     static inline UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone)
     {
-        return time_zone.toStartOfFiveMinutes(t.whole);
+        return static_cast(time_zone.toStartOfFiveMinutes(t.whole));
     }
     static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone)
     {
@@ -573,7 +571,7 @@ struct ToStartOfTenMinutesImpl

     static inline UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone)
     {
-        return time_zone.toStartOfTenMinutes(t.whole);
+        return static_cast(time_zone.toStartOfTenMinutes(t.whole));
     }
     static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone)
     {
@@ -605,7 +603,7 @@ struct ToStartOfFifteenMinutesImpl

     static inline UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & time_zone)
     {
-        return time_zone.toStartOfFifteenMinutes(t.whole);
+        return static_cast(time_zone.toStartOfFifteenMinutes(t.whole));
     }
     static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone)
     {
@@ -638,7 +636,7 @@ struct TimeSlotImpl

     static inline UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl &)
     {
-        return t.whole / 1800 * 1800;
+        return static_cast(t.whole / 1800 * 1800);
     }

     static inline UInt32 execute(UInt32 t, const DateLUTImpl &)
@@ -680,7 +678,7 @@ struct ToStartOfHourImpl
         if (t.whole < 0 || (t.whole >= 0 && t.fractional < 0))
             return 0;

-        return time_zone.toStartOfHour(std::min(t.whole, Int64(0xffffffff)));
+        return static_cast(time_zone.toStartOfHour(std::min(t.whole, Int64(0xffffffff))));
     }

     static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone)
@@ -1215,9 +1213,9 @@ struct ToRelativeHourNumImpl
     static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone)
     {
         if constexpr (precision_ == ResultPrecision::Extended)
-            return time_zone.toStableRelativeHourNum(static_cast(t));
+            return static_cast(time_zone.toStableRelativeHourNum(static_cast(t)));
         else
-            return time_zone.toRelativeHourNum(static_cast(t));
+            return static_cast(time_zone.toRelativeHourNum(static_cast(t)));
     }
     static inline auto execute(Int32 d, const DateLUTImpl & time_zone)
     {
@@ -1229,9 +1227,9 @@ struct ToRelativeHourNumImpl
     static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
     {
         if constexpr (precision_ == ResultPrecision::Extended)
-            return time_zone.toStableRelativeHourNum(DayNum(d));
+            return static_cast(time_zone.toStableRelativeHourNum(DayNum(d)));
         else
-            return time_zone.toRelativeHourNum(DayNum(d));
+            return static_cast(time_zone.toRelativeHourNum(DayNum(d)));
     }

     using FactorTransform = ZeroTransform;
@@ -1251,7 +1249,7 @@ struct ToRelativeMinuteNumImpl
     }
     static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone)
     {
-        return time_zone.toRelativeMinuteNum(static_cast(t));
+        return static_cast(time_zone.toRelativeMinuteNum(static_cast(t)));
     }
     static inline auto execute(Int32 d, const DateLUTImpl & time_zone)
     {
@@ -1262,7 +1260,7 @@ struct ToRelativeMinuteNumImpl
     }
     static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
     {
-        return time_zone.toRelativeMinuteNum(DayNum(d));
+        return static_cast(time_zone.toRelativeMinuteNum(DayNum(d)));
     }

     using FactorTransform = ZeroTransform;
@@ -1290,7 +1288,7 @@ struct ToRelativeSecondNumImpl
     }
     static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
     {
-        return time_zone.fromDayNum(DayNum(d));
+        return static_cast(time_zone.fromDayNum(DayNum(d)));
     }

     using FactorTransform = ZeroTransform;
@@ -1375,14 +1373,17 @@ struct Transformer
     template
     static void vector(const FromTypeVector & vec_from, ToTypeVector & vec_to, const DateLUTImpl & time_zone, const Transform & transform)
     {
+        using ValueType = typename ToTypeVector::value_type;
         size_t size = vec_from.size();
         vec_to.resize(size);

         for (size_t i = 0; i < size; ++i)
+        {
             if constexpr (is_extended_result)
-                vec_to[i] = transform.executeExtendedResult(vec_from[i], time_zone);
+                vec_to[i] = static_cast(transform.executeExtendedResult(vec_from[i], time_zone));
             else
-                vec_to[i] = transform.execute(vec_from[i], time_zone);
+                vec_to[i] = static_cast(transform.execute(vec_from[i], time_zone));
+        }
     }
 };

diff --git a/src/Functions/DivisionUtils.h b/src/Functions/DivisionUtils.h
index e120595c4d9..98e5c690eb9 100644
--- a/src/Functions/DivisionUtils.h
+++ b/src/Functions/DivisionUtils.h
@@ -163,7 +163,7 @@ struct ModuloImpl
             return static_cast(int_a % static_cast(int_b));
         }
         else
-            return IntegerAType(a) % IntegerBType(b);
+            return static_cast(IntegerAType(a) % IntegerBType(b));
     }
 }
diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h
index baa3c65537d..4e77d085248 100644
--- a/src/Functions/FunctionBinaryArithmetic.h
+++ b/src/Functions/FunctionBinaryArithmetic.h
@@ -415,8 +415,8 @@ public:
         {
             for (size_t i = 0; i < size; ++i)
                 c[i] = applyScaled(
-                    unwrap(a, i),
-                    unwrap(b, i),
+                    static_cast(unwrap(a, i)),
+                    static_cast(unwrap(b, i)),
                     scale_a);
             return;
         }
@@ -424,8 +424,8 @@ public:
         {
             for (size_t i = 0; i < size; ++i)
                 c[i] = applyScaled(
-                    unwrap(a, i),
-                    unwrap(b, i),
+                    static_cast(unwrap(a, i)),
+                    static_cast(unwrap(b, i)),
                     scale_b);
             return;
         }
@@ -436,8 +436,8 @@ public:
         {
             for (size_t i = 0; i < size; ++i)
                 c[i] = applyScaled(
-                    unwrap(a, i),
-                    unwrap(b, i),
+                    static_cast(unwrap(a, i)),
+                    static_cast(unwrap(b, i)),
                     scale_a);
             return;
         }
@@ -445,8 +445,8 @@ public:
         {
             for (size_t i = 0; i < size; ++i)
                 c[i] = applyScaled(
-                    unwrap(a, i),
-                    unwrap(b, i),
+                    static_cast(unwrap(a, i)),
+                    static_cast(unwrap(b, i)),
                     scale_b);
             return;
         }
@@ -456,12 +456,20 @@ public:
         {
             processWithRightNullmapImpl(a, b, c, size, right_nullmap, [&scale_a](const auto & left, const auto & right)
             {
-                return applyScaledDiv(left, right, scale_a);
+                return applyScaledDiv(
+                    static_cast(left), right, scale_a);
             });
             return;
         }

-        processWithRightNullmapImpl(a, b, c, size, right_nullmap, [](const auto & left, const auto & right){ return apply(left, right); });
+        processWithRightNullmapImpl(
+            a, b, c, size, right_nullmap,
+            [](const auto & left, const auto & right)
+            {
+                return apply(
+                    static_cast(left),
+                    static_cast(right));
+            });
     }

     template
@@ -995,8 +1003,10 @@ class FunctionBinaryArithmetic : public IFunction
         /// non-vector result
         if (col_left_const && col_right_const)
         {
-            const NativeResultType const_a = helperGetOrConvert(col_left_const, left);
-            const NativeResultType const_b = helperGetOrConvert(col_right_const, right);
+            const NativeResultType const_a = static_cast(
+                helperGetOrConvert(col_left_const, left));
+            const NativeResultType const_b = static_cast(
+                helperGetOrConvert(col_right_const, right));

             ResultType res = {};
             if (!right_nullmap || !(*right_nullmap)[0])
@@ -1020,14 +1030,16 @@ class FunctionBinaryArithmetic : public IFunction
         }
         else if (col_left_const && col_right)
         {
-            const NativeResultType const_a = helperGetOrConvert(col_left_const, left);
+            const NativeResultType const_a = static_cast(
+                helperGetOrConvert(col_left_const, left));

             helperInvokeEither(
                 const_a, col_right->getData(), vec_res, scale_a, scale_b, right_nullmap);
         }
         else if (col_left && col_right_const)
         {
-            const NativeResultType const_b = helperGetOrConvert(col_right_const, right);
+            const NativeResultType const_b = static_cast(
+                helperGetOrConvert(col_right_const, right));

             helperInvokeEither(
                 col_left->getData(), const_b, vec_res, scale_a, scale_b, right_nullmap);
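In FunctionBinaryArithmetic above, decimal operands are widened to the result's native type before the scaled operation; the lambda arguments are `const auto &`, so without the casts the arithmetic would run in the argument's own, possibly narrower, type. A reduced sketch of the idea, with hypothetical types:

    #include <cstdint>

    using NativeResultType = int64_t;

    // One operand is pre-multiplied by the decimal scale before dividing.
    NativeResultType applyScaledDiv(NativeResultType a, NativeResultType b, NativeResultType scale)
    {
        return (a * scale) / b;  // overflow/zero checks omitted in this sketch
    }

    NativeResultType divideDecimal(int32_t left, int32_t right, NativeResultType scale)
    {
        return applyScaledDiv(static_cast<NativeResultType>(left),
                              static_cast<NativeResultType>(right), scale);
    }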
diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h
index 29dcf87316d..2259cc71f07 100644
--- a/src/Functions/FunctionDateOrDateTimeAddInterval.h
+++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h
@@ -63,7 +63,7 @@ struct AddNanosecondsImpl
     static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
     {
         Int64 multiplier = DecimalUtils::scaleMultiplier(9);
-        return t * multiplier + delta;
+        return static_cast(t * multiplier + delta);
     }

     static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
@@ -107,7 +107,7 @@ struct AddMicrosecondsImpl
     static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
     {
         Int64 multiplier = DecimalUtils::scaleMultiplier(6);
-        return t * multiplier + delta;
+        return static_cast(t * multiplier + delta);
     }

     static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
@@ -151,7 +151,7 @@ struct AddMillisecondsImpl
     static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
     {
         Int64 multiplier = DecimalUtils::scaleMultiplier(3);
-        return t * multiplier + delta;
+        return static_cast(t * multiplier + delta);
     }

     static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
@@ -183,7 +183,7 @@ struct AddSecondsImpl

     static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
     {
-        return t + delta;
+        return static_cast(t + delta);
     }

     static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
@@ -194,7 +194,7 @@ struct AddSecondsImpl

     static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
     {
-        return time_zone.fromDayNum(DayNum(d)) + delta;
+        return static_cast(time_zone.fromDayNum(DayNum(d)) + delta);
     }
 };

@@ -216,7 +216,7 @@ struct AddMinutesImpl

     static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
     {
-        return t + delta * 60;
+        return static_cast(t + delta * 60);
     }

     static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
@@ -227,7 +227,7 @@ struct AddMinutesImpl

     static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
     {
-        return time_zone.fromDayNum(DayNum(d)) + delta * 60;
+        return static_cast(time_zone.fromDayNum(DayNum(d)) + delta * 60);
     }
 };

@@ -249,7 +249,7 @@ struct AddHoursImpl

     static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
     {
-        return t + delta * 3600;
+        return static_cast(t + delta * 3600);
     }

     static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
@@ -260,7 +260,7 @@ struct AddHoursImpl

     static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
     {
-        return time_zone.fromDayNum(DayNum(d)) + delta * 3600;
+        return static_cast(time_zone.fromDayNum(DayNum(d)) + delta * 3600);
     }
 };

@@ -284,7 +284,7 @@ struct AddDaysImpl

     static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
     {
-        return time_zone.addDays(t, delta);
+        return static_cast(time_zone.addDays(t, delta));
     }

     static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &, UInt16 = 0)
@@ -294,7 +294,7 @@ struct AddDaysImpl

     static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &, UInt16 = 0)
     {
-        return d + delta;
+        return static_cast(d + delta);
     }
 };

@@ -303,32 +303,32 @@ struct AddWeeksImpl
     static constexpr auto name = "addWeeks";

     static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents
-    execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
+    execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
     {
         return {time_zone.addWeeks(t.whole, delta), t.fractional};
     }

     static inline NO_SANITIZE_UNDEFINED DateTime64
-    execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
+    execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
     {
         auto multiplier = DecimalUtils::scaleMultiplier(scale);
         auto d = std::div(t, multiplier);
         return time_zone.addDays(d.quot, delta * 7) * multiplier + d.rem;
     }

-    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
     {
-        return time_zone.addWeeks(t, delta);
+        return static_cast(time_zone.addWeeks(t, delta));
     }

-    static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &, UInt16 = 0)
+    static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &, UInt16 = 0)
     {
-        return d + delta * 7;
+        return static_cast(d + delta * 7);
     }

-    static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &, UInt16 = 0)
+    static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &, UInt16 = 0)
     {
-        return d + delta * 7;
+        return static_cast(d + delta * 7);
     }
 };

@@ -352,7 +352,7 @@ struct AddMonthsImpl

     static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
     {
-        return time_zone.addMonths(t, delta);
+        return static_cast(time_zone.addMonths(t, delta));
     }

     static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
@@ -371,30 +371,30 @@ struct AddQuartersImpl
     static constexpr auto name = "addQuarters";

     static inline DecimalUtils::DecimalComponents
-    execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
+    execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
     {
         return {time_zone.addQuarters(t.whole, delta), t.fractional};
     }

     static inline NO_SANITIZE_UNDEFINED DateTime64
-    execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
+    execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
     {
         auto multiplier = DecimalUtils::scaleMultiplier(scale);
         auto d = std::div(t, multiplier);
         return time_zone.addQuarters(d.quot, delta) * multiplier + d.rem;
     }

-    static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
+    static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
     {
-        return time_zone.addQuarters(t, delta);
+        return static_cast(time_zone.addQuarters(t, delta));
     }

-    static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
+    static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
     {
         return time_zone.addQuarters(DayNum(d), delta);
     }

-    static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
+    static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
     {
         return time_zone.addQuarters(ExtendedDayNum(d), delta);
     }
@@ -420,7 +420,7 @@ struct AddYearsImpl

     static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
     {
-        return time_zone.addYears(t, delta);
+        return static_cast(time_zone.addYears(t, delta));
     }

     static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
diff --git a/src/Functions/FunctionIfBase.h b/src/Functions/FunctionIfBase.h
index 2b0f8289914..4c9ecf78a12 100644
--- a/src/Functions/FunctionIfBase.h
+++ b/src/Functions/FunctionIfBase.h
@@ -67,7 +67,7 @@ public:

         b.SetInsertPoint(join);

-        auto * phi = b.CreatePHI(toNativeType(b, return_type), returns.size());
+        auto * phi = b.CreatePHI(toNativeType(b, return_type), static_cast(returns.size()));
         for (const auto & [block, value] : returns)
             phi->addIncoming(value, block);

diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h
index dce953ddc6f..3a0f4f483b3 100644
--- a/src/Functions/FunctionSQLJSON.h
+++ b/src/Functions/FunctionSQLJSON.h
@@ -164,7 +164,7 @@ public:
         /// 2. Create ASTPtr
         /// 3. Parser(Tokens, ASTPtr) -> complete AST
         /// 4. Execute functions: call getNextItem on generator and handle each item
-        uint32_t parse_depth = getContext()->getSettingsRef().max_parser_depth;
+        unsigned parse_depth = static_cast(getContext()->getSettingsRef().max_parser_depth);
 #if USE_SIMDJSON
         if (getContext()->getSettingsRef().allow_simdjson)
             return FunctionSQLJSONHelpers::Executor::run(arguments, result_type, input_rows_count, parse_depth);
diff --git a/src/Functions/FunctionSnowflake.h b/src/Functions/FunctionSnowflake.h
index f4a62e509ed..998db98890a 100644
--- a/src/Functions/FunctionSnowflake.h
+++ b/src/Functions/FunctionSnowflake.h
@@ -109,7 +109,8 @@ public:

         for (size_t i = 0; i < input_rows_count; ++i)
         {
-            result_data[i] = ((source_data[i] >> time_shift) + snowflake_epoch) / 1000;
+            result_data[i] = static_cast(
+                ((source_data[i] >> time_shift) + snowflake_epoch) / 1000);
         }
         return res_column;
     }
diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h
index 8c248d79c4b..d869ccccca8 100644
--- a/src/Functions/FunctionUnixTimestamp64.h
+++ b/src/Functions/FunctionUnixTimestamp64.h
@@ -57,7 +57,7 @@ public:

         const auto & source_data = typeid_cast &>(col).getData();

-        const Int32 scale_diff = typeid_cast(*src.type).getScale() - target_scale;
+        const Int32 scale_diff = static_cast(typeid_cast(*src.type).getScale() - target_scale);
         if (scale_diff == 0)
         {
             for (size_t i = 0; i < input_rows_count; ++i)
@@ -140,7 +140,7 @@ public:

     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
     {
-        auto result_column = ColumnDecimal::create(input_rows_count, target_scale);
+        auto result_column = ColumnDecimal::create(input_rows_count, static_cast(target_scale));

         if (!((executeType(result_column, arguments, input_rows_count))
             || (executeType(result_column, arguments, input_rows_count))
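The FunctionSnowflake change converts a snowflake ID to a DateTime: the top bits of the ID are a millisecond offset from the generator's epoch, and dividing by 1000 yields seconds, which fit DateTime's UInt32. A sketch with the commonly used constants (Twitter's epoch and a 22-bit worker/sequence suffix; the actual values live in FunctionSnowflake.h):

    #include <cstdint>

    uint32_t snowflakeIdToSeconds(int64_t id)
    {
        constexpr int64_t snowflake_epoch = 1288834974657LL;  // 2010-11-04, in ms
        constexpr int64_t time_shift = 22;                    // worker + sequence bits
        return static_cast<uint32_t>(((id >> time_shift) + snowflake_epoch) / 1000);
    }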
diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h
index 94ff55d180b..ad129a315b3 100644
--- a/src/Functions/FunctionsAES.h
+++ b/src/Functions/FunctionsAES.h
@@ -2,6 +2,7 @@

 #include "config.h"

+#include
 #include
 #include
 #include
@@ -336,7 +337,7 @@ private:
                     if (EVP_EncryptInit_ex(evp_ctx, evp_cipher, nullptr, nullptr, nullptr) != 1)
                         onError("Failed to initialize encryption context with cipher");

-                    if (EVP_CIPHER_CTX_ctrl(evp_ctx, EVP_CTRL_AEAD_SET_IVLEN, iv_value.size, nullptr) != 1)
+                    if (EVP_CIPHER_CTX_ctrl(evp_ctx, EVP_CTRL_AEAD_SET_IVLEN, safe_cast(iv_value.size), nullptr) != 1)
                         onError("Failed to set custom IV length to " + std::to_string(iv_value.size));

                     if (EVP_EncryptInit_ex(evp_ctx, nullptr, nullptr,
@@ -350,7 +351,7 @@ private:
                         const auto aad_data = aad_column->getDataAt(row_idx);
                         int tmp_len = 0;
                         if (aad_data.size != 0 && EVP_EncryptUpdate(evp_ctx, nullptr, &tmp_len,
-                            reinterpret_cast(aad_data.data), aad_data.size) != 1)
+                            reinterpret_cast(aad_data.data), safe_cast(aad_data.size)) != 1)
                             onError("Failed to set AAD data");
                     }
                 }
@@ -636,7 +637,7 @@ private:
                         onError("Failed to initialize cipher context 1");

                     // 1.a.1 : Set custom IV length
-                    if (EVP_CIPHER_CTX_ctrl(evp_ctx, EVP_CTRL_AEAD_SET_IVLEN, iv_value.size, nullptr) != 1)
+                    if (EVP_CIPHER_CTX_ctrl(evp_ctx, EVP_CTRL_AEAD_SET_IVLEN, safe_cast(iv_value.size), nullptr) != 1)
                         onError("Failed to set custom IV length to " + std::to_string(iv_value.size));

                     // 1.a.1 : Init CTX with key and IV
@@ -651,7 +652,7 @@ private:
                         StringRef aad_data = aad_column->getDataAt(row_idx);
                         int tmp_len = 0;
                         if (aad_data.size != 0 && EVP_DecryptUpdate(evp_ctx, nullptr, &tmp_len,
-                            reinterpret_cast(aad_data.data), aad_data.size) != 1)
+                            reinterpret_cast(aad_data.data), safe_cast(aad_data.size)) != 1)
                             onError("Failed to sed AAD data");
                     }
                 }
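safe_cast in the FunctionsAES hunks (hence the new include at the top of the file) differs from static_cast in that it validates that the value fits the destination type: EVP_CIPHER_CTX_ctrl takes the IV length as a plain int, while ClickHouse tracks sizes as size_t. A sketch of the equivalent check, using only the OpenSSL calls already shown above:

    #include <climits>
    #include <cstddef>
    #include <openssl/evp.h>

    bool setAeadIvLength(EVP_CIPHER_CTX * ctx, size_t iv_size)
    {
        if (iv_size > INT_MAX)
            return false;  // what safe_cast enforces, by throwing instead
        return EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_IVLEN,
                                   static_cast<int>(iv_size), nullptr) == 1;
    }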
diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h
index 8cbe3b0e532..b2f2fb0e25f 100644
--- a/src/Functions/FunctionsConversion.h
+++ b/src/Functions/FunctionsConversion.h
@@ -101,7 +101,7 @@ inline UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column)

     Field field;
     named_column.column->get(0, field);
-    return field.get();
+    return static_cast(field.get());
 }

 /// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type.
@@ -335,7 +335,7 @@ struct ToDateTimeImpl
             return 0;

         auto date_time = time_zone.fromDayNum(ExtendedDayNum(d));
-        return date_time <= 0xffffffff ? date_time : 0xffffffff;
+        return date_time <= 0xffffffff ? static_cast(date_time) : 0xffffffff;
     }

     static UInt32 execute(UInt32 dt, const DateLUTImpl & /*time_zone*/)
@@ -349,7 +349,7 @@ struct ToDateTimeImpl
             return 0;

         auto date_time = time_zone.toDate(d);
-        return date_time <= 0xffffffff ? date_time : 0xffffffff;
+        return date_time <= 0xffffffff ? static_cast(date_time) : 0xffffffff;
     }

     static UInt32 execute(const DecimalUtils::DecimalComponents & t, const DateLUTImpl & /*time_zone*/)
@@ -357,7 +357,7 @@ struct ToDateTimeImpl
         if (t.whole < 0 || (t.whole >= 0 && t.fractional < 0))
             return 0;

-        return std::min(t.whole, Int64(0xFFFFFFFF));
+        return t.whole > 0xffffffff ? 0xffffffff : static_cast(t.whole);
     }
 };

@@ -434,7 +434,7 @@ struct ToDate32Transform32Or64
     static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
     {
         return (from < DATE_LUT_MAX_EXTEND_DAY_NUM)
-            ? from
+            ? static_cast(from)
            : std::min(Int32(time_zone.toDayNum(from)), Int32(DATE_LUT_MAX_EXTEND_DAY_NUM));
     }
 };
@@ -531,7 +531,7 @@ struct ToDateTimeTransform64

     static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
     {
-        return std::min(Int64(from), Int64(0xFFFFFFFF));
+        return from > 0xffffffff ? 0xffffffff : static_cast(from);
     }
 };

@@ -558,7 +558,7 @@ struct ToDateTimeTransform64Signed
         if (from < 0)
             return 0;

-        return std::min(Int64(from), Int64(0xFFFFFFFF));
+        return from > 0xffffffff ? 0xffffffff : static_cast(from);
     }
 };

@@ -618,8 +618,8 @@ struct ToDateTime64TransformSigned

     NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const
     {
-        from = std::max(from, LUT_MIN_TIME);
-        from = std::min(from, LUT_MAX_TIME);
+        from = static_cast(std::max(from, LUT_MIN_TIME));
+        from = static_cast(std::min(from, LUT_MAX_TIME));
         return DecimalUtils::decimalFromComponentsWithMultiplier(from, 0, scale_multiplier);
     }
 };
@@ -979,7 +979,7 @@ inline void convertFromTime(DataTypeDate::FieldType & x, time_t &
 template <>
 inline void convertFromTime(DataTypeDate32::FieldType & x, time_t & time)
 {
-    x = time;
+    x = static_cast(time);
 }

 template <>
@@ -990,7 +990,7 @@ inline void convertFromTime(DataTypeDateTime::FieldType & x, t
     else if (unlikely(time > 0xFFFFFFFF))
         x = 0xFFFFFFFF;
     else
-        x = time;
+        x = static_cast(time);
 }

 /** Conversion of strings to numbers, dates, datetimes: through parsing.
@@ -1070,7 +1070,7 @@ inline bool tryParseImpl(DataTypeDateTime::FieldType & x, Read
     time_t tmp = 0;
     if (!tryReadDateTimeText(tmp, rb, *time_zone))
         return false;
-    x = tmp;
+    x = static_cast(tmp);
     return true;
 }

diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp
index aefc82d2f5d..493fc36ca3c 100644
--- a/src/Functions/FunctionsJSON.cpp
+++ b/src/Functions/FunctionsJSON.cpp
@@ -231,7 +231,7 @@ private:
             {
                 case MoveType::ConstIndex:
                 {
-                    if (!moveToElementByIndex(res_element, moves[j].index, key))
+                    if (!moveToElementByIndex(res_element, static_cast(moves[j].index), key))
                         return false;
                     break;
                }
@@ -245,7 +245,7 @@ private:
                 case MoveType::Index:
                 {
                     Int64 index = (*arguments[j + 1].column)[row].get();
-                    if (!moveToElementByIndex(res_element, index, key))
+                    if (!moveToElementByIndex(res_element, static_cast(index), key))
                         return false;
                     break;
                 }
diff --git a/src/Functions/FunctionsLanguageClassification.cpp b/src/Functions/FunctionsLanguageClassification.cpp
index ecc958a0a0c..ebdb2f1afaa 100644
--- a/src/Functions/FunctionsLanguageClassification.cpp
+++ b/src/Functions/FunctionsLanguageClassification.cpp
@@ -83,7 +83,10 @@ struct FunctionDetectLanguageImpl

             if (UTF8::isValidUTF8(str, str_len))
             {
-                auto lang = CLD2::DetectLanguage(reinterpret_cast(str), str_len, true, &is_reliable);
+                auto lang = CLD2::DetectLanguage(
+                    reinterpret_cast(str),
+                    static_cast(str_len),
+                    true, &is_reliable);
                 res = codeISO(LanguageCode(lang));
             }
             else
@@ -178,7 +181,10 @@ public:

             if (UTF8::isValidUTF8(str, str_len))
             {
-                CLD2::DetectLanguageSummary(reinterpret_cast(str), str_len, true, result_lang_top3, pc, bytes, &is_reliable);
+                CLD2::DetectLanguageSummary(
+                    reinterpret_cast(str),
+                    static_cast(str_len),
+                    true, result_lang_top3, pc, bytes, &is_reliable);

                 for (size_t j = 0; j < top_N; ++j)
                 {
diff --git a/src/Functions/FunctionsLogical.h b/src/Functions/FunctionsLogical.h
index d7e8ffb0c9f..d2b9a96b4ad 100644
--- a/src/Functions/FunctionsLogical.h
+++ b/src/Functions/FunctionsLogical.h
@@ -193,7 +193,7 @@ public:
         auto * next = b.GetInsertBlock();
         auto * stop = llvm::BasicBlock::Create(next->getContext(), "", next->getParent());
         b.SetInsertPoint(stop);
-        auto * phi = b.CreatePHI(b.getInt8Ty(), values.size());
+        auto * phi = b.CreatePHI(b.getInt8Ty(), static_cast(values.size()));
         for (size_t i = 0; i < types.size(); ++i)
         {
             b.SetInsertPoint(next);
diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp
index 96c41225242..40522fa1a6e 100644
--- a/src/Functions/FunctionsRandom.cpp
+++ b/src/Functions/FunctionsRandom.cpp
@@ -37,7 +37,7 @@ namespace
         UInt32 next()
         {
             current = current * a + c;
-            return current >> 16;
+            return static_cast(current >> 16);
         }
     };

diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h
index ccab6e9feca..283f1ea5a43 100644
--- a/src/Functions/FunctionsRound.h
+++ b/src/Functions/FunctionsRound.h
@@ -178,7 +178,7 @@ struct IntegerRoundingComputation
                 return;
             }
         }
-        *out = compute(*in, scale);
+        *out = compute(*in, static_cast(scale));
     }

     static ALWAYS_INLINE void compute(const T * __restrict in, T scale, T * __restrict out) requires(!std::integral)
@@ -436,7 +436,7 @@ public:
             scale_arg = in_scale - scale_arg;
             if (scale_arg > 0)
             {
-                auto scale = intExp10OfSize(scale_arg);
+                auto scale = intExp10OfSize(scale_arg);

                 const NativeType * __restrict p_in = reinterpret_cast(in.data());
                 const NativeType * end_in = reinterpret_cast(in.data()) + in.size();
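The FunctionsRandom generator above is a linear congruential generator whose 64-bit state is advanced and whose higher bits are taken as output; the shifted value is still 64 bits wide, hence the explicit narrowing on return. A sketch with illustrative constants (the real multiplier and increment are defined elsewhere in FunctionsRandom.cpp):

    #include <cstdint>

    struct LinearCongruentialGenerator
    {
        uint64_t current = 0;
        static constexpr uint64_t a = 6364136223846793005ULL;  // illustrative
        static constexpr uint64_t c = 1442695040888963407ULL;  // illustrative

        uint32_t next()
        {
            current = current * a + c;
            return static_cast<uint32_t>(current >> 16);  // high bits mix better
        }
    };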
diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp
index e7dbe4087f2..949503e2367 100644
--- a/src/Functions/FunctionsStringHash.cpp
+++ b/src/Functions/FunctionsStringHash.cpp
@@ -35,13 +35,13 @@ struct Hash
 #ifdef __SSE4_2__
         return _mm_crc32_u64(crc, val);
 #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
-        return __crc32cd(crc, val);
+        return __crc32cd(static_cast<UInt32>(crc), val);
 #else
         throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED);
 #endif
     }
 
-    static UInt64 crc32u32(UInt64 crc [[maybe_unused]], UInt32 val [[maybe_unused]])
+    static UInt64 crc32u32(UInt32 crc [[maybe_unused]], UInt32 val [[maybe_unused]])
     {
 #ifdef __SSE4_2__
         return _mm_crc32_u32(crc, val);
@@ -52,7 +52,7 @@ struct Hash
 #endif
     }
 
-    static UInt64 crc32u16(UInt64 crc [[maybe_unused]], UInt16 val [[maybe_unused]])
+    static UInt64 crc32u16(UInt32 crc [[maybe_unused]], UInt16 val [[maybe_unused]])
     {
 #ifdef __SSE4_2__
         return _mm_crc32_u16(crc, val);
@@ -63,7 +63,7 @@ struct Hash
 #endif
     }
 
-    static UInt64 crc32u8(UInt64 crc [[maybe_unused]], UInt8 val [[maybe_unused]])
+    static UInt64 crc32u8(UInt32 crc [[maybe_unused]], UInt8 val [[maybe_unused]])
     {
 #ifdef __SSE4_2__
         return _mm_crc32_u8(crc, val);
@@ -84,7 +84,7 @@ struct Hash
             if constexpr (CaseInsensitive)
                 x |= 0x20u; /// see toLowerIfAlphaASCII from StringUtils.h
 
-            crc = crc32u8(crc, x);
+            crc = crc32u8(static_cast<UInt32>(crc), x);
             --size;
             ++start;
         }
@@ -96,7 +96,7 @@ struct Hash
             if constexpr (CaseInsensitive)
                 x |= 0x2020u;
 
-            crc = crc32u16(crc, x);
+            crc = crc32u16(static_cast<UInt32>(crc), x);
             size -= 2;
             start += 2;
         }
@@ -108,7 +108,7 @@ struct Hash
             if constexpr (CaseInsensitive)
                 x |= 0x20202020u;
 
-            crc = crc32u32(crc, x);
+            crc = crc32u32(static_cast<UInt32>(crc), x);
             size -= 4;
             start += 4;
         }
diff --git a/src/Functions/FunctionsTimeWindow.h b/src/Functions/FunctionsTimeWindow.h
index 3ea397e4c7d..4346e691046 100644
--- a/src/Functions/FunctionsTimeWindow.h
+++ b/src/Functions/FunctionsTimeWindow.h
@@ -64,7 +64,7 @@ struct ToStartOfTransform;
     {
         static UInt32 execute(UInt32 t, UInt64 delta, const DateLUTImpl & time_zone)
         {
-            return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), delta);
+            return static_cast<UInt32>(time_zone.toStartOfDayInterval(time_zone.toDayNum(t), delta));
         }
     };
@@ -74,7 +74,7 @@ struct ToStartOfTransform;
     { \
         static UInt32 execute(UInt32 t, UInt64 delta, const DateLUTImpl & time_zone) \
         { \
-            return time_zone.toStartOf##INTERVAL_KIND##Interval(t, delta); \
+            return static_cast<UInt32>(time_zone.toStartOf##INTERVAL_KIND##Interval(t, delta)); \
         } \
     };
     TRANSFORM_TIME(Hour)
@@ -114,7 +114,7 @@ template<> \
     template <> \
     struct AddTime<IntervalKind::INTERVAL_KIND> \
     { \
-        static inline auto execute(UInt16 d, UInt64 delta, const DateLUTImpl & time_zone) \
+        static inline auto execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) \
         { \
             return time_zone.add##INTERVAL_KIND##s(ExtendedDayNum(d), delta); \
         } \
     };
@@ -127,14 +127,18 @@ template<> \
     template <>
     struct AddTime<IntervalKind::Week>
     {
-        static inline NO_SANITIZE_UNDEFINED ExtendedDayNum execute(UInt16 d, UInt64 delta, const DateLUTImpl &) { return ExtendedDayNum(d + delta * 7);}
+        static inline NO_SANITIZE_UNDEFINED ExtendedDayNum execute(UInt16 d, UInt64 delta, const DateLUTImpl &)
+        {
+            return ExtendedDayNum(static_cast<Int32>(d + delta * 7));
+        }
     };
 
 #define ADD_TIME(INTERVAL_KIND, INTERVAL) \
     template <> \
     struct AddTime<IntervalKind::INTERVAL_KIND> \
     { \
-        static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) { return t + delta * INTERVAL; } \
+        static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) \
+        { return static_cast<UInt32>(t + delta * INTERVAL); } \
     };
     ADD_TIME(Day, 86400)
     ADD_TIME(Hour, 3600)
diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h
index ef2b9e6eede..d6b80ff3836 100644
--- a/src/Functions/GregorianDate.h
+++ b/src/Functions/GregorianDate.h
@@ -340,7 +340,7 @@ namespace DB
             const auto d = gd::mod(c, 1461);
             const auto y = gd::min(gd::div(d, 365), 3);
             day_of_year_ = d - y * 365 + 1;
-            year_ = quad_cent * 400 + cent * 100 + quad * 4 + y + 1;
+            year_ = static_cast(quad_cent * 400 + cent * 100 + quad * 4 + y + 1);
         }
 
         template
diff --git a/src/Functions/JSONPath/Generator/GeneratorJSONPath.h b/src/Functions/JSONPath/Generator/GeneratorJSONPath.h
index fe00f06bbbf..3d646a6ff31 100644
--- a/src/Functions/JSONPath/Generator/GeneratorJSONPath.h
+++ b/src/Functions/JSONPath/Generator/GeneratorJSONPath.h
@@ -89,7 +89,7 @@ public:
         for (size_t i = current_visitor; i < visitors.size(); ++i)
         {
             status = visitors[i]->visit(current);
-            current_visitor = i;
+            current_visitor = static_cast(i);
             if (status == VisitorStatus::Error || status == VisitorStatus::Ignore)
             {
                 break;
diff --git a/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp b/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp
index bc153b9d747..03c006774c0 100644
--- a/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp
+++ b/src/Functions/JSONPath/Parsers/ParserJSONPathRange.cpp
@@ -46,7 +46,7 @@ bool ParserJSONPathRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
         {
             return false;
         }
-        range_indices.first = number_ptr->as<ASTLiteral>()->value.get<UInt32>();
+        range_indices.first = static_cast<UInt32>(number_ptr->as<ASTLiteral>()->value.get<UInt32>());
 
         if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingSquareBracket)
         {
@@ -63,7 +63,7 @@ bool ParserJSONPathRange::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
             {
                 return false;
             }
-            range_indices.second = number_ptr->as<ASTLiteral>()->value.get<UInt32>();
+            range_indices.second = static_cast<UInt32>(number_ptr->as<ASTLiteral>()->value.get<UInt32>());
         }
         else
         {
diff --git a/src/Functions/MultiMatchAllIndicesImpl.h b/src/Functions/MultiMatchAllIndicesImpl.h
index 3490c854f22..8e355405093 100644
--- a/src/Functions/MultiMatchAllIndicesImpl.h
+++ b/src/Functions/MultiMatchAllIndicesImpl.h
@@ -116,7 +116,7 @@ struct MultiMatchAllIndicesImpl
             err = hs_scan(
                 regexps->getDB(),
                 reinterpret_cast<const char *>(haystack_data.data()) + offset,
-                length,
+                static_cast<unsigned>(length),
                 0,
                 smart_scratch.get(),
                 on_match,
@@ -227,7 +227,7 @@ struct MultiMatchAllIndicesImpl
             err = hs_scan(
                 regexps->getDB(),
                 reinterpret_cast<const char *>(haystack_data.data()) + prev_haystack_offset,
-                cur_haystack_length,
+                static_cast<unsigned>(cur_haystack_length),
                 0,
                 smart_scratch.get(),
                 on_match,
diff --git a/src/Functions/MultiMatchAnyImpl.h b/src/Functions/MultiMatchAnyImpl.h
index 2d4db261bb4..4b02e78dc25 100644
--- a/src/Functions/MultiMatchAnyImpl.h
+++ b/src/Functions/MultiMatchAnyImpl.h
@@ -136,7 +136,7 @@ struct MultiMatchAnyImpl
             err = hs_scan(
                 regexps->getDB(),
                 reinterpret_cast<const char *>(haystack_data.data()) + offset,
-                length,
+                static_cast<unsigned>(length),
                 0,
                 smart_scratch.get(),
                 on_match,
@@ -260,7 +260,7 @@ struct MultiMatchAnyImpl
             err = hs_scan(
                 regexps->getDB(),
                 reinterpret_cast<const char *>(haystack_data.data()) + prev_haystack_offset,
-                cur_haystack_length,
+                static_cast<unsigned>(cur_haystack_length),
                 0,
                 smart_scratch.get(),
                 on_match,
diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h
index 0ffe05fbffc..37517313879 100644
--- a/src/Functions/PolygonUtils.h
+++ b/src/Functions/PolygonUtils.h
@@ -625,7 +625,7 @@ UInt128 sipHash128(Polygon && polygon)
 
     auto hash_ring = [&hash](const auto & ring)
     {
-        UInt32 size = ring.size();
+        UInt32 size = static_cast<UInt32>(ring.size());
         hash.update(size);
         hash.update(reinterpret_cast<const char *>(ring.data()), size * sizeof(ring[0]));
     };
diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h
index 1e40c845788..c1ff83d04fe 100644
--- a/src/Functions/Regexps.h
+++ b/src/Functions/Regexps.h
@@ -207,7 +207,7 @@ inline Regexps constructRegexps(const std::vector & str_patterns, [[mayb
     {
         ids.reset(new unsigned int[patterns.size()]);
         for (size_t i = 0; i < patterns.size(); ++i)
-            ids[i] = i + 1;
+            ids[i] = static_cast<unsigned>(i + 1);
     }
 
     hs_error_t err;
@@ -216,7 +216,7 @@ inline Regexps constructRegexps(const std::vector & str_patterns, [[mayb
             patterns.data(),
             flags.data(),
             ids.get(),
-            patterns.size(),
+            static_cast<unsigned>(patterns.size()),
             HS_MODE_BLOCK,
             nullptr,
             &db,
@@ -227,7 +227,7 @@ inline Regexps constructRegexps(const std::vector & str_patterns, [[mayb
             flags.data(),
             ids.get(),
             ext_exprs_ptrs.data(),
-            patterns.size(),
+            static_cast<unsigned>(patterns.size()),
             HS_MODE_BLOCK,
             nullptr,
             &db,
diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h
index 88bc48a6d8c..3325c7b8eb1 100644
--- a/src/Functions/ReplaceRegexpImpl.h
+++ b/src/Functions/ReplaceRegexpImpl.h
@@ -189,7 +189,7 @@ struct ReplaceRegexpImpl
         /// Cannot perform search for whole columns. Will process each string separately.
         for (size_t i = 0; i < size; ++i)
         {
-            int from = i > 0 ? offsets[i - 1] : 0;
+            ssize_t from = i > 0 ? offsets[i - 1] : 0;
             re2_st::StringPiece input(reinterpret_cast<const char *>(data.data() + from), offsets[i] - from - 1);
 
             processString(input, res_data, res_offset, searcher, num_captures, instructions);
@@ -220,7 +220,7 @@ struct ReplaceRegexpImpl
         for (size_t i = 0; i < size; ++i)
         {
-            int from = i * n;
+            size_t from = i * n;
             re2_st::StringPiece input(reinterpret_cast<const char *>(data.data() + from), n);
 
             processString(input, res_data, res_offset, searcher, num_captures, instructions);
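In ReplaceRegexpImpl the running offset changes from int to ssize_t: column offsets are cumulative 64-bit positions, so an int would silently wrap once the concatenated string data grows past 2 GiB. A reduced illustration (container type simplified relative to PaddedPODArray):

    #include <cstdint>
    #include <vector>
    #include <sys/types.h> // ssize_t

    // Length of row i in a ClickHouse-style string column: offsets are
    // cumulative, and each value includes a terminating zero byte.
    size_t rowLength(const std::vector<uint64_t> & offsets, size_t i)
    {
        ssize_t from = i > 0 ? static_cast<ssize_t>(offsets[i - 1]) : 0;
        return static_cast<size_t>(offsets[i] - from - 1);
    }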
diff --git a/src/Functions/URL/CMakeLists.txt b/src/Functions/URL/CMakeLists.txt
index 6328476543d..0e148e87604 100644
--- a/src/Functions/URL/CMakeLists.txt
+++ b/src/Functions/URL/CMakeLists.txt
@@ -2,6 +2,7 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
 add_headers_and_sources(clickhouse_functions_url .)
 add_library(clickhouse_functions_url OBJECT ${clickhouse_functions_url_sources} ${clickhouse_functions_url_headers})
 target_link_libraries(clickhouse_functions_url PRIVATE dbms)
+set_source_files_properties(tldLookup.generated.cpp PROPERTIES COMPILE_FLAGS -Wno-shorten-64-to-32)
 
 if (OMIT_HEAVY_DEBUG_SYMBOLS)
     target_compile_options(clickhouse_functions_url PRIVATE "-g0")
diff --git a/src/Functions/array/arrayAggregation.cpp b/src/Functions/array/arrayAggregation.cpp
index 7b72060f0c0..c8eae78dfaa 100644
--- a/src/Functions/array/arrayAggregation.cpp
+++ b/src/Functions/array/arrayAggregation.cpp
@@ -223,7 +223,7 @@ struct ArrayAggregateImpl
                 if (unlikely(result_scale > DecimalUtils::max_precision))
                     throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale {} is out of bounds", result_scale);
 
-                res[i] = DecimalUtils::convertTo(product, result_scale);
+                res[i] = DecimalUtils::convertTo(product, static_cast<UInt32>(result_scale));
             }
             else
             {
@@ -332,7 +332,7 @@ struct ArrayAggregateImpl
                 if (unlikely(result_scale > DecimalUtils::max_precision))
                     throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale {} is out of bounds", result_scale);
 
-                res[i] = DecimalUtils::convertTo(aggregate_value, result_scale);
+                res[i] = DecimalUtils::convertTo(aggregate_value, static_cast<UInt32>(result_scale));
             }
             else
             {
diff --git a/src/Functions/array/arrayCount.cpp b/src/Functions/array/arrayCount.cpp
index cb902206e8b..f7ded051e5e 100644
--- a/src/Functions/array/arrayCount.cpp
+++ b/src/Functions/array/arrayCount.cpp
@@ -49,7 +49,7 @@ struct ArrayCountImpl
             size_t pos = 0;
             for (size_t i = 0; i < offsets.size(); ++i)
             {
-                out_counts[i] = offsets[i] - pos;
+                out_counts[i] = static_cast<UInt32>(offsets[i] - pos);
                 pos = offsets[i];
             }
@@ -73,7 +73,7 @@ struct ArrayCountImpl
                 if (filter[pos])
                     ++count;
             }
-            out_counts[i] = count;
+            out_counts[i] = static_cast<UInt32>(count);
         }
 
         return out_column;
diff --git a/src/Functions/array/arrayEnumerate.cpp b/src/Functions/array/arrayEnumerate.cpp
index b20f91fe2dd..666e01899bd 100644
--- a/src/Functions/array/arrayEnumerate.cpp
+++ b/src/Functions/array/arrayEnumerate.cpp
@@ -60,7 +60,7 @@ public:
         for (auto off : offsets)
         {
             for (ColumnArray::Offset j = prev_off; j < off; ++j)
-                res_values[j] = j - prev_off + 1;
+                res_values[j] = static_cast<UInt32>(j - prev_off + 1);
             prev_off = off;
         }
diff --git a/src/Functions/array/arrayEnumerateRanked.cpp b/src/Functions/array/arrayEnumerateRanked.cpp
index 7c4b755e020..d19781f97c3 100644
--- a/src/Functions/array/arrayEnumerateRanked.cpp
+++ b/src/Functions/array/arrayEnumerateRanked.cpp
@@ -38,7 +38,7 @@ ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments)
             if (depths.size() < array_num && prev_array_depth)
                 depths.emplace_back(prev_array_depth);
 
-            prev_array_depth = type_array->getNumberOfDimensions();
+            prev_array_depth = static_cast(type_array->getNumberOfDimensions());
             ++array_num;
         }
         else
@@ -55,7 +55,7 @@ ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments)
                 if (i == 0)
                 {
-                    clear_depth = value;
+                    clear_depth = static_cast(value);
                 }
                 else
                 {
diff --git a/src/Functions/array/arrayFirstLastIndex.cpp b/src/Functions/array/arrayFirstLastIndex.cpp
index f7355eb2b38..effcb04ab48 100644
--- a/src/Functions/array/arrayFirstLastIndex.cpp
+++ b/src/Functions/array/arrayFirstLastIndex.cpp
@@ -61,7 +61,7 @@ struct ArrayFirstLastIndexImpl
                 if constexpr (strategy == ArrayFirstLastIndexStrategy::First)
                     out_index[offset_index] = 1;
                 else
-                    out_index[offset_index] = end_offset - start_offset;
+                    out_index[offset_index] = static_cast<UInt32>(end_offset - start_offset);
             }
             else
             {
@@ -113,7 +113,7 @@ struct ArrayFirstLastIndexImpl
                 }
             }
 
-            out_index[offset_index] = result_index;
+            out_index[offset_index] = static_cast<UInt32>(result_index);
         }
 
         return out_column;
diff --git a/src/Functions/array/arrayUniq.cpp b/src/Functions/array/arrayUniq.cpp
index ff75efaae71..a43c21508d9 100644
--- a/src/Functions/array/arrayUniq.cpp
+++ b/src/Functions/array/arrayUniq.cpp
@@ -233,7 +233,7 @@ void FunctionArrayUniq::executeMethodImpl(
                 method.emplaceKey(set, j, pool);
         }
 
-        res_values[i] = set.size() + found_null;
+        res_values[i] = static_cast<UInt32>(set.size() + found_null);
         prev_off = off;
     }
 }
diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp
index 6b3d8ad1139..3b5bb686e60 100644
--- a/src/Functions/array/range.cpp
+++ b/src/Functions/array/range.cpp
@@ -97,7 +97,7 @@ private:
         for (size_t row_idx = 0, rows = in->size(); row_idx < rows; ++row_idx)
         {
             for (size_t elem_idx = 0, elems = in_data[row_idx]; elem_idx < elems; ++elem_idx)
-                out_data[offset + elem_idx] = elem_idx;
+                out_data[offset + elem_idx] = static_cast(elem_idx);
 
             offset += in_data[row_idx];
             out_offsets[row_idx] = offset;
@@ -153,7 +153,7 @@ private:
         {
             for (size_t st = start, ed = end_data[row_idx]; st < ed; st += step)
             {
-                out_data[offset++] = st;
+                out_data[offset++] = static_cast(st);
 
                 if (st > st + step)
                     throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing",
@@ -212,7 +212,7 @@ private:
         {
             for (size_t st = start_data[row_idx], ed = end_data[row_idx]; st < ed; st += step)
             {
-                out_data[offset++] = st;
+                out_data[offset++] = static_cast(st);
 
                 if (st > st + step)
                     throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing",
@@ -271,7 +271,7 @@ private:
         {
             for (size_t st = start, ed = end_data[row_idx]; st < ed; st += step_data[row_idx])
            {
-                out_data[offset++] = st;
+                out_data[offset++] = static_cast(st);
 
                 if (st > st + step_data[row_idx])
                     throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing",
@@ -333,7 +333,7 @@ private:
         {
             for (size_t st = start_data[row_idx], ed = end_start[row_idx]; st < ed; st += step_data[row_idx])
             {
-                out_data[offset++] = st;
+                out_data[offset++] = static_cast(st);
 
                 if (st > st + step_data[row_idx])
                     throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing",
@@ -407,7 +407,7 @@ private:
         if ((res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count)) ||
             (res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count)) ||
-            (res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count)) ||
+            (res = executeConstStartStep(column_ptrs[1], static_cast<UInt32>(start), static_cast<UInt32>(step), input_rows_count)) ||
             (res = executeConstStartStep(column_ptrs[1], start, step, input_rows_count)))
         {
         }
@@ -418,7 +418,7 @@ private:
         if ((res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count)) ||
             (res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count)) ||
-            (res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count)) ||
+            (res = executeConstStart(column_ptrs[1], column_ptrs[2], static_cast<UInt32>(start), input_rows_count)) ||
            (res = executeConstStart(column_ptrs[1], column_ptrs[2], start, input_rows_count)))
         {
         }
@@ -429,7 +429,7 @@ private:
         if ((res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count)) ||
             (res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count)) ||
-            (res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count)) ||
+            (res = executeConstStep(column_ptrs[0], column_ptrs[1], static_cast<UInt32>(step), input_rows_count)) ||
             (res = executeConstStep(column_ptrs[0], column_ptrs[1], step, input_rows_count)))
         {
         }
diff --git a/src/Functions/divide/divideImpl.cpp b/src/Functions/divide/divideImpl.cpp
index 6c151dfefb5..940f4b35df9 100644
--- a/src/Functions/divide/divideImpl.cpp
+++ b/src/Functions/divide/divideImpl.cpp
@@ -18,7 +18,7 @@ namespace NAMESPACE
 template <typename A, typename B, typename ResultType>
 void divideImpl(const A * __restrict a_pos, B b, ResultType * __restrict c_pos, size_t size)
 {
-    libdivide::divider<A> divider(b);
+    libdivide::divider<A> divider(static_cast<A>(b));
     const A * a_end = a_pos + size;
 
 #if defined(__SSE2__)
diff --git a/src/Functions/errorCodeToName.cpp b/src/Functions/errorCodeToName.cpp
index 1736311c6cc..0025d38c8f2 100644
--- a/src/Functions/errorCodeToName.cpp
+++ b/src/Functions/errorCodeToName.cpp
@@ -45,7 +45,8 @@ public:
         for (size_t i = 0; i < input_rows_count; ++i)
         {
             const Int64 error_code = input_column.getInt(i);
-            std::string_view error_name = ErrorCodes::getName(error_code);
+            std::string_view error_name =
+                ErrorCodes::getName(static_cast<ErrorCodes::ErrorCode>(error_code));
             col_res->insertData(error_name.data(), error_name.size());
         }
diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h
index 06b16181c94..e077086a359 100644
--- a/src/Functions/extractAllGroups.h
+++ b/src/Functions/extractAllGroups.h
@@ -136,7 +136,8 @@ public:
                 const auto * end = pos + current_row.size();
 
                 while (pos < end && regexp->Match({pos, static_cast<size_t>(end - pos)},
-                    0, end - pos, regexp->UNANCHORED, matched_groups.data(), matched_groups.size()))
+                    0, end - pos, regexp->UNANCHORED,
+                    matched_groups.data(), static_cast<int>(matched_groups.size())))
                 {
                     // 1 is to exclude group #0 which is whole re match.
                     for (size_t group = 1; group <= groups_count; ++group)
@@ -179,7 +180,8 @@ public:
                 const auto * end = pos + current_row.size;
 
                 while (pos < end && regexp->Match({pos, static_cast<size_t>(end - pos)},
-                    0, end - pos, regexp->UNANCHORED, matched_groups.data(), matched_groups.size()))
+                    0, end - pos, regexp->UNANCHORED, matched_groups.data(),
+                    static_cast<int>(matched_groups.size())))
                 {
                     // 1 is to exclude group #0 which is whole re match.
                     for (size_t group = 1; group <= groups_count; ++group)
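Both extractAllGroups and extractGroups (next diff) hit the same RE2 signature: RE2::Match takes the number of submatch slots as an int, while vector sizes are size_t. A minimal usage sketch under that assumption (plain re2 rather than the vendored re2_st; pattern must have at least one capture group):

    #include <re2/re2.h>
    #include <string>
    #include <string_view>
    #include <vector>

    bool firstGroup(const re2::RE2 & re, std::string_view row, std::string & out)
    {
        // Slot 0 is the whole match; slots 1..N are the capture groups.
        std::vector<re2::StringPiece> groups(re.NumberOfCapturingGroups() + 1);
        if (!re.Match({row.data(), row.size()}, 0, row.size(), re2::RE2::UNANCHORED,
                      groups.data(), static_cast<int>(groups.size()))) // nsubmatch is int
            return false;
        out.assign(groups[1].data(), groups[1].size());
        return true;
    }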
diff --git a/src/Functions/extractGroups.cpp b/src/Functions/extractGroups.cpp
index eb6e609a4be..8ec389827db 100644
--- a/src/Functions/extractGroups.cpp
+++ b/src/Functions/extractGroups.cpp
@@ -90,7 +90,8 @@ public:
             std::string_view current_row = column_haystack->getDataAt(i).toView();
 
             if (re2->Match(re2_st::StringPiece(current_row.data(), current_row.size()),
-                0, current_row.size(), re2_st::RE2::UNANCHORED, matched_groups.data(), matched_groups.size()))
+                0, current_row.size(), re2_st::RE2::UNANCHORED, matched_groups.data(),
+                static_cast<int>(matched_groups.size())))
             {
                 // 1 is to exclude group #0 which is whole re match.
                 for (size_t group = 1; group <= groups_count; ++group)
diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp
index 09071c5c1a0..a10c059b342 100644
--- a/src/Functions/formatDateTime.cpp
+++ b/src/Functions/formatDateTime.cpp
@@ -499,7 +499,7 @@ public:
             else
             {
                 for (auto & instruction : instructions)
-                    instruction.perform(pos, vec[i], time_zone);
+                    instruction.perform(pos, static_cast(vec[i]), time_zone);
             }
 
             dst_offsets[i] = pos - begin;
diff --git a/src/Functions/makeDate.cpp b/src/Functions/makeDate.cpp
index e2d93c0fdc9..c9571a7333d 100644
--- a/src/Functions/makeDate.cpp
+++ b/src/Functions/makeDate.cpp
@@ -294,7 +294,7 @@ public:
             else if (unlikely(date_time > 0x0ffffffffll))
                 date_time = 0x0ffffffffll;
 
-            result_data[i] = date_time;
+            result_data[i] = static_cast<UInt32>(date_time);
         }
 
         return res_column;
@@ -365,7 +365,7 @@ public:
             fraction_data = &typeid_cast(*converted_arguments[6]).getData();
         }
 
-        auto res_column = ColumnDecimal<DateTime64>::create(input_rows_count, precision);
+        auto res_column = ColumnDecimal<DateTime64>::create(input_rows_count, static_cast<UInt32>(precision));
         auto & result_data = res_column->getData();
 
         const auto & year_data = typeid_cast(*converted_arguments[0]).getData();
@@ -411,7 +411,10 @@ public:
                     fraction = max_fraction;
             }
 
-            result_data[i] = DecimalUtils::decimalFromComponents(date_time, static_cast<Int64>(fraction), precision);
+            result_data[i] = DecimalUtils::decimalFromComponents(
+                date_time,
+                static_cast<Int64>(fraction),
+                static_cast<UInt32>(precision));
         }
 
         return res_column;
diff --git a/src/Functions/minus.cpp b/src/Functions/minus.cpp
index 3668e4afc18..04877a42b18 100644
--- a/src/Functions/minus.cpp
+++ b/src/Functions/minus.cpp
@@ -23,7 +23,7 @@ struct MinusImpl
             return static_cast(static_cast(a)) - static_cast(static_cast(b));
         }
         else
-            return static_cast<Result>(a) - b;
+            return static_cast<Result>(a) - static_cast<Result>(b);
     }
 
     /// Apply operation and check overflow. It's used for Decimal operations. @returns true if overflowed, false otherwise.
diff --git a/src/Functions/modulo.cpp b/src/Functions/modulo.cpp
index 9a3aa12037f..b2411899160 100644
--- a/src/Functions/modulo.cpp
+++ b/src/Functions/modulo.cpp
@@ -80,7 +80,7 @@ struct ModuloByConstantImpl
             || (std::is_signed_v && std::is_signed_v && b < std::numeric_limits::lowest())))
         {
             for (size_t i = 0; i < size; ++i)
-                dst[i] = src[i];
+                dst[i] = static_cast<ResultType>(src[i]);
             return;
         }
@@ -101,16 +101,19 @@ struct ModuloByConstantImpl
 
         if (b & (b - 1))
         {
-            libdivide::divider<A> divider(b);
+            libdivide::divider<A> divider(static_cast<A>(b));
             for (size_t i = 0; i < size; ++i)
-                dst[i] = src[i] - (src[i] / divider) * b; /// NOTE: perhaps, the division semantics with the remainder of negative numbers is not preserved.
+            {
+                /// NOTE: perhaps, the division semantics with the remainder of negative numbers is not preserved.
+                dst[i] = static_cast<ResultType>(src[i] - (src[i] / divider) * b);
+            }
         }
         else
         {
             // gcc libdivide doesn't work well for pow2 division
             auto mask = b - 1;
             for (size_t i = 0; i < size; ++i)
-                dst[i] = src[i] & mask;
+                dst[i] = static_cast<ResultType>(src[i] & mask);
         }
     }
diff --git a/src/Functions/now64.cpp b/src/Functions/now64.cpp
index c5225d3317f..a6df4235d60 100644
--- a/src/Functions/now64.cpp
+++ b/src/Functions/now64.cpp
@@ -130,7 +130,7 @@ public:
                 ". Expected const integer.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
-        scale = argument.column->get64(0);
+        scale = static_cast<UInt32>(argument.column->get64(0));
     }
     if (arguments.size() == 2)
     {
diff --git a/src/Functions/nowInBlock.cpp b/src/Functions/nowInBlock.cpp
index db72e791587..c771d83225a 100644
--- a/src/Functions/nowInBlock.cpp
+++ b/src/Functions/nowInBlock.cpp
@@ -74,7 +74,7 @@ public:
     ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
     {
-        return ColumnUInt32::create(input_rows_count, time(nullptr));
+        return ColumnUInt32::create(input_rows_count, static_cast<UInt32>(time(nullptr)));
     }
 };
diff --git a/src/Functions/plus.cpp b/src/Functions/plus.cpp
index 4b81c23584c..cd9cf6cec5c 100644
--- a/src/Functions/plus.cpp
+++ b/src/Functions/plus.cpp
@@ -25,7 +25,7 @@ struct PlusImpl
             return static_cast(static_cast(a)) + static_cast(static_cast(b));
         }
         else
-            return static_cast<Result>(a) + b;
+            return static_cast<Result>(a) + static_cast<Result>(b);
     }
 
     /// Apply operation and check overflow. It's used for Decimal operations. @returns true if overflowed, false otherwise.
diff --git a/src/Functions/pointInEllipses.cpp b/src/Functions/pointInEllipses.cpp
index f69886ad71f..07b7f013cac 100644
--- a/src/Functions/pointInEllipses.cpp
+++ b/src/Functions/pointInEllipses.cpp
@@ -102,7 +102,7 @@ private:
         Float64 ellipse_data[4];
         for (const auto idx : collections::range(0, 4))
         {
-            int arg_idx = 2 + 4 * ellipse_idx + idx;
+            size_t arg_idx = 2 + 4 * ellipse_idx + idx;
             const auto * column = arguments[arg_idx].column.get();
             if (const auto * col = checkAndGetColumnConst<ColumnVector<Float64>>(column))
             {
diff --git a/src/Functions/randomStringUTF8.cpp b/src/Functions/randomStringUTF8.cpp
index 043db179d71..bcaa603b85d 100644
--- a/src/Functions/randomStringUTF8.cpp
+++ b/src/Functions/randomStringUTF8.cpp
@@ -116,8 +116,8 @@ public:
         {
             UInt64 rand = rng();
 
-            UInt32 code_point1 = generate_code_point(rand);
-            UInt32 code_point2 = generate_code_point(rand >> 32);
+            UInt32 code_point1 = generate_code_point(static_cast<UInt32>(rand));
+            UInt32 code_point2 = generate_code_point(static_cast<UInt32>(rand >> 32u));
 
             /// We have padding in column buffers that we can overwrite.
             size_t length1 = UTF8::convertCodePointToUTF8(code_point1, pos, sizeof(int));
diff --git a/src/Functions/runningConcurrency.cpp b/src/Functions/runningConcurrency.cpp
index 37fa11bce8f..c759476006f 100644
--- a/src/Functions/runningConcurrency.cpp
+++ b/src/Functions/runningConcurrency.cpp
@@ -43,6 +43,7 @@ namespace DB
             const typename ColVecArg::Container & vec_end = col_end->getData();
 
             using ColVecConc = typename ConcurrencyDataType::ColumnType;
+            using FieldType = typename ConcurrencyDataType::FieldType;
             typename ColVecConc::MutablePtr col_concurrency = ColVecConc::create(input_rows_count);
             typename ColVecConc::Container & vec_concurrency = col_concurrency->getData();
@@ -74,7 +75,7 @@ namespace DB
                 ongoing_until.erase(
                     ongoing_until.begin(), ongoing_until.upper_bound(begin));
 
-                vec_concurrency[i] = ongoing_until.size();
+                vec_concurrency[i] = static_cast<FieldType>(ongoing_until.size());
             }
 
             return col_concurrency;
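The modulo.cpp hunk above recovers the remainder through libdivide, which provides fast division by a runtime constant but no modulo operation; hence a - (a / b) * b and the cast back to the result type. The same identity in isolation (assumes libdivide is on the include path; b must be nonzero, and as in the patch, powers of two would be handled separately):

    #include <cstddef>
    #include <cstdint>
    #include <libdivide.h>

    // Remainder via a precomputed divider: worthwhile when the same b
    // is applied across a whole column.
    void moduloByConstant(const int64_t * src, int64_t b, int64_t * dst, size_t n)
    {
        libdivide::divider<int64_t> divider(b);
        for (size_t i = 0; i < n; ++i)
            dst[i] = src[i] - (src[i] / divider) * b;
    }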
diff --git a/src/Functions/stem.cpp b/src/Functions/stem.cpp
index 9c7ce895fce..91c98ec9b82 100644
--- a/src/Functions/stem.cpp
+++ b/src/Functions/stem.cpp
@@ -51,8 +51,8 @@ struct StemImpl
             /// Note that accessing -1th element is valid for PaddedPODArray.
             size_t original_size = offsets[i] - offsets[i - 1];
             const sb_symbol * result = sb_stemmer_stem(stemmer,
-                reinterpret_cast<const sb_symbol *>(data.data() + offsets[i - 1]),
-                original_size - 1);
+                reinterpret_cast<const sb_symbol *>(data.data() + offsets[i - 1]),
+                static_cast<int>(original_size - 1));
             size_t new_size = sb_stemmer_length(stemmer) + 1;
 
             memcpy(res_data.data() + data_size, result, new_size);
diff --git a/src/Functions/tests/gtest_has_all.cpp b/src/Functions/tests/gtest_has_all.cpp
index ca7bc80b4aa..1776a461580 100644
--- a/src/Functions/tests/gtest_has_all.cpp
+++ b/src/Functions/tests/gtest_has_all.cpp
@@ -18,9 +18,9 @@ void arrayInit(T* elements_to_have, size_t nb_elements_to_have, T* array_element
 {
     for (size_t i = 0; i < array_size; ++i)
     {
-        array_elements[i] = i;
+        array_elements[i] = static_cast<T>(i);
     }
-    auto [dist, gen] = uni_int_dist(0, array_size - 1);
+    auto [dist, gen] = uni_int_dist(0, static_cast<int>(array_size - 1));
     for (size_t i = 0; i < nb_elements_to_have; ++i)
     {
         elements_to_have[i] = array_elements[dist(gen)];
@@ -28,14 +28,14 @@ void arrayInit(T* elements_to_have, size_t nb_elements_to_have, T* array_element
     if (!all_elements_present)
     {
         /// make one element to be searched for missing from the target array
-        elements_to_have[nb_elements_to_have - 1] = array_size + 1;
+        elements_to_have[nb_elements_to_have - 1] = static_cast<T>(array_size + 1);
     }
 }
 
 void nullMapInit(UInt8 * null_map, size_t null_map_size, size_t nb_null_elements)
 {
     /// -2 to keep the last element of the array non-null
-    auto [dist, gen] = uni_int_dist(0, null_map_size - 2);
+    auto [dist, gen] = uni_int_dist(0, static_cast<int>(null_map_size - 2));
     for (size_t i = 0; i < null_map_size; ++i)
     {
         null_map[i] = 0;
diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp
index ac648b87448..32fe574f56a 100644
--- a/src/Functions/toStartOfInterval.cpp
+++ b/src/Functions/toStartOfInterval.cpp
@@ -133,17 +133,17 @@ namespace
     {
         static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64)
         {
-            return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days);
+            return static_cast<UInt32>(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days));
         }
 
         static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64)
         {
-            return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days);
+            return static_cast<UInt32>(time_zone.toStartOfDayInterval(ExtendedDayNum(d), days));
         }
 
         static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64)
         {
-            return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days);
+            return static_cast<UInt32>(time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days));
         }
 
         static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier)
@@ -517,6 +517,7 @@ private:
     ColumnPtr execute(const FromDataType &, const ColumnType & time_column_type, Int64 num_units, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const
     {
         using ToColumnType = typename ToDataType::ColumnType;
+        using ToFieldType = typename ToDataType::FieldType;
 
         const auto & time_data = time_column_type.getData();
         size_t size = time_data.size();
@@ -529,7 +530,8 @@ private:
         Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale);
 
         for (size_t i = 0; i != size; ++i)
-            result_data[i] = Transform::execute(time_data[i], num_units, time_zone, scale_multiplier);
+            result_data[i] = static_cast<ToFieldType>(
+                Transform::execute(time_data[i], num_units, time_zone, scale_multiplier));
 
         return result_col;
     }
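The gtest changes pass static_cast<int>(...) because the test's uni_int_dist helper builds a std::uniform_int_distribution<int>, whose bounds are ints. A plausible reconstruction of that helper (an assumption — the real one is not shown in this diff and may seed differently):

    #include <random>
    #include <utility>

    // Ready-to-use {distribution, generator} pair with int bounds, matching
    // how the test calls `auto [dist, gen] = uni_int_dist(0, n);`.
    std::pair<std::uniform_int_distribution<int>, std::mt19937> uniIntDist(int min, int max)
    {
        std::mt19937 gen{std::random_device{}()};
        return {std::uniform_int_distribution<int>{min, max}, gen};
    }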
diff --git a/src/Functions/toValidUTF8.cpp b/src/Functions/toValidUTF8.cpp
index 9874e39baa4..4b79bc0bbda 100644
--- a/src/Functions/toValidUTF8.cpp
+++ b/src/Functions/toValidUTF8.cpp
@@ -106,7 +106,7 @@ struct ToValidUTF8Impl
                     /// Sequence was not fully written to this buffer.
                     break;
                 }
-                else if (Poco::UTF8Encoding::isLegal(reinterpret_cast<const unsigned char *>(p), len))
+                else if (Poco::UTF8Encoding::isLegal(reinterpret_cast<const unsigned char *>(p), static_cast<int>(len)))
                 {
                     /// Valid sequence.
                     p += len;
diff --git a/src/IO/AIO.cpp b/src/IO/AIO.cpp
index fb762271e4d..494ed3bae38 100644
--- a/src/IO/AIO.cpp
+++ b/src/IO/AIO.cpp
@@ -23,22 +23,22 @@ namespace DB
 
 int io_setup(unsigned nr, aio_context_t * ctxp)
 {
-    return syscall(__NR_io_setup, nr, ctxp);
+    return static_cast<int>(syscall(__NR_io_setup, nr, ctxp));
 }
 
 int io_destroy(aio_context_t ctx)
 {
-    return syscall(__NR_io_destroy, ctx);
+    return static_cast<int>(syscall(__NR_io_destroy, ctx));
 }
 
 int io_submit(aio_context_t ctx, long nr, struct iocb * iocbpp[]) // NOLINT
 {
-    return syscall(__NR_io_submit, ctx, nr, iocbpp);
+    return static_cast<int>(syscall(__NR_io_submit, ctx, nr, iocbpp));
 }
 
 int io_getevents(aio_context_t ctx, long min_nr, long max_nr, io_event * events, struct timespec * timeout) // NOLINT
 {
-    return syscall(__NR_io_getevents, ctx, min_nr, max_nr, events, timeout);
+    return static_cast<int>(syscall(__NR_io_getevents, ctx, min_nr, max_nr, events, timeout));
 }
diff --git a/src/IO/Archives/ZipArchiveReader.cpp b/src/IO/Archives/ZipArchiveReader.cpp
index 3127f299f5c..f48699a8408 100644
--- a/src/IO/Archives/ZipArchiveReader.cpp
+++ b/src/IO/Archives/ZipArchiveReader.cpp
@@ -281,7 +281,7 @@ private:
     bool nextImpl() override
     {
         RawHandle raw_handle = handle.getRawHandle();
-        auto bytes_read = unzReadCurrentFile(raw_handle, internal_buffer.begin(), internal_buffer.size());
+        auto bytes_read = unzReadCurrentFile(raw_handle, internal_buffer.begin(), static_cast<unsigned>(internal_buffer.size()));
 
         if (bytes_read < 0)
             checkResult(bytes_read);
diff --git a/src/IO/Archives/ZipArchiveWriter.cpp b/src/IO/Archives/ZipArchiveWriter.cpp
index 817e8132b64..d413783356d 100644
--- a/src/IO/Archives/ZipArchiveWriter.cpp
+++ b/src/IO/Archives/ZipArchiveWriter.cpp
@@ -134,7 +134,8 @@ private:
         if (!offset())
             return;
         RawHandle raw_handle = handle.getRawHandle();
-        checkResult(zipWriteInFileInZip(raw_handle, working_buffer.begin(), offset()));
+        int code = zipWriteInFileInZip(raw_handle, working_buffer.begin(), static_cast<unsigned>(offset()));
+        checkResult(code);
     }
 
     void checkResult(int code) const { handle.checkResult(code); }
diff --git a/src/IO/Bzip2ReadBuffer.cpp b/src/IO/Bzip2ReadBuffer.cpp
index 9d183393159..9970edcbcf3 100644
--- a/src/IO/Bzip2ReadBuffer.cpp
+++ b/src/IO/Bzip2ReadBuffer.cpp
@@ -85,11 +85,11 @@ bool Bzip2ReadBuffer::nextImpl()
     if (!bz->stream.avail_in)
     {
         in->nextIfAtEnd();
-        bz->stream.avail_in = in->buffer().end() - in->position();
+        bz->stream.avail_in = static_cast<unsigned>(in->buffer().end() - in->position());
         bz->stream.next_in = in->position();
     }
 
-    bz->stream.avail_out = internal_buffer.size();
+    bz->stream.avail_out = static_cast<unsigned>(internal_buffer.size());
     bz->stream.next_out = internal_buffer.begin();
 
     ret = BZ2_bzDecompress(&bz->stream);
@@ -99,7 +99,7 @@ bool Bzip2ReadBuffer::nextImpl()
     if (ret == BZ_STREAM_END && !in->eof())
     {
         bz->reinitialize();
-        bz->stream.avail_in = in->buffer().end() - in->position();
+        bz->stream.avail_in = static_cast<unsigned>(in->buffer().end() - in->position());
         bz->stream.next_in = in->position();
 
         ret = BZ_OK;
diff --git a/src/IO/Bzip2WriteBuffer.cpp b/src/IO/Bzip2WriteBuffer.cpp
index 10a1803fec8..4b6bed70d35 100644
--- a/src/IO/Bzip2WriteBuffer.cpp
+++ b/src/IO/Bzip2WriteBuffer.cpp
@@ -58,7 +58,7 @@ void Bzip2WriteBuffer::nextImpl()
     }
 
     bz->stream.next_in = working_buffer.begin();
-    bz->stream.avail_in = offset();
+    bz->stream.avail_in = static_cast<unsigned>(offset());
 
     try
     {
@@ -66,7 +66,7 @@ void Bzip2WriteBuffer::nextImpl()
         {
             out->nextIfAtEnd();
             bz->stream.next_out = out->position();
-            bz->stream.avail_out = out->buffer().end() - out->position();
+            bz->stream.avail_out = static_cast<unsigned>(out->buffer().end() - out->position());
 
             int ret = BZ2_bzCompress(&bz->stream, BZ_RUN);
@@ -95,7 +95,7 @@ void Bzip2WriteBuffer::finalizeBefore()
         out->nextIfAtEnd();
         bz->stream.next_out = out->position();
-        bz->stream.avail_out = out->buffer().end() - out->position();
+        bz->stream.avail_out = static_cast<unsigned>(out->buffer().end() - out->position());
 
         int ret = BZ2_bzCompress(&bz->stream, BZ_FINISH);
diff --git a/src/IO/FileEncryptionCommon.cpp b/src/IO/FileEncryptionCommon.cpp
index 13d8acb8c7b..5592da8721c 100644
--- a/src/IO/FileEncryptionCommon.cpp
+++ b/src/IO/FileEncryptionCommon.cpp
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -84,10 +85,13 @@ namespace
             while (in_size < size)
             {
                 out.nextIfAtEnd();
+
                 size_t part_size = std::min(size - in_size, out.available());
+                part_size = std::min(part_size, INT_MAX);
+
                 uint8_t * ciphertext = reinterpret_cast<uint8_t *>(out.position());
                 int ciphertext_size = 0;
-                if (!EVP_EncryptUpdate(evp_ctx, ciphertext, &ciphertext_size, &in[in_size], part_size))
+                if (!EVP_EncryptUpdate(evp_ctx, ciphertext, &ciphertext_size, &in[in_size], static_cast<int>(part_size)))
                     throw Exception("Failed to encrypt", ErrorCodes::DATA_ENCRYPTION_ERROR);
 
                 in_size += part_size;
@@ -110,7 +114,7 @@ namespace
             uint8_t ciphertext[kBlockSize];
             int ciphertext_size = 0;
-            if (!EVP_EncryptUpdate(evp_ctx, ciphertext, &ciphertext_size, padded_data, padded_data_size))
+            if (!EVP_EncryptUpdate(evp_ctx, ciphertext, &ciphertext_size, padded_data, safe_cast<int>(padded_data_size)))
                 throw Exception("Failed to encrypt", ErrorCodes::DATA_ENCRYPTION_ERROR);
 
             if (!ciphertext_size)
@@ -142,7 +146,7 @@ namespace
             const uint8_t * in = reinterpret_cast<const uint8_t *>(data);
             uint8_t * plaintext = reinterpret_cast<uint8_t *>(out);
             int plaintext_size = 0;
-            if (!EVP_DecryptUpdate(evp_ctx, plaintext, &plaintext_size, in, size))
+            if (!EVP_DecryptUpdate(evp_ctx, plaintext, &plaintext_size, in, safe_cast<int>(size)))
                 throw Exception("Failed to decrypt", ErrorCodes::DATA_ENCRYPTION_ERROR);
             return plaintext_size;
         }
@@ -153,10 +157,9 @@ namespace
             uint8_t padded_data[kBlockSize] = {};
             memcpy(&padded_data[pad_left], data, size);
             size_t padded_data_size = pad_left + size;
-
             uint8_t plaintext[kBlockSize];
             int plaintext_size = 0;
-            if (!EVP_DecryptUpdate(evp_ctx, plaintext, &plaintext_size, padded_data, padded_data_size))
+            if (!EVP_DecryptUpdate(evp_ctx, plaintext, &plaintext_size, padded_data, safe_cast<int>(padded_data_size)))
                 throw Exception("Failed to decrypt", ErrorCodes::DATA_ENCRYPTION_ERROR);
 
             if (!plaintext_size)
diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp
index 9fd48914f64..f33b2399492 100644
--- a/src/IO/HTTPCommon.cpp
+++ b/src/IO/HTTPCommon.cpp
@@ -142,7 +142,7 @@ namespace
             bool proxy_https_,
             size_t max_pool_size_,
             bool resolve_host_ = true)
-            : Base(max_pool_size_, &Poco::Logger::get("HTTPSessionPool"))
+            : Base(static_cast<unsigned>(max_pool_size_), &Poco::Logger::get("HTTPSessionPool"))
             , host(host_)
             , port(port_)
             , https(https_)
@@ -271,7 +271,7 @@ namespace
     };
 }
 
-void setResponseDefaultHeaders(HTTPServerResponse & response, unsigned keep_alive_timeout)
+void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout)
 {
     if (!response.getKeepAlive())
         return;
diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h
index 18e83abb83b..51da17d4ca7 100644
--- a/src/IO/HTTPCommon.h
+++ b/src/IO/HTTPCommon.h
@@ -38,7 +38,7 @@ public:
 using PooledHTTPSessionPtr = SingleEndpointHTTPSessionPool::Entry;
 using HTTPSessionPtr = std::shared_ptr<Poco::Net::HTTPClientSession>;
 
-void setResponseDefaultHeaders(HTTPServerResponse & response, unsigned keep_alive_timeout);
+void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout);
 
 /// Create session object to perform requests and set required parameters.
 HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, bool resolve_host = true);
diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp
index 988ad75cdf4..7ba23dd1588 100644
--- a/src/IO/ReadBufferFromPocoSocket.cpp
+++ b/src/IO/ReadBufferFromPocoSocket.cpp
@@ -29,6 +29,7 @@ namespace ErrorCodes
     extern const int NETWORK_ERROR;
     extern const int SOCKET_TIMEOUT;
     extern const int CANNOT_READ_FROM_SOCKET;
+    extern const int LOGICAL_ERROR;
 }
 
@@ -54,7 +55,10 @@ bool ReadBufferFromPocoSocket::nextImpl()
         while (async_callback && !socket.poll(0, Poco::Net::Socket::SELECT_READ))
             async_callback(socket.impl()->sockfd(), socket.getReceiveTimeout(), socket_description);
 
-        bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size());
+        if (internal_buffer.size() > INT_MAX)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer overflow");
+
+        bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), static_cast<int>(internal_buffer.size()));
     }
     catch (const Poco::Net::NetException & e)
     {
diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h
index 3161ca6d8a8..816aa8fd057 100644
--- a/src/IO/VarInt.h
+++ b/src/IO/VarInt.h
@@ -83,14 +83,14 @@ inline void readVarUInt(UInt32 & x, ReadBuffer & istr)
 {
     UInt64 tmp;
     readVarUInt(tmp, istr);
-    x = tmp;
+    x = static_cast<UInt32>(tmp);
 }
 
 inline void readVarInt(Int32 & x, ReadBuffer & istr)
 {
     Int64 tmp;
     readVarInt(tmp, istr);
-    x = tmp;
+    x = static_cast<Int32>(tmp);
 }
 
 inline void readVarUInt(UInt16 & x, ReadBuffer & istr)
diff --git a/src/IO/WriteBufferFromPocoSocket.cpp b/src/IO/WriteBufferFromPocoSocket.cpp
index fb4e5df9b59..95d532e9bd4 100644
--- a/src/IO/WriteBufferFromPocoSocket.cpp
+++ b/src/IO/WriteBufferFromPocoSocket.cpp
@@ -31,6 +31,7 @@ namespace ErrorCodes
     extern const int NETWORK_ERROR;
     extern const int SOCKET_TIMEOUT;
     extern const int CANNOT_WRITE_TO_SOCKET;
+    extern const int LOGICAL_ERROR;
 }
 
@@ -55,7 +56,11 @@ void WriteBufferFromPocoSocket::nextImpl()
         try
         {
             CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkSend);
-            res = socket.impl()->sendBytes(working_buffer.begin() + bytes_written, offset() - bytes_written);
+            char * pos = working_buffer.begin() + bytes_written;
+            size_t size = offset() - bytes_written;
+            if (size > INT_MAX)
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer overflow");
+            res = socket.impl()->sendBytes(pos, static_cast<int>(size));
         }
         catch (const Poco::Net::NetException & e)
         {
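Poco sockets express lengths as int (sendBytes/receiveBytes), so both socket buffers above guard against buffers longer than INT_MAX before narrowing, failing loudly instead of sending a truncated prefix. The pattern in isolation (ClickHouse's Exception replaced by a standard one for self-containment):

    #include <climits>
    #include <cstddef>
    #include <stdexcept>
    #include <Poco/Net/StreamSocket.h>

    // A buffer larger than INT_MAX cannot be expressed in one sendBytes call;
    // throwing beats silently sending a wrapped-around length.
    int sendChecked(Poco::Net::StreamSocket & socket, const char * pos, size_t size)
    {
        if (size > INT_MAX)
            throw std::logic_error("Buffer overflow");
        return socket.sendBytes(pos, static_cast<int>(size));
    }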
diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp
index 14118c3c04e..f823015bd7d 100644
--- a/src/IO/WriteBufferFromS3.cpp
+++ b/src/IO/WriteBufferFromS3.cpp
@@ -305,7 +305,7 @@ void WriteBufferFromS3::writePart()
         UploadPartTask task;
         auto & tags = TSA_SUPPRESS_WARNING_FOR_WRITE(part_tags); /// Suppress warning because schedule == false.
 
-        fillUploadRequest(task.req, tags.size() + 1);
+        fillUploadRequest(task.req, static_cast<int>(tags.size() + 1));
         processUploadRequest(task);
         tags.push_back(task.tag);
     }
@@ -362,7 +362,7 @@ void WriteBufferFromS3::completeMultipartUpload()
     for (size_t i = 0; i < tags.size(); ++i)
     {
         Aws::S3::Model::CompletedPart part;
-        multipart_upload.AddParts(part.WithETag(tags[i]).WithPartNumber(i + 1));
+        multipart_upload.AddParts(part.WithETag(tags[i]).WithPartNumber(static_cast<int>(i + 1)));
     }
 
     req.SetMultipartUpload(multipart_upload);
diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h
index 782e580d8be..28f831856d7 100644
--- a/src/IO/WriteBufferFromS3.h
+++ b/src/IO/WriteBufferFromS3.h
@@ -110,8 +110,8 @@ private:
     std::unique_ptr put_object_task; /// Does not need protection by mutex because of the logic around is_finished field.
     std::list TSA_GUARDED_BY(bg_tasks_mutex) upload_object_tasks;
-    size_t num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0;
-    size_t num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0;
+    int num_added_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0;
+    int num_finished_bg_tasks TSA_GUARDED_BY(bg_tasks_mutex) = 0;
 
     std::mutex bg_tasks_mutex;
     std::condition_variable bg_tasks_condvar;
diff --git a/src/IO/WriteBufferValidUTF8.cpp b/src/IO/WriteBufferValidUTF8.cpp
index 10e86f01343..4c8e172f43c 100644
--- a/src/IO/WriteBufferValidUTF8.cpp
+++ b/src/IO/WriteBufferValidUTF8.cpp
@@ -102,7 +102,7 @@ void WriteBufferValidUTF8::nextImpl()
             break;
 #endif
 
-        size_t len = length_of_utf8_sequence[static_cast<unsigned char>(*p)];
+        UInt8 len = length_of_utf8_sequence[static_cast<unsigned char>(*p)];
 
         if (len > 4)
         { // NOLINT
diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h
index c3e1e59218f..42d84e080af 100644
--- a/src/IO/WriteHelpers.h
+++ b/src/IO/WriteHelpers.h
@@ -139,7 +139,7 @@ inline void writeBoolText(bool x, WriteBuffer & buf)
 template <typename T>
 inline size_t writeFloatTextFastPath(T x, char * buffer)
 {
-    int result = 0;
+    Int64 result = 0;
 
     if constexpr (std::is_same_v)
     {
diff --git a/src/IO/ZlibDeflatingWriteBuffer.cpp b/src/IO/ZlibDeflatingWriteBuffer.cpp
index c265791e38a..43014096e2a 100644
--- a/src/IO/ZlibDeflatingWriteBuffer.cpp
+++ b/src/IO/ZlibDeflatingWriteBuffer.cpp
@@ -49,7 +49,7 @@ void ZlibDeflatingWriteBuffer::nextImpl()
         return;
 
     zstr.next_in = reinterpret_cast(working_buffer.begin());
-    zstr.avail_in = offset();
+    zstr.avail_in = static_cast<unsigned>(offset());
 
     try
     {
@@ -57,7 +57,7 @@ void ZlibDeflatingWriteBuffer::nextImpl()
         {
             out->nextIfAtEnd();
             zstr.next_out = reinterpret_cast(out->position());
-            zstr.avail_out = out->buffer().end() - out->position();
+            zstr.avail_out = static_cast<unsigned>(out->buffer().end() - out->position());
 
             int rc = deflate(&zstr, Z_NO_FLUSH);
             out->position() = out->buffer().end() - zstr.avail_out;
@@ -96,7 +96,7 @@ void ZlibDeflatingWriteBuffer::finalizeBefore()
     {
         out->nextIfAtEnd();
         zstr.next_out = reinterpret_cast(out->position());
-        zstr.avail_out = out->buffer().end() - out->position();
+        zstr.avail_out = static_cast<unsigned>(out->buffer().end() - out->position());
 
         int rc = deflate(&zstr, Z_FULL_FLUSH);
         out->position() = out->buffer().end() - zstr.avail_out;
@@ -110,7 +110,7 @@ void ZlibDeflatingWriteBuffer::finalizeBefore()
     {
         out->nextIfAtEnd();
         zstr.next_out = reinterpret_cast(out->position());
-        zstr.avail_out = out->buffer().end() - out->position();
+        zstr.avail_out = static_cast<unsigned>(out->buffer().end() - out->position());
 
         int rc = deflate(&zstr, Z_FINISH);
         out->position() = out->buffer().end() - zstr.avail_out;
diff --git a/src/IO/ZlibInflatingReadBuffer.cpp b/src/IO/ZlibInflatingReadBuffer.cpp
index 4cb56bef6b1..9c2ee640cbe 100644
--- a/src/IO/ZlibInflatingReadBuffer.cpp
+++ b/src/IO/ZlibInflatingReadBuffer.cpp
@@ -61,11 +61,11 @@ bool ZlibInflatingReadBuffer::nextImpl()
         {
             in->nextIfAtEnd();
             zstr.next_in = reinterpret_cast(in->position());
-            zstr.avail_in = in->buffer().end() - in->position();
+            zstr.avail_in = static_cast<unsigned>(in->buffer().end() - in->position());
         }
 
         /// init output bytes (place, where decompressed data will be)
         zstr.next_out = reinterpret_cast(internal_buffer.begin());
-        zstr.avail_out = internal_buffer.size();
+        zstr.avail_out = static_cast<unsigned>(internal_buffer.size());
 
         int rc = inflate(&zstr, Z_NO_FLUSH);
diff --git a/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp b/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp
index 459f486af18..79fb4ccead5 100644
--- a/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp
+++ b/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp
@@ -149,7 +149,7 @@ void ZstdDeflatingAppendableWriteBuffer::finalizeZstd()
 {
     try
     {
-        int err = ZSTD_freeCCtx(cctx);
+        size_t err = ZSTD_freeCCtx(cctx);
         /// This is just in case, since it is impossible to get an error by using this wrapper.
         if (unlikely(err))
             throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "ZSTD_freeCCtx failed: error: '{}'; zstd version: {}", ZSTD_getErrorName(err), ZSTD_VERSION_STRING);
diff --git a/src/IO/ZstdDeflatingWriteBuffer.cpp b/src/IO/ZstdDeflatingWriteBuffer.cpp
index 238645b16df..c7f9b0d718b 100644
--- a/src/IO/ZstdDeflatingWriteBuffer.cpp
+++ b/src/IO/ZstdDeflatingWriteBuffer.cpp
@@ -100,7 +100,7 @@ void ZstdDeflatingWriteBuffer::finalizeAfter()
 {
     try
     {
-        int err = ZSTD_freeCCtx(cctx);
+        size_t err = ZSTD_freeCCtx(cctx);
         /// This is just in case, since it is impossible to get an error by using this wrapper.
         if (unlikely(err))
             throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "ZSTD_freeCCtx failed: error: '{}'; zstd version: {}", ZSTD_getErrorName(err), ZSTD_VERSION_STRING);
diff --git a/src/IO/readDecimalText.h b/src/IO/readDecimalText.h
index 2e06acb2f3e..64374a20574 100644
--- a/src/IO/readDecimalText.h
+++ b/src/IO/readDecimalText.h
@@ -106,7 +106,7 @@ inline bool readDigits(ReadBuffer & buf, T & x, uint32_t & digits, int32_t & exp
                 exponent -= places;
 
                 // TODO: accurate shift10 for big integers
-                x *= intExp10OfSize(places);
+                x *= intExp10OfSize(places);
                 places = 0;
 
                 x += (byte - '0');
diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h
index b8d0c1ba3c0..a72ff82008e 100644
--- a/src/IO/readFloatText.h
+++ b/src/IO/readFloatText.h
@@ -349,11 +349,11 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
     constexpr int significant_digits = std::numeric_limits::digits10;
     readUIntTextUpToNSignificantDigits(before_point, in);
 
-    int read_digits = in.count() - count_after_sign;
+    size_t read_digits = in.count() - count_after_sign;
 
     if (unlikely(read_digits > significant_digits))
     {
-        int before_point_additional_exponent = read_digits - significant_digits;
+        int before_point_additional_exponent = static_cast<int>(read_digits) - significant_digits;
         x = static_cast<T>(shift10(before_point, before_point_additional_exponent));
     }
     else
@@ -377,11 +377,11 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
             ++in.position();
 
         auto after_leading_zeros_count = in.count();
-        auto after_point_num_leading_zeros = after_leading_zeros_count - after_point_count;
+        int after_point_num_leading_zeros = static_cast<int>(after_leading_zeros_count - after_point_count);
 
         readUIntTextUpToNSignificantDigits(after_point, in);
         read_digits = in.count() - after_leading_zeros_count;
-        after_point_exponent = (read_digits > significant_digits ? -significant_digits : -read_digits) - after_point_num_leading_zeros;
+        after_point_exponent = (read_digits > significant_digits ? -significant_digits : static_cast<int>(-read_digits)) - after_point_num_leading_zeros;
     }
 
     if (checkChar('e', in) || checkChar('E', in))
diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h
index a27745d2cfa..fea013fd075 100644
--- a/src/Interpreters/ActionsVisitor.h
+++ b/src/Interpreters/ActionsVisitor.h
@@ -140,7 +140,7 @@ public:
          * when we add lots of column with same prefix. One counter for all
          * prefixes is good enough.
          */
-        int next_unique_suffix;
+        size_t next_unique_suffix;
 
         Data(
             ContextPtr context_,
diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp
index e9a72ce0156..182f2292b28 100644
--- a/src/Interpreters/Aggregator.cpp
+++ b/src/Interpreters/Aggregator.cpp
@@ -1638,14 +1638,14 @@ Block Aggregator::convertOneBucketToBlock(
     Method & method,
     Arena * arena,
     bool final,
-    size_t bucket) const
+    Int32 bucket) const
 {
     // Used in ConvertingAggregatedToChunksSource -> ConvertingAggregatedToChunksTransform (expects single chunk for each bucket_id).
     constexpr bool return_single_block = true;
     Block block = convertToBlockImpl(
         method, method.data.impls[bucket], arena, data_variants.aggregates_pools, final, method.data.impls[bucket].size());
 
-    block.info.bucket_num = bucket;
+    block.info.bucket_num = static_cast(bucket);
     return block;
 }
@@ -1653,7 +1653,7 @@ Block Aggregator::mergeAndConvertOneBucketToBlock(
     ManyAggregatedDataVariants & variants,
     Arena * arena,
     bool final,
-    size_t bucket,
+    Int32 bucket,
     std::atomic<bool> * is_cancelled) const
 {
     auto & merged_data = *variants[0];
@@ -1697,7 +1697,7 @@ void Aggregator::writeToTemporaryFileImpl(
             max_temporary_block_size_bytes = block_size_bytes;
     };
 
-    for (size_t bucket = 0; bucket < Method::Data::NUM_BUCKETS; ++bucket)
+    for (UInt32 bucket = 0; bucket < Method::Data::NUM_BUCKETS; ++bucket)
     {
         Block block = convertOneBucketToBlock(data_variants, method, data_variants.aggregates_pool, false, bucket);
         out.write(block);
@@ -3159,19 +3159,19 @@ void NO_INLINE Aggregator::convertBlockToTwoLevelImpl(
         selector[i] = bucket;
     }
 
-    size_t num_buckets = destinations.size();
+    UInt32 num_buckets = static_cast<UInt32>(destinations.size());
 
     for (size_t column_idx = 0; column_idx < columns; ++column_idx)
     {
         const ColumnWithTypeAndName & src_col = source.getByPosition(column_idx);
         MutableColumns scattered_columns = src_col.column->scatter(num_buckets, selector);
 
-        for (size_t bucket = 0, size = num_buckets; bucket < size; ++bucket)
+        for (UInt32 bucket = 0, size = num_buckets; bucket < size; ++bucket)
         {
             if (!scattered_columns[bucket]->empty())
             {
                 Block & dst = destinations[bucket];
-                dst.info.bucket_num = bucket;
+                dst.info.bucket_num = static_cast(bucket);
                 dst.insert({std::move(scattered_columns[bucket]), src_col.type, src_col.name});
             }
diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h
index b8aab7a3343..c81cfa2c0a2 100644
--- a/src/Interpreters/Aggregator.h
+++ b/src/Interpreters/Aggregator.h
@@ -1301,13 +1301,13 @@ private:
         Method & method,
         Arena * arena,
         bool final,
-        size_t bucket) const;
+        Int32 bucket) const;
 
     Block mergeAndConvertOneBucketToBlock(
         ManyAggregatedDataVariants & variants,
         Arena * arena,
         bool final,
-        size_t bucket,
+        Int32 bucket,
         std::atomic<bool> * is_cancelled = nullptr) const;
 
     Block prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows) const;
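The Aggregator changes look odd (why shrink a size_t bucket index to Int32?) until you see the target field: two-level hash tables have a fixed, small number of buckets (256), and Block::info.bucket_num is an Int32 where -1 means "not a two-level bucket". Schematically (struct reduced to the relevant field; constants as in the two-level layout):

    #include <cstddef>
    #include <cstdint>

    struct BlockInfo
    {
        int32_t bucket_num = -1; // -1: block does not belong to a bucket
    };

    // NUM_BUCKETS is 256 for two-level hash tables, so a bucket index always
    // fits in Int32; the explicit cast documents that invariant.
    void tagBucket(BlockInfo & info, size_t bucket)
    {
        info.bucket_num = static_cast<int32_t>(bucket);
    }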
diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp
index 6877c0ece06..b76434b23e7 100644
--- a/src/Interpreters/Cluster.cpp
+++ b/src/Interpreters/Cluster.cpp
@@ -423,7 +423,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config,
             info.all_addresses.push_back(address);
 
             auto pool = ConnectionPoolFactory::instance().get(
-                settings.distributed_connections_pool_size,
+                static_cast<unsigned>(settings.distributed_connections_pool_size),
                 address.host_name, address.port,
                 address.default_database, address.user, address.password, address.quota_key,
                 address.cluster, address.cluster_secret,
@@ -497,7 +497,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config,
                 for (const auto & replica : replica_addresses)
                 {
                     auto replica_pool = ConnectionPoolFactory::instance().get(
-                        settings.distributed_connections_pool_size,
+                        static_cast<unsigned>(settings.distributed_connections_pool_size),
                         replica.host_name, replica.port,
                         replica.default_database, replica.user, replica.password, replica.quota_key,
                         replica.cluster, replica.cluster_secret,
@@ -585,11 +585,11 @@ Cluster::Cluster(
         for (const auto & replica : current)
         {
             auto replica_pool = ConnectionPoolFactory::instance().get(
-                settings.distributed_connections_pool_size,
-                replica.host_name, replica.port,
-                replica.default_database, replica.user, replica.password, replica.quota_key,
-                replica.cluster, replica.cluster_secret,
-                "server", replica.compression, replica.secure, replica.priority);
+                static_cast<unsigned>(settings.distributed_connections_pool_size),
+                replica.host_name, replica.port,
+                replica.default_database, replica.user, replica.password, replica.quota_key,
+                replica.cluster, replica.cluster_secret,
+                "server", replica.compression, replica.secure, replica.priority);
 
             all_replicas.emplace_back(replica_pool);
             if (replica.is_local && !treat_local_as_remote)
                 shard_local_addresses.push_back(replica);
@@ -693,7 +693,7 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti
             info.all_addresses.push_back(address);
 
             auto pool = ConnectionPoolFactory::instance().get(
-                settings.distributed_connections_pool_size,
+                static_cast<unsigned>(settings.distributed_connections_pool_size),
                 address.host_name,
                 address.port,
                 address.default_database,
diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
index fce2e9b2f08..4653491aac9 100644
--- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
+++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
@@ -69,7 +69,7 @@ void SelectStreamFactory::createForShard(
             query_ast, header, context, processed_stage, shard_info.shard_num, shard_count, /*replica_num=*/0, /*replica_count=*/0, /*coordinator=*/nullptr));
     };
 
-    auto emplace_remote_stream = [&](bool lazy = false, UInt32 local_delay = 0)
+    auto emplace_remote_stream = [&](bool lazy = false, time_t local_delay = 0)
     {
         remote_shards.emplace_back(Shard{
             .query = query_ast,
@@ -131,7 +131,7 @@ void SelectStreamFactory::createForShard(
             return;
         }
 
-        UInt32 local_delay = replicated_storage->getAbsoluteDelay();
+        UInt64 local_delay = replicated_storage->getAbsoluteDelay();
 
         if (local_delay < max_allowed_delay)
         {
@@ -205,7 +205,7 @@ SelectStreamFactory::ShardPlans SelectStreamFactory::createForShardWithParallelR
         if (!max_allowed_delay)
             return false;
 
-        UInt32 local_delay = replicated_storage->getAbsoluteDelay();
+        UInt64 local_delay = replicated_storage->getAbsoluteDelay();
         return local_delay >= max_allowed_delay;
     };
diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h
index 440017a8e80..8ebddea4988 100644
--- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h
+++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h
@@ -46,7 +46,7 @@ public:
         /// If we connect to replicas lazily.
         /// (When there is a local replica with big delay).
         bool lazy = false;
-        UInt32 local_delay = 0;
+        time_t local_delay = 0;
     };
 
     using Shards = std::vector<Shard>;
diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index 923b4a767b7..0a62e04e090 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -180,7 +180,7 @@ void executeQuery(
         stream_factory.createForShard(shard_info,
             query_ast_for_shard, main_table, table_func_ptr,
-            new_context, plans, remote_shards, shards);
+            new_context, plans, remote_shards, static_cast<UInt32>(shards));
     }
 
     if (!remote_shards.empty())
@@ -284,7 +284,8 @@ void executeQueryWithParallelReplicas(
             query_ast_for_shard = query_ast;
 
         auto shard_plans = stream_factory.createForShardWithParallelReplicas(shard_info,
-            query_ast_for_shard, main_table, table_func_ptr, throttler, context, shards, query_info.storage_limits);
+            query_ast_for_shard, main_table, table_func_ptr, throttler, context,
+            static_cast<UInt32>(shards), query_info.storage_limits);
 
         if (!shard_plans.local_plan && !shard_plans.remote_plan)
             throw Exception(ErrorCodes::LOGICAL_ERROR, "No plans were generated for reading from shard. This is a bug");
diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp
index b7ad56dca91..cc79a71245b 100644
--- a/src/Interpreters/ConcurrentHashJoin.cpp
+++ b/src/Interpreters/ConcurrentHashJoin.cpp
@@ -38,7 +38,7 @@ static UInt32 toPowerOfTwo(UInt32 x)
 ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_)
     : context(context_)
     , table_join(table_join_)
-    , slots(toPowerOfTwo(std::min(slots_, 256)))
+    , slots(toPowerOfTwo(std::min<UInt32>(static_cast<UInt32>(slots_), 256)))
 {
     for (size_t i = 0; i < slots; ++i)
     {
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 0e4c658a1ee..f1fa0955c34 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -498,7 +498,7 @@ bool DDLWorker::tryExecuteQuery(const String & query, DDLTaskBase & task, const
 
 void DDLWorker::updateMaxDDLEntryID(const String & entry_name)
 {
-    UInt64 id = DDLTaskBase::getLogEntryNumber(entry_name);
+    UInt32 id = DDLTaskBase::getLogEntryNumber(entry_name);
     auto prev_id = max_id.load(std::memory_order_relaxed);
     while (prev_id < id)
     {
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index e3c1fa4c271..5aea460ad2e 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -159,7 +159,7 @@ protected:
     /// How many tasks could be in the queue
     size_t max_tasks_in_queue = 1000;
 
-    std::atomic<UInt64> max_id = 0;
+    std::atomic<UInt32> max_id = 0;
     const CurrentMetrics::Metric * max_entry_metric;
     const CurrentMetrics::Metric * max_pushed_entry_metric;
 };
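updateMaxDDLEntryID above maintains a lock-free running maximum over std::atomic<UInt32> max_id. The loop shape, extracted as a standard compare-exchange max (a sketch, not the member function itself):

    #include <atomic>
    #include <cstdint>

    // Classic atomic-max: retry while our id is still larger than the
    // published maximum; compare_exchange_weak reloads prev_id on failure.
    void updateMax(std::atomic<uint32_t> & max_id, uint32_t id)
    {
        auto prev_id = max_id.load(std::memory_order_relaxed);
        while (prev_id < id)
        {
            if (max_id.compare_exchange_weak(prev_id, id))
                break;
        }
    }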
getContext()->getConfigRef().getInt("database_atomic_delay_before_drop_table_sec", default_drop_delay_sec); - unused_dir_hide_timeout_sec = getContext()->getConfigRef().getInt("database_catalog_unused_dir_hide_timeout_sec", unused_dir_hide_timeout_sec); - unused_dir_rm_timeout_sec = getContext()->getConfigRef().getInt("database_catalog_unused_dir_rm_timeout_sec", unused_dir_rm_timeout_sec); - unused_dir_cleanup_period_sec = getContext()->getConfigRef().getInt("database_catalog_unused_dir_cleanup_period_sec", unused_dir_cleanup_period_sec); + unused_dir_hide_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_hide_timeout_sec", unused_dir_hide_timeout_sec); + unused_dir_rm_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_rm_timeout_sec", unused_dir_rm_timeout_sec); + unused_dir_cleanup_period_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_cleanup_period_sec", unused_dir_cleanup_period_sec); auto db_for_temporary_and_external_tables = std::make_shared(TEMPORARY_DATABASE, getContext()); attachDatabase(TEMPORARY_DATABASE, db_for_temporary_and_external_tables); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 74ff055efd4..f41ae31b239 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1698,8 +1698,12 @@ void InterpreterCreateQuery::addColumnsDescriptionToCreateQueryIfNecessary(ASTCr return; auto ast_storage = std::make_shared(); - auto query_from_storage = DB::getCreateQueryFromStorage(storage, ast_storage, false, - getContext()->getSettingsRef().max_parser_depth, true); + unsigned max_parser_depth = static_cast(getContext()->getSettingsRef().max_parser_depth); + auto query_from_storage = DB::getCreateQueryFromStorage(storage, + ast_storage, + false, + max_parser_depth, + true); auto & create_query_from_storage = query_from_storage->as(); if (!create.columns_list) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 51a3dde261a..107740c3b96 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -292,7 +292,7 @@ Chain InterpreterInsertQuery::buildChainImpl( out.addSource(std::make_shared( out.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0)); + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); } auto counting = std::make_shared(out.getInputHeader(), thread_status, getContext()->getQuota()); diff --git a/src/Interpreters/JIT/compileFunction.cpp b/src/Interpreters/JIT/compileFunction.cpp index d3a7eb0cfaa..e12b4894eb0 100644 --- a/src/Interpreters/JIT/compileFunction.cpp +++ b/src/Interpreters/JIT/compileFunction.cpp @@ -614,7 +614,7 @@ static void compileSortDescription(llvm::Module & module, /** Join results from all comparator steps. * Result of columns comparison equals to first compare block where lhs is not equal to lhs or last compare block. 
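 * (Editor's note: llvm::IRBuilder<>::CreatePHI takes the reserved number of incoming
 * values as `unsigned`, which is why the size_t count below is narrowed with an
 * explicit static_cast.)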
*/ - auto * compare_result_phi = b.CreatePHI(b.getInt8Ty(), comparator_steps_and_results.size()); + auto * compare_result_phi = b.CreatePHI(b.getInt8Ty(), static_cast(comparator_steps_and_results.size())); for (const auto & [block, result_value] : comparator_steps_and_results) compare_result_phi->addIncoming(result_value, block); diff --git a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp index 40c42f7728e..a0fdafc976c 100644 --- a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp +++ b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp @@ -45,8 +45,7 @@ void NormalizeSelectWithUnionQueryMatcher::visit(ASTSelectWithUnionQuery & ast, SelectUnionModesSet current_set_of_modes; bool distinct_found = false; - int i; - for (i = union_modes.size() - 1; i >= 0; --i) + for (ssize_t i = union_modes.size() - 1; i >= 0; --i) { current_set_of_modes.insert(union_modes[i]); if (const auto * union_ast = typeid_cast(select_list[i + 1].get())) diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index d5194a02513..d7e0ad23643 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -243,7 +243,12 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as } auto process_it = processes.emplace(processes.end(), - query_context, query_, client_info, priorities.insert(settings.priority), std::move(thread_group), query_kind); + query_context, + query_, + client_info, + priorities.insert(static_cast(settings.priority)), + std::move(thread_group), + query_kind); increaseQueryKindAmount(query_kind); diff --git a/src/Interpreters/RowRefs.cpp b/src/Interpreters/RowRefs.cpp index 68076e1fec2..0e553ef145e 100644 --- a/src/Interpreters/RowRefs.cpp +++ b/src/Interpreters/RowRefs.cpp @@ -89,7 +89,7 @@ public: assert(!sorted.load(std::memory_order_acquire)); - entries.emplace_back(key, row_refs.size()); + entries.emplace_back(key, static_cast(row_refs.size())); row_refs.emplace_back(RowRef(block, row_num)); } diff --git a/src/Interpreters/RowRefs.h b/src/Interpreters/RowRefs.h index 2c9f2062a82..197ac3ca00f 100644 --- a/src/Interpreters/RowRefs.h +++ b/src/Interpreters/RowRefs.h @@ -29,7 +29,10 @@ struct RowRef SizeT row_num = 0; RowRef() = default; - RowRef(const Block * block_, size_t row_num_) : block(block_), row_num(row_num_) {} + RowRef(const Block * block_, size_t row_num_) + : block(block_) + , row_num(static_cast(row_num_)) + {} }; /// Single linked list of references to rows. 
Used for ALL JOINs (non-unique JOINs) diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index 3eb93f1f20e..c5ae6f6c885 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -20,7 +20,7 @@ namespace ErrorCodes extern const int NOT_ENOUGH_SPACE; } -void TemporaryDataOnDiskScope::deltaAllocAndCheck(int compressed_delta, int uncompressed_delta) +void TemporaryDataOnDiskScope::deltaAllocAndCheck(ssize_t compressed_delta, ssize_t uncompressed_delta) { if (parent) parent->deltaAllocAndCheck(compressed_delta, uncompressed_delta); diff --git a/src/Interpreters/TemporaryDataOnDisk.h b/src/Interpreters/TemporaryDataOnDisk.h index 81bd2067650..11edc8700d2 100644 --- a/src/Interpreters/TemporaryDataOnDisk.h +++ b/src/Interpreters/TemporaryDataOnDisk.h @@ -53,7 +53,7 @@ public: VolumePtr getVolume() const { return volume; } protected: - void deltaAllocAndCheck(int compressed_delta, int uncompressed_delta); + void deltaAllocAndCheck(ssize_t compressed_delta, ssize_t uncompressed_delta); TemporaryDataOnDiskScopePtr parent = nullptr; VolumePtr volume; diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 4810174e395..ee126f2da11 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -55,12 +55,12 @@ void ThreadStatus::applyQuerySettings() #if defined(OS_LINUX) /// Set "nice" value if required. - Int32 new_os_thread_priority = settings.os_thread_priority; + Int32 new_os_thread_priority = static_cast(settings.os_thread_priority); if (new_os_thread_priority && hasLinuxCapability(CAP_SYS_NICE)) { LOG_TRACE(log, "Setting nice to {}", new_os_thread_priority); - if (0 != setpriority(PRIO_PROCESS, thread_id, new_os_thread_priority)) + if (0 != setpriority(PRIO_PROCESS, static_cast(thread_id), new_os_thread_priority)) throwFromErrno("Cannot 'setpriority'", ErrorCodes::CANNOT_SET_THREAD_PRIORITY); os_thread_priority = new_os_thread_priority; @@ -349,7 +349,7 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) { LOG_TRACE(log, "Resetting nice"); - if (0 != setpriority(PRIO_PROCESS, thread_id, 0)) + if (0 != setpriority(PRIO_PROCESS, static_cast(thread_id), 0)) LOG_ERROR(log, "Cannot 'setpriority' back to zero: {}", errnoToString()); os_thread_priority = 0; diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 55156cde7be..9ba171d2665 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -88,7 +88,7 @@ Field convertIntToDecimalType(const Field & from, const DataTypeDecimal & typ if (!type.canStoreWhole(value)) throw Exception("Number is too big to place in " + type.getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); - T scaled_value = type.getScaleMultiplier() * static_cast(value); + T scaled_value = type.getScaleMultiplier() * T(static_cast(value)); return DecimalField(scaled_value, type.getScale()); } diff --git a/src/Interpreters/createBlockSelector.cpp b/src/Interpreters/createBlockSelector.cpp index b1a9a4e9e35..fce9833ddfb 100644 --- a/src/Interpreters/createBlockSelector.cpp +++ b/src/Interpreters/createBlockSelector.cpp @@ -50,7 +50,7 @@ IColumn::Selector createBlockSelector( /// libdivide support only UInt32 and UInt64. 
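// Editor's note: a minimal, self-contained sketch (not part of this patch) of the
// width dispatch used in createBlockSelector above. libdivide ships only 32- and
// 64-bit dividers, so the divisor type is chosen at compile time and total_weight,
// a wider settings value, is narrowed to it explicitly. Names are illustrative.
#include <libdivide.h>

#include <cstdint>
#include <type_traits>

template <typename T>
T divideByTotalWeight(T value, uint64_t total_weight)
{
    using TUInt32Or64 = std::conditional_t<sizeof(T) <= 4, uint32_t, uint64_t>;
    /// libdivide precomputes magic numbers for repeated division by a runtime constant.
    libdivide::divider<TUInt32Or64> divider(static_cast<TUInt32Or64>(total_weight));
    /// operator/ is overloaded for libdivide::divider.
    return static_cast<T>(static_cast<TUInt32Or64>(value) / divider);
}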
     using TUInt32Or64 = std::conditional_t;
 
-    libdivide::divider divider(total_weight);
+    libdivide::divider divider(static_cast(total_weight));
 
     const auto & data = typeid_cast &>(column).getData();
diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
index abca563de55..1a9940307d3 100644
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@@ -944,9 +944,10 @@ static std::tuple executeQueryImpl(
             processor_elem.processor_name = processor->getName();
 
-            processor_elem.elapsed_us = processor->getElapsedUs();
-            processor_elem.input_wait_elapsed_us = processor->getInputWaitElapsedUs();
-            processor_elem.output_wait_elapsed_us = processor->getOutputWaitElapsedUs();
+            /// NOTE: consider converting these counters to UInt64 so the casts become unnecessary
+            processor_elem.elapsed_us = static_cast(processor->getElapsedUs());
+            processor_elem.input_wait_elapsed_us = static_cast(processor->getInputWaitElapsedUs());
+            processor_elem.output_wait_elapsed_us = static_cast(processor->getOutputWaitElapsedUs());
 
             auto stats = processor->getProcessorDataStats();
             processor_elem.input_rows = stats.input_rows;
diff --git a/src/Parsers/ASTTTLElement.cpp b/src/Parsers/ASTTTLElement.cpp
index 90278e27c0c..86dd85e0eb8 100644
--- a/src/Parsers/ASTTTLElement.cpp
+++ b/src/Parsers/ASTTTLElement.cpp
@@ -93,7 +93,7 @@ void ASTTTLElement::setExpression(int & pos, ASTPtr && ast)
 {
     if (pos == -1)
     {
-        pos = children.size();
+        pos = static_cast(children.size());
         children.emplace_back(ast);
     }
     else
diff --git a/src/Parsers/ParserSampleRatio.cpp b/src/Parsers/ParserSampleRatio.cpp
index 2f444bcf9e8..b6be04cbcc0 100644
--- a/src/Parsers/ParserSampleRatio.cpp
+++ b/src/Parsers/ParserSampleRatio.cpp
@@ -14,7 +14,7 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat
 {
     UInt64 num_before = 0;
     UInt64 num_after = 0;
-    Int64 exponent = 0;
+    Int32 exponent = 0;
 
     const char * pos_after_first_num = tryReadIntText(num_before, pos, end);
 
@@ -28,12 +28,12 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat
     if (!has_num_before_point && !has_point)
         return false;
 
-    size_t number_of_digits_after_point = 0;
+    int number_of_digits_after_point = 0;
 
     if (has_point)
     {
         const char * pos_after_second_num = tryReadIntText(num_after, pos, end);
-        number_of_digits_after_point = pos_after_second_num - pos;
+        number_of_digits_after_point = static_cast(pos_after_second_num - pos);
         pos = pos_after_second_num;
     }
diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp
index af8c9dc58a6..4a0c60da48d 100644
--- a/src/Parsers/parseQuery.cpp
+++ b/src/Parsers/parseQuery.cpp
@@ -236,7 +236,8 @@ ASTPtr tryParseQuery(
     {
         const char * query_begin = _out_query_end;
         Tokens tokens(query_begin, all_queries_end, max_query_size);
-        IParser::Pos token_iterator(tokens, max_parser_depth);
+        /// NOTE: consider using UInt32 for the max_parser_depth setting.
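// Editor's note: most hunks in this patch make an implicit narrowing of a wide
// settings value (typically UInt64) explicit via static_cast. A hypothetical
// checked variant, sketched here only for illustration, would also verify that
// the value fits in the destination type:
#include <cassert>
#include <limits>

template <typename Narrow, typename Wide>
Narrow checkedNarrow(Wide value)
{
    /// Assumes both types are unsigned; traps in debug builds instead of silently wrapping.
    assert(value <= static_cast<Wide>(std::numeric_limits<Narrow>::max()));
    return static_cast<Narrow>(value);
}

// Hypothetical usage for the line below:
//     IParser::Pos token_iterator(tokens, checkedNarrow<UInt32>(max_parser_depth));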
+ IParser::Pos token_iterator(tokens, static_cast(max_parser_depth)); if (token_iterator->isEnd() || token_iterator->type == TokenType::Semicolon) diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp index 52395338279..abd91ddcf35 100644 --- a/src/Processors/Formats/IRowInputFormat.cpp +++ b/src/Processors/Formats/IRowInputFormat.cpp @@ -188,7 +188,7 @@ Chunk IRowInputFormat::generate() } e.setFileName(getFileNameFromReadBuffer(getReadBuffer())); - e.setLineNumber(total_rows); + e.setLineNumber(static_cast(total_rows)); e.addMessage(verbose_diagnostic); throw; } diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 70510a165e6..e9b01ec7dda 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -82,7 +82,7 @@ static ColumnWithTypeAndName readColumnWithNumericData(std::shared_ptr(*internal_column).getData(); column_data.reserve(arrow_column->length()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { std::shared_ptr chunk = arrow_column->chunk(chunk_i); if (chunk->length() == 0) @@ -108,7 +108,7 @@ static ColumnWithTypeAndName readColumnWithStringData(std::shared_ptr & column_offsets = assert_cast(*internal_column).getOffsets(); size_t chars_t_size = 0; - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { ArrowArray & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); const size_t chunk_length = chunk.length(); @@ -123,7 +123,7 @@ static ColumnWithTypeAndName readColumnWithStringData(std::shared_ptrlength()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { ArrowArray & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); std::shared_ptr buffer = chunk.value_data(); @@ -151,7 +151,7 @@ static ColumnWithTypeAndName readColumnWithBooleanData(std::shared_ptr &>(*internal_column).getData(); column_data.reserve(arrow_column->length()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { arrow::BooleanArray & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); if (chunk.length() == 0) @@ -173,7 +173,7 @@ static ColumnWithTypeAndName readColumnWithDate32Data(std::shared_ptr & column_data = assert_cast &>(*internal_column).getData(); column_data.reserve(arrow_column->length()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { arrow::Date32Array & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); @@ -198,7 +198,7 @@ static ColumnWithTypeAndName readColumnWithDate64Data(std::shared_ptr &>(*internal_column).getData(); column_data.reserve(arrow_column->length()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i 
= 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { auto & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); for (size_t value_i = 0, length = static_cast(chunk.length()); value_i < length; ++value_i) @@ -219,7 +219,7 @@ static ColumnWithTypeAndName readColumnWithTimestampData(std::shared_ptr &>(*internal_column).getData(); column_data.reserve(arrow_column->length()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { const auto & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); for (size_t value_i = 0, length = static_cast(chunk.length()); value_i < length; ++value_i) @@ -239,7 +239,7 @@ static ColumnWithTypeAndName readColumnWithTimeData(std::shared_ptrcreateColumn(); internal_column->reserve(arrow_column->length()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { auto & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); if (chunk.length() == 0) @@ -272,7 +272,7 @@ static ColumnWithTypeAndName readColumnWithDecimalDataImpl(std::shared_ptrlength()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { auto & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); for (size_t value_i = 0, length = static_cast(chunk.length()); value_i < length; ++value_i) @@ -308,7 +308,7 @@ static ColumnPtr readByteMapFromArrowColumn(std::shared_ptr PaddedPODArray & bytemap_data = assert_cast &>(*nullmap_column).getData(); bytemap_data.reserve(arrow_column->length()); - for (size_t chunk_i = 0; chunk_i != static_cast(arrow_column->num_chunks()); ++chunk_i) + for (int chunk_i = 0; chunk_i != arrow_column->num_chunks(); ++chunk_i) { std::shared_ptr chunk = arrow_column->chunk(chunk_i); @@ -324,7 +324,7 @@ static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr &>(*offsets_column).getData(); offsets_data.reserve(arrow_column->length()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { arrow::ListArray & list_chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); auto arrow_offsets_array = list_chunk.offsets(); @@ -356,7 +356,7 @@ static ColumnWithTypeAndName readColumnWithIndexesDataImpl(std::shared_ptrlength()); NumericType shift = is_nullable ? 
2 : 1; - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { std::shared_ptr chunk = arrow_column->chunk(chunk_i); if (chunk->length() == 0) @@ -450,7 +450,8 @@ static ColumnPtr readColumnWithIndexesData(std::shared_ptr # define DISPATCH(ARROW_NUMERIC_TYPE, CPP_NUMERIC_TYPE) \ case ARROW_NUMERIC_TYPE: \ { \ - return readColumnWithIndexesDataImpl(arrow_column, "", default_value_index, dict_size, is_nullable).column; \ + return readColumnWithIndexesDataImpl(\ + arrow_column, "", default_value_index, static_cast(dict_size), is_nullable).column; \ } FOR_ARROW_INDEXES_TYPES(DISPATCH) # undef DISPATCH @@ -463,7 +464,7 @@ static std::shared_ptr getNestedArrowColumn(std::shared_ptr { arrow::ArrayVector array_vector; array_vector.reserve(arrow_column->num_chunks()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { arrow::ListArray & list_chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); std::shared_ptr chunk = list_chunk.values(); @@ -582,7 +583,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( auto arrow_type = arrow_column->type(); auto * arrow_struct_type = assert_cast(arrow_type.get()); std::vector nested_arrow_columns(arrow_struct_type->num_fields()); - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { arrow::StructArray & struct_chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); for (int i = 0; i < arrow_struct_type->num_fields(); ++i) @@ -631,7 +632,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( if (!dict_info.values) { arrow::ArrayVector dict_array; - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { arrow::DictionaryArray & dict_chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); dict_array.emplace_back(dict_chunk.dictionary()); @@ -656,7 +657,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( } arrow::ArrayVector indexes_array; - for (size_t chunk_i = 0, num_chunks = static_cast(arrow_column->num_chunks()); chunk_i < num_chunks; ++chunk_i) + for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) { arrow::DictionaryArray & dict_chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); indexes_array.emplace_back(dict_chunk.indices()); diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 0ec04c61321..abb23d68334 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -283,14 +283,15 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node if (root_node->leaves() == 2 && (root_node->leafAt(0)->type() == avro::AVRO_NULL || root_node->leafAt(1)->type() == avro::AVRO_NULL)) { - size_t non_null_union_index = root_node->leafAt(0)->type() == avro::AVRO_NULL ? 1 : 0; + int non_null_union_index = root_node->leafAt(0)->type() == avro::AVRO_NULL ? 
1 : 0; if (target.isNullable()) { - auto nested_deserialize = this->createDeserializeFn(root_node->leafAt(non_null_union_index), removeNullable(target_type)); + auto nested_deserialize = this->createDeserializeFn( + root_node->leafAt(non_null_union_index), removeNullable(target_type)); return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder) { ColumnNullable & col = assert_cast(column); - size_t union_index = decoder.decodeUnionIndex(); + int union_index = static_cast(decoder.decodeUnionIndex()); if (union_index == non_null_union_index) { nested_deserialize(col.getNestedColumn(), decoder); @@ -308,7 +309,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node auto nested_deserialize = this->createDeserializeFn(root_node->leafAt(non_null_union_index), target_type); return [non_null_union_index, nested_deserialize](IColumn & column, avro::Decoder & decoder) { - size_t union_index = decoder.decodeUnionIndex(); + int union_index = static_cast(decoder.decodeUnionIndex()); if (union_index == non_null_union_index) nested_deserialize(column, decoder); else @@ -345,7 +346,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node if (target.isString()) { std::vector symbols; - for (size_t i = 0; i < root_node->names(); ++i) + for (int i = 0; i < static_cast(root_node->names()); ++i) { symbols.push_back(root_node->nameAt(i)); } @@ -360,7 +361,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node { const auto & enum_type = dynamic_cast(*target_type); Row symbol_mapping; - for (size_t i = 0; i < root_node->names(); ++i) + for (int i = 0; i < static_cast(root_node->names()); ++i) { symbol_mapping.push_back(enum_type.castToValue(root_node->nameAt(i))); } @@ -397,7 +398,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node if (root_node->leaves() != nested_types.size()) throw Exception(ErrorCodes::INCORRECT_DATA, "The number of leaves in record doesn't match the number of elements in tuple"); - for (size_t i = 0; i != root_node->leaves(); ++i) + for (int i = 0; i != static_cast(root_node->leaves()); ++i) { const auto & name = root_node->nameAt(i); size_t pos = tuple_type.getPositionByName(name); @@ -505,7 +506,7 @@ AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node) case avro::AVRO_UNION: { std::vector union_skip_fns; - for (size_t i = 0; i < root_node->leaves(); ++i) + for (int i = 0; i < static_cast(root_node->leaves()); ++i) { union_skip_fns.push_back(createSkipFn(root_node->leafAt(i))); } @@ -546,7 +547,7 @@ AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node) case avro::AVRO_RECORD: { std::vector field_skip_fns; - for (size_t i = 0; i < root_node->leaves(); ++i) + for (int i = 0; i < static_cast(root_node->leaves()); ++i) { field_skip_fns.push_back(createSkipFn(root_node->leafAt(i))); } @@ -633,7 +634,7 @@ AvroDeserializer::Action AvroDeserializer::createAction(const Block & header, co const auto & column = header.getByPosition(target_column_idx); try { - AvroDeserializer::Action action(target_column_idx, createDeserializeFn(node, column.type)); + AvroDeserializer::Action action(static_cast(target_column_idx), createDeserializeFn(node, column.type)); column_found[target_column_idx] = true; return action; } @@ -646,7 +647,7 @@ AvroDeserializer::Action AvroDeserializer::createAction(const Block & header, co else if (node->type() == avro::AVRO_RECORD) { std::vector 
field_actions(node->leaves()); - for (size_t i = 0; i < node->leaves(); ++i) + for (int i = 0; i < static_cast(node->leaves()); ++i) { const auto & field_node = node->leafAt(i); const auto & field_name = node->nameAt(i); @@ -657,7 +658,7 @@ AvroDeserializer::Action AvroDeserializer::createAction(const Block & header, co else if (node->type() == avro::AVRO_UNION) { std::vector branch_actions(node->leaves()); - for (size_t i = 0; i < node->leaves(); ++i) + for (int i = 0; i < static_cast(node->leaves()); ++i) { const auto & branch_node = node->leafAt(i); const auto & branch_name = nodeName(branch_node); @@ -687,7 +688,7 @@ AvroDeserializer::Action AvroDeserializer::createAction(const Block & header, co /// Create nested deserializer for each nested column. std::vector nested_deserializers; std::vector nested_indexes; - for (size_t i = 0; i != nested_avro_node->leaves(); ++i) + for (int i = 0; i != static_cast(nested_avro_node->leaves()); ++i) { const auto & name = nested_avro_node->nameAt(i); if (!nested_types.contains(name)) @@ -970,7 +971,7 @@ NamesAndTypesList AvroSchemaReader::readSchema() throw Exception("Root schema must be a record", ErrorCodes::TYPE_MISMATCH); NamesAndTypesList names_and_types; - for (size_t i = 0; i != root_node->leaves(); ++i) + for (int i = 0; i != static_cast(root_node->leaves()); ++i) names_and_types.emplace_back(root_node->nameAt(i), avroNodeToDataType(root_node->leafAt(i))); return names_and_types; @@ -999,14 +1000,14 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node) if (node->names() < 128) { EnumValues::Values values; - for (size_t i = 0; i != node->names(); ++i) + for (int i = 0; i != static_cast(node->names()); ++i) values.emplace_back(node->nameAt(i), i); return std::make_shared(std::move(values)); } else if (node->names() < 32768) { EnumValues::Values values; - for (size_t i = 0; i != node->names(); ++i) + for (int i = 0; i != static_cast(node->names()); ++i) values.emplace_back(node->nameAt(i), i); return std::make_shared(std::move(values)); } @@ -1022,7 +1023,7 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node) case avro::Type::AVRO_UNION: if (node->leaves() == 2 && (node->leafAt(0)->type() == avro::Type::AVRO_NULL || node->leafAt(1)->type() == avro::Type::AVRO_NULL)) { - size_t nested_leaf_index = node->leafAt(0)->type() == avro::Type::AVRO_NULL ? 1 : 0; + int nested_leaf_index = node->leafAt(0)->type() == avro::Type::AVRO_NULL ? 1 : 0; auto nested_type = avroNodeToDataType(node->leafAt(nested_leaf_index)); return nested_type->canBeInsideNullable() ? 
makeNullable(nested_type) : nested_type; } @@ -1035,7 +1036,7 @@ DataTypePtr AvroSchemaReader::avroNodeToDataType(avro::NodePtr node) nested_types.reserve(node->leaves()); Names nested_names; nested_names.reserve(node->leaves()); - for (size_t i = 0; i != node->leaves(); ++i) + for (int i = 0; i != static_cast(node->leaves()); ++i) { nested_types.push_back(avroNodeToDataType(node->leafAt(i))); nested_names.push_back(node->nameAt(i)); diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index b63b1e7b9b1..e3d570d1876 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -193,7 +193,7 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF case TypeIndex::FixedString: { auto size = data_type->getSizeOfValueInMemory(); - auto schema = avro::FixedSchema(size, "fixed_" + toString(type_name_increment)); + auto schema = avro::FixedSchema(static_cast(size), "fixed_" + toString(type_name_increment)); return {schema, [](const IColumn & column, size_t row_num, avro::Encoder & encoder) { const std::string_view & s = assert_cast(column).getDataAt(row_num).toView(); diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 9e423f8a96b..87fff16c107 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -224,7 +224,14 @@ namespace DB for (size_t i = 0; i != column_tuple->tupleSize(); ++i) { ColumnPtr nested_column = column_tuple->getColumnPtr(i); - fillArrowArray(column_name + "." + nested_names[i], nested_column, nested_types[i], null_bytemap, builder.field_builder(i), format_name, start, end, output_string_as_string, dictionary_values); + fillArrowArray( + column_name + "." 
+ nested_names[i], + nested_column, nested_types[i], null_bytemap, + builder.field_builder(static_cast(i)), + format_name, + start, end, + output_string_as_string, + dictionary_values); } for (size_t i = start; i != end; ++i) @@ -370,7 +377,7 @@ namespace DB else { std::string_view string_ref = internal_column.getDataAt(string_i).toView(); - status = builder.Append(string_ref.data(), string_ref.size()); + status = builder.Append(string_ref.data(), static_cast(string_ref.size())); } checkStatus(status, write_column->getName(), format_name); } diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index c6f8742455e..08d2cac743a 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -91,7 +91,7 @@ static void insertSignedInteger(IColumn & column, const DataTypePtr & column_typ assert_cast(column).insertValue(value); break; case TypeIndex::Int32: - assert_cast(column).insertValue(value); + assert_cast(column).insertValue(static_cast(value)); break; case TypeIndex::Int64: assert_cast(column).insertValue(value); @@ -117,7 +117,7 @@ static void insertUnsignedInteger(IColumn & column, const DataTypePtr & column_t break; case TypeIndex::DateTime: [[fallthrough]]; case TypeIndex::UInt32: - assert_cast(column).insertValue(value); + assert_cast(column).insertValue(static_cast(value)); break; case TypeIndex::UInt64: assert_cast(column).insertValue(value); diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index fd33abfb587..654917b6357 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -79,7 +79,7 @@ static capnp::DynamicValue::Builder initStructFieldBuilder(const ColumnPtr & col if (const auto * array_column = checkAndGetColumn(*column)) { size_t size = array_column->getOffsets()[row_num] - array_column->getOffsets()[row_num - 1]; - return struct_builder.init(field, size); + return struct_builder.init(field, static_cast(size)); } if (field.getType().isStruct()) @@ -200,7 +200,7 @@ static std::optional convertToDynamicValue( size_t size = offsets[row_num] - offset; const auto * nested_array_column = checkAndGetColumn(*nested_column); - for (size_t i = 0; i != size; ++i) + for (unsigned i = 0; i != static_cast(size); ++i) { capnp::DynamicValue::Builder value_builder; /// For nested arrays we need to initialize nested list builder. 
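// Editor's note: Cap'n Proto's builder API sizes lists with `unsigned int`
// (e.g. capnp::DynamicList::Builder::init), while ClickHouse computes element
// counts as size_t from column offsets; hence the explicit narrowing in the
// hunks above and below, along the lines of:
//
//     size_t size = offsets[row_num] - offsets[row_num - 1];
//     value_builder = list_builder.init(i, static_cast<unsigned>(size));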
@@ -208,7 +208,7 @@ static std::optional convertToDynamicValue( { const auto & nested_offset = nested_array_column->getOffsets(); size_t nested_array_size = nested_offset[offset + i] - nested_offset[offset + i - 1]; - value_builder = list_builder.init(i, nested_array_size); + value_builder = list_builder.init(i, static_cast(nested_array_size)); } else value_builder = list_builder[i]; diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 739fa8735b2..bc363e5aa98 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -502,7 +502,7 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType(ReadBuffer & istr, co ParserTupleOfLiterals parser_tuple; Tokens tokens_number(istr.position(), istr.buffer().end()); - IParser::Pos iterator(tokens_number, settings.max_parser_depth); + IParser::Pos iterator(tokens_number, static_cast(settings.max_parser_depth)); Expected expected; ASTPtr ast; if (!parser_array.parse(iterator, ast, expected) && !parser_tuple.parse(iterator, ast, expected)) diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 931a7587903..80fdda687e2 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -128,7 +128,7 @@ static void insertInteger(IColumn & column, DataTypePtr type, UInt64 value) case TypeIndex::DateTime: [[fallthrough]]; case TypeIndex::UInt32: { - assert_cast(column).insertValue(value); + assert_cast(column).insertValue(static_cast(value)); break; } case TypeIndex::UInt64: @@ -148,7 +148,7 @@ static void insertInteger(IColumn & column, DataTypePtr type, UInt64 value) } case TypeIndex::Int32: { - assert_cast(column).insertValue(value); + assert_cast(column).insertValue(static_cast(value)); break; } case TypeIndex::Int64: diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp index a470e193300..da683913d4d 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp @@ -99,15 +99,15 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr case TypeIndex::String: { const std::string_view & string = assert_cast(column).getDataAt(row_num).toView(); - packer.pack_bin(string.size()); - packer.pack_bin_body(string.data(), string.size()); + packer.pack_bin(static_cast(string.size())); + packer.pack_bin_body(string.data(), static_cast(string.size())); return; } case TypeIndex::FixedString: { const std::string_view & string = assert_cast(column).getDataAt(row_num).toView(); - packer.pack_bin(string.size()); - packer.pack_bin_body(string.data(), string.size()); + packer.pack_bin(static_cast(string.size())); + packer.pack_bin_body(string.data(), static_cast(string.size())); return; } case TypeIndex::Array: @@ -118,7 +118,7 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr const ColumnArray::Offsets & offsets = column_array.getOffsets(); size_t offset = offsets[row_num - 1]; size_t size = offsets[row_num] - offset; - packer.pack_array(size); + packer.pack_array(static_cast(size)); for (size_t i = 0; i < size; ++i) { serializeField(nested_column, nested_type, offset + i); @@ -152,7 +152,7 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr const 
auto & offsets = nested_column.getOffsets(); size_t offset = offsets[row_num - 1]; size_t size = offsets[row_num] - offset; - packer.pack_map(size); + packer.pack_map(static_cast(size)); for (size_t i = 0; i < size; ++i) { serializeField(*key_column, map_type.getKeyType(), offset + i); @@ -179,8 +179,8 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr WriteBufferFromOwnString buf; writeBinary(uuid_column.getElement(row_num), buf); std::string_view uuid_bin = buf.stringView(); - packer.pack_bin(uuid_bin.size()); - packer.pack_bin_body(uuid_bin.data(), uuid_bin.size()); + packer.pack_bin(static_cast(uuid_bin.size())); + packer.pack_bin_body(uuid_bin.data(), static_cast(uuid_bin.size())); return; } case FormatSettings::MsgPackUUIDRepresentation::STR: @@ -188,8 +188,8 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr WriteBufferFromOwnString buf; writeText(uuid_column.getElement(row_num), buf); std::string_view uuid_text = buf.stringView(); - packer.pack_str(uuid_text.size()); - packer.pack_bin_body(uuid_text.data(), uuid_text.size()); + packer.pack_str(static_cast(uuid_text.size())); + packer.pack_bin_body(uuid_text.data(), static_cast(uuid_text.size())); return; } case FormatSettings::MsgPackUUIDRepresentation::EXT: @@ -200,7 +200,7 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr writeBinaryBigEndian(value.toUnderType().items[1], buf); std::string_view uuid_ext = buf.stringView(); packer.pack_ext(sizeof(UUID), int8_t(MsgPackExtensionTypes::UUIDType)); - packer.pack_ext_body(uuid_ext.data(), uuid_ext.size()); + packer.pack_ext_body(uuid_ext.data(), static_cast(uuid_ext.size())); return; } } diff --git a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp index 344c5c179db..b6b1b875add 100644 --- a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp @@ -65,7 +65,7 @@ void MySQLOutputFormat::consume(Chunk chunk) { for (size_t i = 0; i < chunk.getNumRows(); ++i) { - ProtocolText::ResultSetRow row_packet(serializations, chunk.getColumns(), i); + ProtocolText::ResultSetRow row_packet(serializations, chunk.getColumns(), static_cast(i)); packet_endpoint->sendPacket(row_packet); } } diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 36126c21bf1..d6dbd69135a 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -136,7 +136,7 @@ void ORCBlockInputFormat::prepareReader() if (is_stopped) return; - stripe_total = file_reader->NumberOfStripes(); + stripe_total = static_cast(file_reader->NumberOfStripes()); stripe_current = 0; arrow_column_to_ch_column = std::make_unique( @@ -159,7 +159,7 @@ void ORCBlockInputFormat::prepareReader() { /// LIST type require 2 indices, STRUCT - the number of elements + 1, /// so we should recursively count the number of indices we need for this type. - int indexes_count = countIndicesForType(schema->field(i)->type()); + int indexes_count = static_cast(countIndicesForType(schema->field(i)->type())); const auto & name = schema->field(i)->name(); if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? 
boost::to_lower_copy(name) : name)) { diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp index 2f54cca466c..9172c79c890 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp @@ -128,8 +128,9 @@ void ParallelParsingInputFormat::onBackgroundException(size_t offset) background_exception = std::current_exception(); if (ParsingException * e = exception_cast(background_exception)) { + /// NOTE: it is not that safe to use line number hack here (may exceed INT_MAX) if (e->getLineNumber() != -1) - e->setLineNumber(e->getLineNumber() + offset); + e->setLineNumber(static_cast(e->getLineNumber() + offset)); auto file_name = getFileNameFromReadBuffer(getReadBuffer()); if (!file_name.empty()) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 427c159314b..dd2826287b2 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -161,7 +161,7 @@ void ParquetBlockInputFormat::prepareReader() /// STRUCT type require the number of indexes equal to the number of /// nested elements, so we should recursively /// count the number of indices we need for this type. - int indexes_count = countIndicesForType(schema->field(i)->type()); + int indexes_count = static_cast(countIndicesForType(schema->field(i)->type())); const auto & name = schema->field(i)->name(); if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? boost::to_lower_copy(name) : name)) diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp index c26b6b39e0d..6eacfe621e1 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp @@ -50,7 +50,11 @@ bool RegexpFieldExtractor::parseRow(PeekableReadBuffer & buf) if (line_size > 0 && buf.position()[line_size - 1] == '\r') --line_to_match; - bool match = re2_st::RE2::FullMatchN(re2_st::StringPiece(buf.position(), line_to_match), regexp, re2_arguments_ptrs.data(), re2_arguments_ptrs.size()); + bool match = re2_st::RE2::FullMatchN( + re2_st::StringPiece(buf.position(), line_to_match), + regexp, + re2_arguments_ptrs.data(), + static_cast(re2_arguments_ptrs.size())); if (!match && !skip_unmatched) throw Exception("Line \"" + std::string(buf.position(), line_to_match) + "\" doesn't match the regexp.", ErrorCodes::INCORRECT_DATA); diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index aff4557a4b7..72e89ea0013 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -350,7 +350,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx Expected expected; Tokens tokens(buf->position(), buf->buffer().end()); - IParser::Pos token_iterator(tokens, settings.max_parser_depth); + IParser::Pos token_iterator(tokens, static_cast(settings.max_parser_depth)); ASTPtr ast; bool parsed = parser.parse(token_iterator, ast, expected); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index b340073e73d..59f9668d3a8 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ 
b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -853,7 +853,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead( const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, ContextPtr context, - unsigned num_streams, + size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, const Names & real_column_names, diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 0a013748e91..15258eb6c40 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -140,7 +140,7 @@ public: const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, ContextPtr context, - unsigned num_streams, + size_t num_streams, std::shared_ptr max_block_numbers_to_read, const MergeTreeData & data, const Names & real_column_names, diff --git a/src/Processors/Sources/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp index e868182f49b..ecc80bef40b 100644 --- a/src/Processors/Sources/MySQLSource.cpp +++ b/src/Processors/Sources/MySQLSource.cpp @@ -141,7 +141,7 @@ namespace read_bytes_size += 2; break; case ValueType::vtUInt32: - assert_cast(column).insertValue(value.getUInt()); + assert_cast(column).insertValue(static_cast(value.getUInt())); read_bytes_size += 4; break; case ValueType::vtUInt64: @@ -171,7 +171,7 @@ namespace read_bytes_size += 2; break; case ValueType::vtInt32: - assert_cast(column).insertValue(value.getInt()); + assert_cast(column).insertValue(static_cast(value.getInt())); read_bytes_size += 4; break; case ValueType::vtInt64: @@ -236,7 +236,7 @@ namespace readDateTimeText(time, in, assert_cast(data_type).getTimeZone()); if (time < 0) time = 0; - assert_cast(column).insertValue(time); + assert_cast(column).insertValue(static_cast(time)); read_bytes_size += 4; break; } diff --git a/src/Processors/Sources/SQLiteSource.cpp b/src/Processors/Sources/SQLiteSource.cpp index d2c6f2ebb23..79c4be7f692 100644 --- a/src/Processors/Sources/SQLiteSource.cpp +++ b/src/Processors/Sources/SQLiteSource.cpp @@ -35,7 +35,11 @@ SQLiteSource::SQLiteSource( description.init(sample_block); sqlite3_stmt * compiled_stmt = nullptr; - int status = sqlite3_prepare_v2(sqlite_db.get(), query_str.c_str(), query_str.size() + 1, &compiled_stmt, nullptr); + int status = sqlite3_prepare_v2( + sqlite_db.get(), + query_str.c_str(), + static_cast(query_str.size() + 1), + &compiled_stmt, nullptr); if (status != SQLITE_OK) throw Exception(ErrorCodes::SQLITE_ENGINE_ERROR, @@ -109,7 +113,7 @@ Chunk SQLiteSource::generate() return Chunk(std::move(columns), num_rows); } -void SQLiteSource::insertValue(IColumn & column, ExternalResultDescription::ValueType type, size_t idx) +void SQLiteSource::insertValue(IColumn & column, ExternalResultDescription::ValueType type, int idx) { switch (type) { @@ -120,7 +124,7 @@ void SQLiteSource::insertValue(IColumn & column, ExternalResultDescription::Valu assert_cast(column).insertValue(sqlite3_column_int(compiled_statement.get(), idx)); break; case ValueType::vtUInt32: - assert_cast(column).insertValue(sqlite3_column_int64(compiled_statement.get(), idx)); + assert_cast(column).insertValue(static_cast(sqlite3_column_int64(compiled_statement.get(), idx))); break; case ValueType::vtUInt64: /// There is no uint64 in sqlite3, only int and int64 diff --git a/src/Processors/Sources/SQLiteSource.h b/src/Processors/Sources/SQLiteSource.h index d792483c70f..c1bae4d8a67 100644 --- 
a/src/Processors/Sources/SQLiteSource.h +++ b/src/Processors/Sources/SQLiteSource.h @@ -33,7 +33,7 @@ private: Chunk generate() override; - void insertValue(IColumn & column, ExternalResultDescription::ValueType type, size_t idx); + void insertValue(IColumn & column, ExternalResultDescription::ValueType type, int idx); String query_str; UInt64 max_block_size; diff --git a/src/Processors/Sources/ShellCommandSource.cpp b/src/Processors/Sources/ShellCommandSource.cpp index 8598b0197fc..3f70abaea6d 100644 --- a/src/Processors/Sources/ShellCommandSource.cpp +++ b/src/Processors/Sources/ShellCommandSource.cpp @@ -77,7 +77,7 @@ static bool pollFd(int fd, size_t timeout_milliseconds, int events) while (true) { - res = poll(&pfd, 1, timeout_milliseconds); + res = poll(&pfd, 1, static_cast(timeout_milliseconds)); if (res < 0) { @@ -527,7 +527,7 @@ Pipe ShellCommandSourceCoordinator::createPipe( } else { - auto descriptor = i + 2; + int descriptor = static_cast(i) + 2; auto it = process->write_fds.find(descriptor); if (it == process->write_fds.end()) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Process does not contain descriptor to write {}", descriptor); diff --git a/src/Processors/TTL/ITTLAlgorithm.cpp b/src/Processors/TTL/ITTLAlgorithm.cpp index 489941950b5..c71ad740719 100644 --- a/src/Processors/TTL/ITTLAlgorithm.cpp +++ b/src/Processors/TTL/ITTLAlgorithm.cpp @@ -48,13 +48,13 @@ ColumnPtr ITTLAlgorithm::executeExpressionAndGetColumn( UInt32 ITTLAlgorithm::getTimestampByIndex(const IColumn * column, size_t index) const { if (const ColumnUInt16 * column_date = typeid_cast(column)) - return date_lut.fromDayNum(DayNum(column_date->getData()[index])); + return static_cast(date_lut.fromDayNum(DayNum(column_date->getData()[index]))); else if (const ColumnUInt32 * column_date_time = typeid_cast(column)) return column_date_time->getData()[index]; else if (const ColumnConst * column_const = typeid_cast(column)) { if (typeid_cast(&column_const->getDataColumn())) - return date_lut.fromDayNum(DayNum(column_const->getValue())); + return static_cast(date_lut.fromDayNum(DayNum(column_const->getValue()))); else if (typeid_cast(&column_const->getDataColumn())) return column_const->getValue(); } diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 16abb72cbd4..78ae6b8771f 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -40,7 +40,9 @@ static FillColumnDescription::StepFunction getStepFunction( { #define DECLARE_CASE(NAME) \ case IntervalKind::NAME: \ - return [step, scale, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(static_cast(field.get()), step, date_lut, scale); }; + return [step, scale, &date_lut](Field & field) { \ + field = Add##NAME##sImpl::execute(static_cast(\ + field.get()), static_cast(step), date_lut, scale); }; FOR_EACH_INTERVAL_KIND(DECLARE_CASE) #undef DECLARE_CASE diff --git a/src/Processors/Transforms/MongoDBSource.cpp b/src/Processors/Transforms/MongoDBSource.cpp index b548e8c4184..9eef17cf40d 100644 --- a/src/Processors/Transforms/MongoDBSource.cpp +++ b/src/Processors/Transforms/MongoDBSource.cpp @@ -184,7 +184,7 @@ namespace break; case Poco::MongoDB::ElementTraits::TypeId: assert_cast &>(column).getData().push_back( - static_cast &>(value).value()); + static_cast(static_cast &>(value).value())); break; case Poco::MongoDB::ElementTraits::TypeId: assert_cast &>(column).getData().push_back(static_cast( @@ -282,7 +282,7 @@ namespace 
ErrorCodes::TYPE_MISMATCH}; assert_cast(column).getData().push_back( - static_cast &>(value).value().epochTime()); + static_cast(static_cast &>(value).value().epochTime())); break; } case ValueType::vtUUID: diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index d6c57f69376..83b0b202d74 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -62,20 +62,21 @@ static int compareValuesWithOffset(const IColumn * _compared_column, _compared_column); const auto * reference_column = assert_cast( _reference_column); + + using ValueType = typename ColumnType::ValueType; // Note that the storage type of offset returned by get<> is different, so // we need to specify the type explicitly. - const typename ColumnType::ValueType offset - = _offset.get(); + const ValueType offset = static_cast(_offset.get()); assert(offset >= 0); const auto compared_value_data = compared_column->getDataAt(compared_row); - assert(compared_value_data.size == sizeof(typename ColumnType::ValueType)); - auto compared_value = unalignedLoad( + assert(compared_value_data.size == sizeof(ValueType)); + auto compared_value = unalignedLoad( compared_value_data.data); const auto reference_value_data = reference_column->getDataAt(reference_row); - assert(reference_value_data.size == sizeof(typename ColumnType::ValueType)); - auto reference_value = unalignedLoad( + assert(reference_value_data.size == sizeof(ValueType)); + auto reference_value = unalignedLoad( reference_value_data.data); bool is_overflow; @@ -84,15 +85,6 @@ static int compareValuesWithOffset(const IColumn * _compared_column, else is_overflow = common::addOverflow(reference_value, offset, reference_value); -// fmt::print(stderr, -// "compared [{}] = {}, old ref {}, shifted ref [{}] = {}, offset {} preceding {} overflow {} to negative {}\n", -// compared_row, toString(compared_value), -// // fmt doesn't like char8_t. 
-// static_cast(unalignedLoad(reference_value_data.data)), -// reference_row, toString(reference_value), -// toString(offset), offset_is_preceding, -// is_overflow, offset_is_preceding); - if (is_overflow) { if (offset_is_preceding) diff --git a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp index 43bb5fc7083..4596bbb8961 100644 --- a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp +++ b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp @@ -126,12 +126,12 @@ bool RemoteQueryExecutorReadContext::checkTimeoutImpl(bool blocking) epoll_event events[3]; events[0].data.fd = events[1].data.fd = events[2].data.fd = -1; - int num_events = epoll.getManyReady(3, events, blocking); + size_t num_events = epoll.getManyReady(3, events, blocking); bool is_socket_ready = false; bool is_pipe_alarmed = false; - for (int i = 0; i < num_events; ++i) + for (size_t i = 0; i < num_events; ++i) { if (events[i].data.fd == connection_fd) is_socket_ready = true; diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index a2a2db75d68..a9373555af7 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -1082,7 +1082,8 @@ namespace NamesAndTypesList columns; for (size_t column_idx : collections::range(external_table.columns_size())) { - const auto & name_and_type = external_table.columns(column_idx); + /// TODO: consider changing protocol + const auto & name_and_type = external_table.columns(static_cast(column_idx)); NameAndTypePair column; column.name = name_and_type.name(); if (column.name.empty()) diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp index c8ae9c6e07c..c8015cfd185 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.cpp @@ -136,7 +136,7 @@ void WriteBufferFromHTTPServerResponse::nextImpl() WriteBufferFromHTTPServerResponse::WriteBufferFromHTTPServerResponse( HTTPServerResponse & response_, bool is_http_method_head_, - unsigned keep_alive_timeout_, + size_t keep_alive_timeout_, bool compress_, CompressionMethod compression_method_) : BufferWithOwnMemory(DBMS_DEFAULT_BUFFER_SIZE) diff --git a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h index 6905d5df8b5..ce677616755 100644 --- a/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h +++ b/src/Server/HTTP/WriteBufferFromHTTPServerResponse.h @@ -36,7 +36,7 @@ public: WriteBufferFromHTTPServerResponse( HTTPServerResponse & response_, bool is_http_method_head_, - unsigned keep_alive_timeout_, + size_t keep_alive_timeout_, bool compress_ = false, /// If true - set Content-Encoding header and compress the result. CompressionMethod compression_method_ = CompressionMethod::None); @@ -105,7 +105,7 @@ private: bool is_http_method_head; bool add_cors_header = false; - unsigned keep_alive_timeout = 0; + size_t keep_alive_timeout = 0; bool compress = false; CompressionMethod compression_method; int compression_level = 1; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index d02da92c613..2b63524fb79 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -622,8 +622,10 @@ void HTTPHandler::processQuery( /// Request body can be compressed using algorithm specified in the Content-Encoding header. 
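// Editor's note: zstd_window_log_max is a wide integer setting, but zstd's
// corresponding decompression parameter is an int, which is why the hunk below
// narrows the value once before handing it to the read-buffer wrapper. A sketch
// of where that int ultimately lands, assuming the plain zstd C API:
//
//     #include <zstd.h>
//     ZSTD_DCtx * dctx = ZSTD_createDCtx();
//     ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, window_log_max);  /// takes int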
String http_request_compression_method_str = request.get("Content-Encoding", ""); + int zstd_window_log_max = static_cast(context->getSettingsRef().zstd_window_log_max); auto in_post = wrapReadBufferWithCompressionMethod( - wrapReadBufferReference(request.getStream()), chooseCompressionMethod({}, http_request_compression_method_str), context->getSettingsRef().zstd_window_log_max); + wrapReadBufferReference(request.getStream()), + chooseCompressionMethod({}, http_request_compression_method_str), zstd_window_log_max); /// The data can also be compressed using incompatible internal algorithm. This is indicated by /// 'decompress' query parameter. @@ -749,7 +751,7 @@ void HTTPHandler::processQuery( /// (using Accept-Encoding header) and 'enable_http_compression' setting is turned on. used_output.out->setCompression(client_supports_http_compression && settings.enable_http_compression); if (client_supports_http_compression) - used_output.out->setCompressionLevel(settings.http_zlib_compression_level); + used_output.out->setCompressionLevel(static_cast(settings.http_zlib_compression_level)); used_output.out->setSendProgress(settings.send_progress_in_http_headers); used_output.out->setSendProgressInterval(settings.http_headers_progress_interval_ms); diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 94e3597f88e..ea91dbc3450 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -126,7 +126,8 @@ struct SocketInterruptablePollWrapper do { Poco::Timestamp start; - rc = epoll_wait(epollfd, evout, 2, remaining_time.totalMilliseconds()); + /// TODO: use epoll_pwait() for more precise timers + rc = epoll_wait(epollfd, evout, 2, static_cast(remaining_time.totalMilliseconds())); if (rc < 0 && errno == EINTR) { Poco::Timestamp end; @@ -380,7 +381,7 @@ void KeeperTCPHandler::runImpl() response->zxid); UInt8 single_byte = 1; - [[maybe_unused]] int result = write(response_fd, &single_byte, sizeof(single_byte)); + [[maybe_unused]] ssize_t result = write(response_fd, &single_byte, sizeof(single_byte)); }; keeper_dispatcher->registerSession(session_id, response_callback); diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 8e701956d29..8e2d99e2909 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -63,8 +63,11 @@ static String showTableStatusReplacementQuery(const String & query); static String killConnectionIdReplacementQuery(const String & query); static String selectLimitReplacementQuery(const String & query); -MySQLHandler::MySQLHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, - bool ssl_enabled, size_t connection_id_) +MySQLHandler::MySQLHandler( + IServer & server_, + TCPServer & tcp_server_, + const Poco::Net::StreamSocket & socket_, + bool ssl_enabled, uint32_t connection_id_) : Poco::Net::TCPServerConnection(socket_) , server(server_) , tcp_server(tcp_server_) @@ -215,7 +218,7 @@ void MySQLHandler::finishHandshake(MySQLProtocol::ConnectionPhase::HandshakeResp auto read_bytes = [this, &buf, &pos, &packet_size](size_t count) -> void { while (pos < count) { - int ret = socket().receiveBytes(buf + pos, packet_size - pos); + int ret = socket().receiveBytes(buf + pos, static_cast(packet_size - pos)); if (ret == 0) { throw Exception("Cannot read all data. Bytes read: " + std::to_string(pos) + ". 
Bytes expected: 3", ErrorCodes::CANNOT_READ_ALL_DATA); @@ -376,7 +379,14 @@ void MySQLHandler::finishHandshakeSSL( } #if USE_SSL -MySQLHandlerSSL::MySQLHandlerSSL(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_, RSA & public_key_, RSA & private_key_) +MySQLHandlerSSL::MySQLHandlerSSL( + IServer & server_, + TCPServer & tcp_server_, + const Poco::Net::StreamSocket & socket_, + bool ssl_enabled, + uint32_t connection_id_, + RSA & public_key_, + RSA & private_key_) : MySQLHandler(server_, tcp_server_, socket_, ssl_enabled, connection_id_) , public_key(public_key_) , private_key(private_key_) diff --git a/src/Server/MySQLHandler.h b/src/Server/MySQLHandler.h index 2f43d471c40..3366e8792c9 100644 --- a/src/Server/MySQLHandler.h +++ b/src/Server/MySQLHandler.h @@ -31,7 +31,12 @@ class TCPServer; class MySQLHandler : public Poco::Net::TCPServerConnection { public: - MySQLHandler(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_); + MySQLHandler( + IServer & server_, + TCPServer & tcp_server_, + const Poco::Net::StreamSocket & socket_, + bool ssl_enabled, + uint32_t connection_id_); void run() final; @@ -57,7 +62,7 @@ protected: IServer & server; TCPServer & tcp_server; Poco::Logger * log; - UInt64 connection_id = 0; + uint32_t connection_id = 0; uint32_t server_capabilities = 0; uint32_t client_capabilities = 0; @@ -81,7 +86,14 @@ protected: class MySQLHandlerSSL : public MySQLHandler { public: - MySQLHandlerSSL(IServer & server_, TCPServer & tcp_server_, const Poco::Net::StreamSocket & socket_, bool ssl_enabled, size_t connection_id_, RSA & public_key_, RSA & private_key_); + MySQLHandlerSSL( + IServer & server_, + TCPServer & tcp_server_, + const Poco::Net::StreamSocket & socket_, + bool ssl_enabled, + uint32_t connection_id_, + RSA & public_key_, + RSA & private_key_); private: void authPluginSSL() override; diff --git a/src/Server/MySQLHandlerFactory.cpp b/src/Server/MySQLHandlerFactory.cpp index c02a3015945..cbcddbb444a 100644 --- a/src/Server/MySQLHandlerFactory.cpp +++ b/src/Server/MySQLHandlerFactory.cpp @@ -127,7 +127,7 @@ void MySQLHandlerFactory::generateRSAKeys() Poco::Net::TCPServerConnection * MySQLHandlerFactory::createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) { - size_t connection_id = last_connection_id++; + uint32_t connection_id = last_connection_id++; LOG_TRACE(log, "MySQL connection. Id: {}. 
Address: {}", connection_id, socket.peerAddress().toString()); #if USE_SSL return new MySQLHandlerSSL(server, tcp_server, socket, ssl_enabled, connection_id, *public_key, *private_key); diff --git a/src/Server/MySQLHandlerFactory.h b/src/Server/MySQLHandlerFactory.h index 38caae922ee..fa4ce93f765 100644 --- a/src/Server/MySQLHandlerFactory.h +++ b/src/Server/MySQLHandlerFactory.h @@ -36,7 +36,7 @@ private: bool ssl_enabled = false; #endif - std::atomic last_connection_id = 0; + std::atomic last_connection_id = 0; public: explicit MySQLHandlerFactory(IServer & server_); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 25a832ab7e3..2b8c49dafa8 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -841,7 +841,7 @@ void TCPHandler::processTablesStatusRequest() if (auto * replicated_table = dynamic_cast(table.get())) { status.is_replicated = true; - status.absolute_delay = replicated_table->getAbsoluteDelay(); + status.absolute_delay = static_cast(replicated_table->getAbsoluteDelay()); } else status.is_replicated = false; //-V1048 diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index c36ce1e9378..9c8d3ca60f3 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -159,7 +159,7 @@ private: UInt64 client_version_major = 0; UInt64 client_version_minor = 0; UInt64 client_version_patch = 0; - UInt64 client_tcp_protocol_version = 0; + UInt32 client_tcp_protocol_version = 0; String quota_key; /// Connection settings, which are extracted from a context. diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index 9dc3d773e01..4b9667aa95d 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -106,7 +106,7 @@ namespace for (size_t replica_index = 1; replica_index <= replicas; ++replica_index) { - address.replica_index = replica_index; + address.replica_index = static_cast(replica_index); make_connection(address); } } diff --git a/src/Storages/FileLog/DirectoryWatcherBase.cpp b/src/Storages/FileLog/DirectoryWatcherBase.cpp index 005e1e5fd1b..efcd70d6742 100644 --- a/src/Storages/FileLog/DirectoryWatcherBase.cpp +++ b/src/Storages/FileLog/DirectoryWatcherBase.cpp @@ -70,10 +70,10 @@ void DirectoryWatcherBase::watchFunc() while (!stopped) { const auto & settings = owner.storage.getFileLogSettings(); - if (poll(&pfd, 1, milliseconds_to_wait) > 0 && pfd.revents & POLLIN) + if (poll(&pfd, 1, static_cast(milliseconds_to_wait)) > 0 && pfd.revents & POLLIN) { milliseconds_to_wait = settings->poll_directory_watch_events_backoff_init.totalMilliseconds(); - int n = read(fd, buffer.data(), buffer.size()); + ssize_t n = read(fd, buffer.data(), buffer.size()); int i = 0; if (n > 0) { diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 7848b75deec..722843a7ab6 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -315,7 +315,7 @@ Pipe StorageFileLog::read( ContextPtr local_context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, - unsigned /* num_streams */) + size_t /* num_streams */) { /// If there are MVs depended on this table, we just forbid reading if (!local_context->getSettingsRef().stream_like_engine_allow_direct_select) diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 4295a8a764a..56f2d40ef5a 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ 
b/src/Storages/FileLog/StorageFileLog.h @@ -54,7 +54,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; void drop() override; diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h index dd77fc70358..3726d3aae96 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h +++ b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h @@ -52,7 +52,7 @@ private: std::future asyncReadInto(char * data, size_t size); IAsynchronousReader & reader; - Int32 priority; + size_t priority; std::shared_ptr impl; std::future prefetch_future; Memory<> prefetch_buffer; diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index 4aebcd6f6ab..3f5c81dc01b 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -3,6 +3,7 @@ #if USE_HDFS #include #include +#include #include #include @@ -90,7 +91,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory(num_bytes_to_read)); if (bytes_read < 0) throw Exception(ErrorCodes::NETWORK_ERROR, "Fail to read from HDFS: {}, file path: {}. Error: {}", diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 138c92ea62d..bbabd523c45 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -214,8 +214,8 @@ ColumnsDescription StorageHDFS::getTableStructureFromData( return nullptr; auto compression = chooseCompressionMethod(*it, compression_method); auto impl = std::make_unique(uri_without_path, *it++, ctx->getGlobalContext()->getConfigRef(), ctx->getReadSettings()); - const auto zstd_window_log_max = ctx->getSettingsRef().zstd_window_log_max; - return wrapReadBufferWithCompressionMethod(std::move(impl), compression, zstd_window_log_max); + const Int64 zstd_window_log_max = ctx->getSettingsRef().zstd_window_log_max; + return wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); }; ColumnsDescription columns; @@ -356,8 +356,8 @@ bool HDFSSource::initialize() auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method); auto impl = std::make_unique( uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - const auto zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, zstd_window_log_max); + const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; + read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); auto input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size); @@ -550,7 +550,7 @@ Pipe StorageHDFS::read( ContextPtr context_, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { std::shared_ptr iterator_wrapper{nullptr}; if (distributed_processing) diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 90a42d0c692..b641f5bfb43 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -40,7 +40,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t 
num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) override; diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index 467203c58f6..64fdf77dbc2 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -72,7 +72,7 @@ Pipe StorageHDFSCluster::read( ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t /*max_block_size*/, - unsigned /*num_streams*/) + size_t /*num_streams*/) { auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef()); diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 3239a1e4076..0d6f2bbe1b8 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -32,7 +32,7 @@ public: std::string getName() const override { return "HDFSCluster"; } Pipe read(const Names &, const StorageSnapshotPtr &, SelectQueryInfo &, - ContextPtr, QueryProcessingStage::Enum, size_t /*max_block_size*/, unsigned /*num_streams*/) override; + ContextPtr, QueryProcessingStage::Enum, size_t /*max_block_size*/, size_t /*num_streams*/) override; QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/HDFS/WriteBufferFromHDFS.cpp index a179f484652..1f952ec2bd9 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/HDFS/WriteBufferFromHDFS.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include namespace DB @@ -57,7 +58,7 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl int write(const char * start, size_t size) const { - int bytes_written = hdfsWrite(fs.get(), fout, start, size); + int bytes_written = hdfsWrite(fs.get(), fout, start, safe_cast(size)); if (write_settings.remote_throttler) write_settings.remote_throttler->add(bytes_written); diff --git a/src/Storages/Hive/HiveFile.cpp b/src/Storages/Hive/HiveFile.cpp index fc08c046f93..8f5b1b5f5fd 100644 --- a/src/Storages/Hive/HiveFile.cpp +++ b/src/Storages/Hive/HiveFile.cpp @@ -210,7 +210,7 @@ std::unique_ptr HiveORCFile::buildMinMaxIndex(c { size_t pos = it->second; /// Attention: column statistics start from 1. 0 has special purpose. 
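The WriteBufferFromHDFS hunk above switches to safe_cast, which refuses to narrow silently. A rough, self-contained sketch of such a checked conversion, assuming C++20's std::in_range; this is an illustration, not ClickHouse's actual safe_cast:

    #include <cstddef>
    #include <cstdio>
    #include <stdexcept>
    #include <utility>   // std::in_range (C++20)

    /// Illustration only: refuse to narrow when the value cannot be
    /// represented in the destination type, instead of truncating.
    template <typename To, typename From>
    To checked_cast(From value)
    {
        if (!std::in_range<To>(value))
            throw std::out_of_range("checked_cast: value does not fit");
        return static_cast<To>(value);
    }

    int main()
    {
        const size_t write_size = 4096;
        std::printf("chunk: %d bytes\n", checked_cast<int>(write_size));   /// fits

        try
        {
            checked_cast<int>(static_cast<size_t>(-1));   /// SIZE_MAX: rejected
        }
        catch (const std::out_of_range & e)
        {
            std::printf("rejected: %s\n", e.what());
        }
        return 0;
    }

The ORC and Parquet hunks continuing below stay with plain static_cast, since the column and row-group indices involved are bounded by file metadata counts.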
- const orc::ColumnStatistics * col_stats = statistics->getColumnStatistics(pos + 1); + const orc::ColumnStatistics * col_stats = statistics->getColumnStatistics(static_cast(pos + 1)); idx->hyperrectangle[i] = buildRange(col_stats); } ++i; @@ -297,7 +297,7 @@ void HiveParquetFile::loadSplitMinMaxIndexesImpl() const auto * schema = meta->schema(); for (size_t pos = 0; pos < num_cols; ++pos) { - String column{schema->Column(pos)->name()}; + String column{schema->Column(static_cast(pos))->name()}; boost::to_lower(column); parquet_column_positions[column] = pos; } @@ -306,7 +306,7 @@ void HiveParquetFile::loadSplitMinMaxIndexesImpl() split_minmax_idxes.resize(num_row_groups); for (size_t i = 0; i < num_row_groups; ++i) { - auto row_group_meta = meta->RowGroup(i); + auto row_group_meta = meta->RowGroup(static_cast(i)); split_minmax_idxes[i] = std::make_shared(); split_minmax_idxes[i]->hyperrectangle.resize(num_cols); @@ -321,7 +321,7 @@ void HiveParquetFile::loadSplitMinMaxIndexesImpl() continue; size_t pos = mit->second; - auto col_chunk = row_group_meta->ColumnChunk(pos); + auto col_chunk = row_group_meta->ColumnChunk(static_cast(pos)); if (!col_chunk->is_stats_set()) continue; diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 01ee5a8c3c5..5bbc71364c6 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -727,7 +727,7 @@ HiveFilePtr StorageHive::getHiveFileIfNeeded( hive_file->getPath(), hive_file->describeMinMaxIndex(sub_minmax_idxes[i])); - skip_splits.insert(i); + skip_splits.insert(static_cast(i)); } } hive_file->setSkipSplits(skip_splits); @@ -749,7 +749,7 @@ Pipe StorageHive::read( ContextPtr context_, QueryProcessingStage::Enum /* processed_stage */, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { lazyInitialize(); @@ -809,7 +809,7 @@ Pipe StorageHive::read( } if (num_streams > sources_info->hive_files.size()) - num_streams = sources_info->hive_files.size(); + num_streams = static_cast(sources_info->hive_files.size()); Pipes pipes; for (size_t i = 0; i < num_streams; ++i) @@ -829,7 +829,7 @@ Pipe StorageHive::read( } HiveFiles StorageHive::collectHiveFiles( - unsigned max_threads, + size_t max_threads, const SelectQueryInfo & query_info, const HiveTableMetadataPtr & hive_table_metadata, const HDFSFSPtr & fs, @@ -937,7 +937,13 @@ StorageHive::totalRowsImpl(const Settings & settings, const SelectQueryInfo & qu auto hive_table_metadata = hive_metastore_client->getTableMetadata(hive_database, hive_table); HDFSBuilderWrapper builder = createHDFSBuilder(hdfs_namenode_url, getContext()->getGlobalContext()->getConfigRef()); HDFSFSPtr fs = createHDFSFS(builder.get()); - HiveFiles hive_files = collectHiveFiles(settings.max_threads, query_info, hive_table_metadata, fs, context_, prune_level); + HiveFiles hive_files = collectHiveFiles( + settings.max_threads, + query_info, + hive_table_metadata, + fs, + context_, + prune_level); UInt64 total_rows = 0; for (const auto & hive_file : hive_files) diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 9c02d228f97..363042621c7 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -60,7 +60,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/) override; @@ -98,7 +98,7 @@ 
private: void initMinMaxIndexExpression(); HiveFiles collectHiveFiles( - unsigned max_threads, + size_t max_threads, const SelectQueryInfo & query_info, const HiveTableMetadataPtr & hive_table_metadata, const HDFSFSPtr & fs, diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 5b12b720f1c..7a704a17f4d 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -108,7 +108,7 @@ Pipe IStorage::watch( ContextPtr /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/) + size_t /*num_streams*/) { throw Exception("Method watch is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } @@ -120,7 +120,7 @@ Pipe IStorage::read( ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/) + size_t /*num_streams*/) { throw Exception("Method read is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } @@ -133,7 +133,7 @@ void IStorage::read( ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { auto pipe = read(column_names, storage_snapshot, query_info, context, processed_stage, max_block_size, num_streams); readFromPipe(query_plan, std::move(pipe), column_names, storage_snapshot, query_info, context, getName()); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 242f17d6f20..fd48d22b12b 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -323,7 +323,7 @@ public: ContextPtr /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/); + size_t /*num_streams*/); /// Returns true if FINAL modifier must be added to SELECT query depending on required columns. /// It's needed for ReplacingMergeTree wrappers such as MaterializedMySQL and MaterializedPostrgeSQL @@ -357,7 +357,7 @@ private: ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/); + size_t /*num_streams*/); public: /// Other version of read which adds reading step to query plan. @@ -370,7 +370,7 @@ public: ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/); + size_t /*num_streams*/); /** Writes the data to a table. * Receives a description of the query, which can contain information about the data write method. 
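The IStorage hunks above are the root of most of this patch: read() and watch() now take num_streams as size_t, and every override in the tree must flip in the same commit, because a mismatched parameter type silently stops overriding; the override keyword turns that into a hard error. A minimal sketch with a hypothetical IStorageLike:

    #include <cstddef>
    #include <cstdio>

    /// Hypothetical mirror of the real interface, sketching why the base
    /// class and all overrides must change from unsigned to size_t together.
    struct IStorageLike
    {
        virtual ~IStorageLike() = default;
        virtual void read(size_t num_streams) = 0;   /// was: unsigned num_streams
    };

    struct GoodStorage : IStorageLike
    {
        void read(size_t num_streams) override       /// signature matches: compiles
        {
            std::printf("reading with %zu streams\n", num_streams);
        }
    };

    // struct StaleStorage : IStorageLike
    // {
    //     /// error: marked 'override' but does not override anything --
    //     /// unsigned no longer matches the virtual's parameter type.
    //     void read(unsigned num_streams) override;
    // };

    int main()
    {
        GoodStorage storage;
        IStorageLike & base = storage;
        base.read(8);
        return 0;
    }

The storage-by-storage hunks that follow are this same mechanical propagation.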
diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index fa52850fb39..8e4dd78379e 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -212,7 +212,7 @@ StorageKafka::StorageKafka( , schema_name(getContext()->getMacros()->expand(kafka_settings->kafka_schema.value)) , num_consumers(kafka_settings->kafka_num_consumers.value) , log(&Poco::Logger::get("StorageKafka (" + table_id_.table_name + ")")) - , semaphore(0, num_consumers) + , semaphore(0, static_cast(num_consumers)) , intermediate_commit(kafka_settings->kafka_commit_every_batch.value) , settings_adjustments(createSettingsAdjustments()) , thread_per_consumer(kafka_settings->kafka_thread_per_consumer.value) @@ -291,7 +291,7 @@ Pipe StorageKafka::read( ContextPtr local_context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, - unsigned /* num_streams */) + size_t /* num_streams */) { if (num_created_consumers == 0) return {}; diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 77bad6e17a9..c1c67b19c51 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -53,7 +53,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write( const ASTPtr & query, diff --git a/src/Storages/LiveView/StorageBlocks.h b/src/Storages/LiveView/StorageBlocks.h index bc860a1fa3c..a732ada1da2 100644 --- a/src/Storages/LiveView/StorageBlocks.h +++ b/src/Storages/LiveView/StorageBlocks.h @@ -46,7 +46,7 @@ public: ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/) override + size_t /*num_streams*/) override { return Pipe::unitePipes(std::move(pipes)); } diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index e3d19d0a433..3d27205d638 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -531,7 +531,7 @@ Pipe StorageLiveView::read( ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { std::lock_guard lock(mutex); @@ -556,7 +556,7 @@ Pipe StorageLiveView::watch( ContextPtr local_context, QueryProcessingStage::Enum & processed_stage, size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { ASTWatchQuery & query = typeid_cast(*query_info.query); diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index c6a0379e2ab..31b1c425709 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -143,7 +143,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; Pipe watch( const Names & column_names, @@ -151,7 +151,7 @@ public: ContextPtr context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; std::shared_ptr getBlocksPtr() { return blocks_ptr; } MergeableBlocksPtr getMergeableBlocks() { return mergeable_blocks; } diff --git a/src/Storages/MeiliSearch/SourceMeiliSearch.cpp b/src/Storages/MeiliSearch/SourceMeiliSearch.cpp index 8e37e469e96..b516ad8d0cf 100644 --- 
a/src/Storages/MeiliSearch/SourceMeiliSearch.cpp +++ b/src/Storages/MeiliSearch/SourceMeiliSearch.cpp @@ -174,7 +174,7 @@ size_t MeiliSearchSource::parseJSON(MutableColumns & columns, const JSON & jres) { ++cnt_fields; const auto & name = kv_pair.getName(); - int pos = description.sample_block.getPositionByName(name); + size_t pos = description.sample_block.getPositionByName(name); MutableColumnPtr & col = columns[pos]; DataTypePtr type_ptr = description.sample_block.getByPosition(pos).type; insertWithTypeId(col, kv_pair.getValue(), type_ptr); diff --git a/src/Storages/MeiliSearch/StorageMeiliSearch.cpp b/src/Storages/MeiliSearch/StorageMeiliSearch.cpp index c5966d9e322..30d49edbb10 100644 --- a/src/Storages/MeiliSearch/StorageMeiliSearch.cpp +++ b/src/Storages/MeiliSearch/StorageMeiliSearch.cpp @@ -80,7 +80,7 @@ Pipe StorageMeiliSearch::read( ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned) + size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/MeiliSearch/StorageMeiliSearch.h b/src/Storages/MeiliSearch/StorageMeiliSearch.h index d7a2697730c..5fa7ac2c0e3 100644 --- a/src/Storages/MeiliSearch/StorageMeiliSearch.h +++ b/src/Storages/MeiliSearch/StorageMeiliSearch.h @@ -25,7 +25,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context) override; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 91ecb3a37a0..5b3497bf926 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -140,7 +140,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( const SelectQueryInfo & query_info, ContextPtr context, const UInt64 max_block_size, - const unsigned num_streams, + const size_t num_streams, QueryProcessingStage::Enum processed_stage, std::shared_ptr max_block_numbers_to_read, bool enable_parallel_reading) const @@ -930,7 +930,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd Strings forced_indices; { Tokens tokens(indices.data(), &indices[indices.size()], settings.max_query_size); - IParser::Pos pos(tokens, settings.max_parser_depth); + IParser::Pos pos(tokens, static_cast(settings.max_parser_depth)); Expected expected; if (!parseIdentifiersOrStringLiterals(pos, expected, forced_indices)) throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse force_data_skipping_indices ('{}')", indices); @@ -1279,7 +1279,7 @@ MergeTreeDataSelectAnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar const SelectQueryInfo & query_info, const ActionDAGNodes & added_filter_nodes, ContextPtr context, - unsigned num_streams, + size_t num_streams, std::shared_ptr max_block_numbers_to_read) const { size_t total_parts = parts.size(); @@ -1318,7 +1318,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( const SelectQueryInfo & query_info, ContextPtr context, const UInt64 max_block_size, - const unsigned num_streams, + const size_t num_streams, std::shared_ptr max_block_numbers_to_read, MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr, bool enable_parallel_reading) const diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h 
b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index bb44f260eec..66dd7f7e5db 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -33,7 +33,7 @@ public: const SelectQueryInfo & query_info, ContextPtr context, UInt64 max_block_size, - unsigned num_streams, + size_t num_streams, QueryProcessingStage::Enum processed_stage, std::shared_ptr max_block_numbers_to_read = nullptr, bool enable_parallel_reading = false) const; @@ -46,7 +46,7 @@ public: const SelectQueryInfo & query_info, ContextPtr context, UInt64 max_block_size, - unsigned num_streams, + size_t num_streams, std::shared_ptr max_block_numbers_to_read = nullptr, MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr = nullptr, bool enable_parallel_reading = false) const; @@ -62,7 +62,7 @@ public: const SelectQueryInfo & query_info, const ActionDAGNodes & added_filter_nodes, ContextPtr context, - unsigned num_streams, + size_t num_streams, std::shared_ptr max_block_numbers_to_read = nullptr) const; private: diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index 595e790ea3b..052834358bb 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -116,7 +116,7 @@ MergeTreeIndexAggregatorAnnoy::MergeTreeIndexAggregatorAnnoy( MergeTreeIndexGranulePtr MergeTreeIndexAggregatorAnnoy::getGranuleAndReset() { // NOLINTNEXTLINE(*) - index->build(number_of_trees, /*number_of_threads=*/1); + index->build(static_cast(number_of_trees), /*number_of_threads=*/1); auto granule = std::make_shared(index_name, index_sample_block, index); index = nullptr; return granule; diff --git a/src/Storages/MergeTree/MergeType.cpp b/src/Storages/MergeTree/MergeType.cpp index 4b03f5ab57c..045114578d0 100644 --- a/src/Storages/MergeTree/MergeType.cpp +++ b/src/Storages/MergeTree/MergeType.cpp @@ -10,7 +10,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -MergeType checkAndGetMergeType(UInt64 merge_type) +MergeType checkAndGetMergeType(UInt32 merge_type) { if (auto maybe_merge_type = magic_enum::enum_cast(merge_type)) return *maybe_merge_type; diff --git a/src/Storages/MergeTree/MergeType.h b/src/Storages/MergeTree/MergeType.h index fad1ba33e3e..ce9a40c5931 100644 --- a/src/Storages/MergeTree/MergeType.h +++ b/src/Storages/MergeTree/MergeType.h @@ -22,7 +22,7 @@ enum class MergeType }; /// Check parsed merge_type from raw int and get enum value. 
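checkAndGetMergeType accepts a raw integer only when magic_enum::enum_cast maps it to a real MergeType enumerator; the hunks here just shrink the raw type to the UInt32 that is actually read from the log entry. The same validation can be sketched without magic_enum (the enumerator values below are stand-ins, not the real MergeType):

    #include <cstdint>
    #include <cstdio>
    #include <optional>

    /// Stand-in enumerators; the real code validates MergeType via magic_enum.
    enum class MergeKind : uint32_t { Regular = 0, TTLDelete = 1, TTLRecompress = 2 };

    /// Sketch: map a raw wire integer to an enumerator, or refuse.
    std::optional<MergeKind> tryGetMergeKind(uint32_t raw)
    {
        switch (raw)
        {
            case 0: return MergeKind::Regular;
            case 1: return MergeKind::TTLDelete;
            case 2: return MergeKind::TTLRecompress;
            default: return std::nullopt;
        }
    }

    int main()
    {
        if (const auto kind = tryGetMergeKind(1))
            std::printf("parsed merge type %u\n", static_cast<uint32_t>(*kind));
        if (!tryGetMergeKind(42))
            std::printf("unknown merge type rejected\n");
        return 0;
    }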
-MergeType checkAndGetMergeType(UInt64 merge_type); +MergeType checkAndGetMergeType(UInt32 merge_type); /// Check this merge assigned with TTL bool isTTLMergeType(MergeType merge_type); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index e7882ce4952..d7e3c3b1955 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -240,7 +240,7 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in) if (checkString("merge_type: ", in)) { - UInt64 value; + UInt32 value; in >> value; merge_type = checkAndGetMergeType(value); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp index ce33ac8c467..626295d7255 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.cpp @@ -91,8 +91,8 @@ std::optional ReplicatedMergeTreeMergeStrategyPicker::pickReplicaToExecu void ReplicatedMergeTreeMergeStrategyPicker::refreshState() { const auto settings = storage.getSettings(); - auto threshold = settings->execute_merges_on_single_replica_time_threshold.totalSeconds(); - auto threshold_init = 0; + time_t threshold = settings->execute_merges_on_single_replica_time_threshold.totalSeconds(); + time_t threshold_init = 0; if (settings->allow_remote_fs_zero_copy_replication) threshold_init = settings->remote_fs_execute_merges_on_single_replica_time_threshold.totalSeconds(); @@ -127,7 +127,7 @@ void ReplicatedMergeTreeMergeStrategyPicker::refreshState() active_replicas_tmp.push_back(replica); if (replica == storage.replica_name) { - current_replica_index_tmp = active_replicas_tmp.size() - 1; + current_replica_index_tmp = static_cast(active_replicas_tmp.size() - 1); } } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 6ffcde161da..d6d937ce66f 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1809,9 +1809,9 @@ ReplicatedMergeTreeQueue::Status ReplicatedMergeTreeQueue::getStatus() const Status res; - res.future_parts = future_parts.size(); - res.queue_size = queue.size(); - res.last_queue_update = last_queue_update; + res.future_parts = static_cast(future_parts.size()); + res.queue_size = static_cast(queue.size()); + res.last_queue_update = static_cast(last_queue_update); res.inserts_in_queue = 0; res.merges_in_queue = 0; @@ -1824,7 +1824,7 @@ ReplicatedMergeTreeQueue::Status ReplicatedMergeTreeQueue::getStatus() const for (const LogEntryPtr & entry : queue) { if (entry->create_time && (!res.queue_oldest_time || entry->create_time < res.queue_oldest_time)) - res.queue_oldest_time = entry->create_time; + res.queue_oldest_time = static_cast(entry->create_time); if (entry->type == LogEntry::GET_PART || entry->type == LogEntry::ATTACH_PART) { @@ -1832,7 +1832,7 @@ ReplicatedMergeTreeQueue::Status ReplicatedMergeTreeQueue::getStatus() const if (entry->create_time && (!res.inserts_oldest_time || entry->create_time < res.inserts_oldest_time)) { - res.inserts_oldest_time = entry->create_time; + res.inserts_oldest_time = static_cast(entry->create_time); res.oldest_part_to_get = entry->new_part_name; } } @@ -1843,7 +1843,7 @@ ReplicatedMergeTreeQueue::Status ReplicatedMergeTreeQueue::getStatus() const if (entry->create_time && 
(!res.merges_oldest_time || entry->create_time < res.merges_oldest_time)) { - res.merges_oldest_time = entry->create_time; + res.merges_oldest_time = static_cast(entry->create_time); res.oldest_part_to_merge_to = entry->new_part_name; } } @@ -1854,7 +1854,7 @@ ReplicatedMergeTreeQueue::Status ReplicatedMergeTreeQueue::getStatus() const if (entry->create_time && (!res.part_mutations_oldest_time || entry->create_time < res.part_mutations_oldest_time)) { - res.part_mutations_oldest_time = entry->create_time; + res.part_mutations_oldest_time = static_cast(entry->create_time); res.oldest_part_to_mutate_to = entry->new_part_name; } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 6d1a3efb01d..2ebdd604af2 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -425,6 +425,7 @@ public: struct Status { + /// TODO: consider using UInt64 here UInt32 future_parts; UInt32 queue_size; UInt32 inserts_in_queue; diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index fd313a10bc8..a3d578cf5f2 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -65,7 +65,7 @@ public: ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) override + size_t num_streams) override { query_plan = std::move(*MergeTreeDataSelectExecutor(storage) .readFromParts( diff --git a/src/Storages/MySQL/MySQLHelpers.cpp b/src/Storages/MySQL/MySQLHelpers.cpp index 94c07d2670f..127bdb96eaf 100644 --- a/src/Storages/MySQL/MySQLHelpers.cpp +++ b/src/Storages/MySQL/MySQLHelpers.cpp @@ -23,7 +23,7 @@ createMySQLPoolWithFailover(const StorageMySQLConfiguration & configuration, con return mysqlxx::PoolWithFailover( configuration.database, configuration.addresses, configuration.username, configuration.password, MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS, - mysql_settings.connection_pool_size, + static_cast(mysql_settings.connection_pool_size), mysql_settings.connection_max_tries, mysql_settings.connection_wait_timeout, mysql_settings.connect_timeout, diff --git a/src/Storages/NATS/NATSConnection.cpp b/src/Storages/NATS/NATSConnection.cpp index d33138419e2..70b3599aa09 100644 --- a/src/Storages/NATS/NATSConnection.cpp +++ b/src/Storages/NATS/NATSConnection.cpp @@ -111,7 +111,7 @@ void NATSConnectionManager::connectImpl() { servers[i] = configuration.servers[i].c_str(); } - natsOptions_SetServers(options, servers, configuration.servers.size()); + natsOptions_SetServers(options, servers, static_cast(configuration.servers.size())); } natsOptions_SetMaxReconnect(options, configuration.max_reconnect); natsOptions_SetReconnectWait(options, configuration.reconnect_wait); diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 4a3ba973e67..dea2553700b 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -60,7 +60,7 @@ StorageNATS::StorageNATS( , schema_name(getContext()->getMacros()->expand(nats_settings->nats_schema)) , num_consumers(nats_settings->nats_num_consumers.value) , log(&Poco::Logger::get("StorageNATS (" + table_id_.table_name + ")")) - , semaphore(0, num_consumers) + , semaphore(0, static_cast(num_consumers)) , queue_size(std::max(QUEUE_SIZE, static_cast(getMaxBlockSize()))) , is_attach(is_attach_) { @@ -289,7 +289,7 @@ void 
StorageNATS::read( ContextPtr local_context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, - unsigned /* num_streams */) + size_t /* num_streams */) { if (!consumers_ready) throw Exception("NATS consumers setup not finished. Connection might be lost", ErrorCodes::CANNOT_CONNECT_NATS); diff --git a/src/Storages/NATS/StorageNATS.h b/src/Storages/NATS/StorageNATS.h index 185b39250c8..a5a050d566f 100644 --- a/src/Storages/NATS/StorageNATS.h +++ b/src/Storages/NATS/StorageNATS.h @@ -47,7 +47,7 @@ public: ContextPtr local_context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, - unsigned /* num_streams */) override; + size_t /* num_streams */) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) override; diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index cc80d567d1d..6d12960824a 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -279,7 +279,7 @@ void StorageMaterializedPostgreSQL::read( ContextPtr context_, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { auto nested_table = getNested(); diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h index d8e9e98c662..af0adb10f9f 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h @@ -98,7 +98,7 @@ public: ContextPtr context_, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; /// This method is called only from MateriaizePostgreSQL database engine, because it needs to maintain /// an invariant: a table exists only if its nested table exists. This atomic variable is set to _true_ diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 70838daec24..57f5ddd86e6 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -87,7 +87,7 @@ StorageRabbitMQ::StorageRabbitMQ( , use_user_setup(rabbitmq_settings->rabbitmq_queue_consume.value) , hash_exchange(num_consumers > 1 || num_queues > 1) , log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) - , semaphore(0, num_consumers) + , semaphore(0, static_cast(num_consumers)) , unique_strbase(getRandomName()) , queue_size(std::max(QUEUE_SIZE, static_cast(getMaxBlockSize()))) , milliseconds_to_wait(RESCHEDULE_MS) @@ -674,7 +674,7 @@ void StorageRabbitMQ::read( ContextPtr local_context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, - unsigned /* num_streams */) + size_t /* num_streams */) { if (!rabbit_is_ready) throw Exception("RabbitMQ setup not finished. 
Connection might be lost", ErrorCodes::CANNOT_CONNECT_RABBITMQ); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 455b2fe8f09..a1250f50829 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -50,7 +50,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write( const ASTPtr & query, diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.cpp b/src/Storages/ReadFinalForExternalReplicaStorage.cpp index 3ec7a074fd4..28053c84e20 100644 --- a/src/Storages/ReadFinalForExternalReplicaStorage.cpp +++ b/src/Storages/ReadFinalForExternalReplicaStorage.cpp @@ -35,7 +35,7 @@ void readFinalFromNestedStorage( ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned int num_streams) + size_t num_streams) { NameSet column_names_set = NameSet(column_names.begin(), column_names.end()); auto lock = nested_storage->lockForShare(context->getCurrentQueryId(), context->getSettingsRef().lock_acquire_timeout); @@ -59,7 +59,8 @@ void readFinalFromNestedStorage( } auto nested_snapshot = nested_storage->getStorageSnapshot(nested_metadata, context); - nested_storage->read(query_plan, require_columns_name, nested_snapshot, query_info, context, processed_stage, max_block_size, num_streams); + nested_storage->read( + query_plan, require_columns_name, nested_snapshot, query_info, context, processed_stage, max_block_size, num_streams); if (!query_plan.isInitialized()) { diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.h b/src/Storages/ReadFinalForExternalReplicaStorage.h index 178164b6643..f8d1264ccb3 100644 --- a/src/Storages/ReadFinalForExternalReplicaStorage.h +++ b/src/Storages/ReadFinalForExternalReplicaStorage.h @@ -21,7 +21,7 @@ void readFinalFromNestedStorage( ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned int num_streams); + size_t num_streams); } diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 20b1de51a30..46ddb650eee 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -403,7 +403,7 @@ Pipe StorageEmbeddedRocksDB::read( ContextPtr context_, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { storage_snapshot->check(column_names); @@ -467,7 +467,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) String rocksdb_dir; bool read_only{false}; if (!engine_args.empty()) - ttl = checkAndGetLiteralArgument(engine_args[0], "ttl"); + ttl = static_cast(checkAndGetLiteralArgument(engine_args[0], "ttl")); if (engine_args.size() > 1) rocksdb_dir = checkAndGetLiteralArgument(engine_args[1], "rocksdb_dir"); if (engine_args.size() > 2) diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index 03848510e66..ca0ab7a1840 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -46,7 +46,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; 
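In the RocksDB hunks above, the ttl engine argument is parsed from a 64-bit literal and narrowed to the 32-bit value that a TTL-enabled RocksDB open expects. A defensive variant of that narrowing, using hypothetical names, would check the range first:

    #include <cstdint>
    #include <cstdio>
    #include <limits>
    #include <stdexcept>

    /// Hypothetical helper: engine arguments arrive as 64-bit literals, while
    /// the TTL parameter downstream is 32-bit, hence the explicit cast above.
    int32_t ttlFromLiteral(uint64_t raw_ttl)
    {
        if (raw_ttl > static_cast<uint64_t>(std::numeric_limits<int32_t>::max()))
            throw std::invalid_argument("ttl out of range for the storage engine");
        return static_cast<int32_t>(raw_ttl);
    }

    int main()
    {
        std::printf("ttl = %d seconds\n", ttlFromLiteral(86400));   /// one day: fits
        return 0;
    }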
void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, TableExclusiveLockHolder &) override; diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index f6b397950ed..65b4dce3ad2 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -228,7 +228,7 @@ void StorageBuffer::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { const auto & metadata_snapshot = storage_snapshot->metadata; diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 580742c0c84..387165171b9 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -82,7 +82,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool supportsParallelInsert() const override { return true; } diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index 2839ac03a5b..f9995cceda3 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -169,7 +169,7 @@ Pipe StorageDictionary::read( ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, - const unsigned threads) + const size_t threads) { auto registered_dictionary_name = location == Location::SameDatabaseAndNameAsDictionary ? getStorageID().getInternalDictionaryName() : dictionary_name; auto dictionary = getContext()->getExternalDictionariesLoader().getDictionary(registered_dictionary_name, local_context); diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index f81503910ca..6eadd1b2c21 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -69,7 +69,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned threads) override; + size_t threads) override; static NamesAndTypesList getNamesAndTypes(const DictionaryStructure & dictionary_structure); static String generateNamesAndTypesDescription(const NamesAndTypesList & list); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 4eb6697dd6e..bced552915b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -653,7 +653,7 @@ void StorageDistributed::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { const auto * select_query = query_info.query->as(); if (select_query->final() && local_context->getSettingsRef().allow_experimental_parallel_reading_from_replicas) diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 7cb25ae46ab..334f44a90f9 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -112,7 +112,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t /*max_block_size*/, - unsigned /*num_streams*/) override; + size_t /*num_streams*/) override; bool supportsParallelInsert() const override { return true; } std::optional totalBytes(const Settings &) const override; diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index 2931e62b7ef..cd3cc4d48ac 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -111,7 +111,7 @@ void 
StorageExecutable::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned /*threads*/) + size_t /*threads*/) { auto & script_name = settings.script_name; diff --git a/src/Storages/StorageExecutable.h b/src/Storages/StorageExecutable.h index 2638474082a..2393920fa3c 100644 --- a/src/Storages/StorageExecutable.h +++ b/src/Storages/StorageExecutable.h @@ -41,7 +41,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned threads) override; + size_t threads) override; private: ExecutableSettings settings; diff --git a/src/Storages/StorageExternalDistributed.cpp b/src/Storages/StorageExternalDistributed.cpp index dcb7a90b2f6..7d1eef1e47c 100644 --- a/src/Storages/StorageExternalDistributed.cpp +++ b/src/Storages/StorageExternalDistributed.cpp @@ -181,7 +181,7 @@ void StorageExternalDistributed::read( ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { std::vector> plans; for (const auto & shard : shards) diff --git a/src/Storages/StorageExternalDistributed.h b/src/Storages/StorageExternalDistributed.h index 52a2a7a4106..a1bdb41dded 100644 --- a/src/Storages/StorageExternalDistributed.h +++ b/src/Storages/StorageExternalDistributed.h @@ -55,7 +55,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; private: using Shards = std::unordered_set; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 29f2d0667d9..eb3ed3f8ef3 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -209,7 +209,7 @@ std::unique_ptr createReadBuffer( in.setProgressCallback(context); } - auto zstd_window_log_max = context->getSettingsRef().zstd_window_log_max; + int zstd_window_log_max = static_cast(context->getSettingsRef().zstd_window_log_max); return wrapReadBufferWithCompressionMethod(std::move(nested_buffer), method, zstd_window_log_max); } @@ -645,7 +645,7 @@ Pipe StorageFile::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { if (use_table_fd) { diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index e60e5f6b371..03b3aacb67f 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -48,7 +48,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write( const ASTPtr & query, diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 9cac1e57297..c00e82598b2 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -494,7 +494,7 @@ Pipe StorageGenerateRandom::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { storage_snapshot->check(column_names); diff --git a/src/Storages/StorageGenerateRandom.h b/src/Storages/StorageGenerateRandom.h index 8dc3e490ae7..6b050c07e52 100644 --- a/src/Storages/StorageGenerateRandom.h +++ b/src/Storages/StorageGenerateRandom.h @@ -28,7 +28,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool 
supportsTransactions() const override { return true; } private: diff --git a/src/Storages/StorageInput.cpp b/src/Storages/StorageInput.cpp index 4729d0a5bf8..18e8442c1b5 100644 --- a/src/Storages/StorageInput.cpp +++ b/src/Storages/StorageInput.cpp @@ -57,7 +57,7 @@ Pipe StorageInput::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/) + size_t /*num_streams*/) { Pipes pipes; auto query_context = context->getQueryContext(); diff --git a/src/Storages/StorageInput.h b/src/Storages/StorageInput.h index 991a4f35b7b..da4669aaf37 100644 --- a/src/Storages/StorageInput.h +++ b/src/Storages/StorageInput.h @@ -25,7 +25,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; private: Pipe pipe; diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 2e3e1d443ae..e4f786cd23b 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -585,7 +585,7 @@ Pipe StorageJoin::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned /*num_streams*/) + size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 390af09422c..43515f800d9 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -68,7 +68,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; std::optional totalRows(const Settings & settings) const override; std::optional totalBytes(const Settings & settings) const override; diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index f0bf4e431ae..21be205c0f6 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -408,7 +408,7 @@ Pipe StorageKeeperMap::read( ContextPtr context_, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { checkTable(); storage_snapshot->check(column_names); diff --git a/src/Storages/StorageKeeperMap.h b/src/Storages/StorageKeeperMap.h index 87861362e42..45b32434f15 100644 --- a/src/Storages/StorageKeeperMap.h +++ b/src/Storages/StorageKeeperMap.h @@ -39,7 +39,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) override; diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index c6bc55fd620..9909489d901 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -782,7 +782,7 @@ Pipe StorageLog::read( ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { storage_snapshot->check(column_names); diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 2e677dd3161..a2b1356f240 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -53,7 +53,7 @@ public: ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & 
metadata_snapshot, ContextPtr local_context) override; diff --git a/src/Storages/StorageMaterializedMySQL.cpp b/src/Storages/StorageMaterializedMySQL.cpp index bb69f211a9e..0dc0b1bff0b 100644 --- a/src/Storages/StorageMaterializedMySQL.cpp +++ b/src/Storages/StorageMaterializedMySQL.cpp @@ -40,7 +40,7 @@ void StorageMaterializedMySQL::read( ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned int num_streams) + size_t num_streams) { if (const auto * db = typeid_cast(database)) db->rethrowExceptionIfNeeded(); diff --git a/src/Storages/StorageMaterializedMySQL.h b/src/Storages/StorageMaterializedMySQL.h index a66b7eba804..cbb59e508e8 100644 --- a/src/Storages/StorageMaterializedMySQL.h +++ b/src/Storages/StorageMaterializedMySQL.h @@ -24,8 +24,13 @@ public: bool needRewriteQueryWithFinal(const Names & column_names) const override; void read( - QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & metadata_snapshot, SelectQueryInfo & query_info, - ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & metadata_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, size_t num_streams) override; SinkToStoragePtr write(const ASTPtr &, const StorageMetadataPtr &, ContextPtr) override { throwNotAllowed(); } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index b01415f9590..e256e087728 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -150,7 +150,7 @@ void StorageMaterializedView::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, const size_t max_block_size, - const unsigned num_streams) + const size_t num_streams) { auto storage = getTargetTable(); auto lock = storage->lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 1d8808b302e..af2dedf8164 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -91,7 +91,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; Strings getDataPaths() const override; diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index e4dbfe15095..957aae450c8 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -231,7 +231,7 @@ Pipe StorageMemory::read( ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned num_streams) + size_t num_streams) { storage_snapshot->check(column_names); diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 3889acb952b..c739088dbe4 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -51,7 +51,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool supportsParallelInsert() const override { return true; } bool supportsSubcolumns() const override { return true; } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 
9891340a0d0..c9067148739 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -249,7 +249,7 @@ void StorageMerge::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, const size_t max_block_size, - unsigned num_streams) + size_t num_streams) { /** Just in case, turn off optimization "transfer to PREWHERE", * since there is no certainty that it works when one of table is MergeTree and other is not. diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 6bf68660803..33406321100 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -61,7 +61,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; void checkAlterIsPossible(const AlterCommands & commands, ContextPtr context) const override; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 7cfce882e7a..aac5372a83e 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -220,7 +220,7 @@ void StorageMergeTree::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { /// If true, then we will ask initiator if we can read chosen ranges bool enable_parallel_reading = local_context->getClientInfo().collaborate_with_initiator; diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index ea2527e44a7..745546b96f6 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -66,7 +66,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; std::optional totalRows(const Settings &) const override; std::optional totalRowsByPartitionPredicate(const SelectQueryInfo &, ContextPtr) const override; diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index dce45b2431a..3ae9c974770 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -150,7 +150,7 @@ Pipe StorageMongoDB::read( ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned) + size_t /*num_streams*/) { connectIfNotConnected(); diff --git a/src/Storages/StorageMongoDB.h b/src/Storages/StorageMongoDB.h index 0e00b80432b..04fb759133a 100644 --- a/src/Storages/StorageMongoDB.h +++ b/src/Storages/StorageMongoDB.h @@ -37,7 +37,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write( const ASTPtr & query, diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 4ccd31ab981..20eb59c7262 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -78,7 +78,7 @@ Pipe StorageMySQL::read( ContextPtr context_, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned) + size_t /*num_streams*/) { storage_snapshot->check(column_names_); String query = transformQueryForExternalDatabase( diff --git a/src/Storages/StorageMySQL.h b/src/Storages/StorageMySQL.h index e3c0712c179..bf9a24c9bfe 100644 --- a/src/Storages/StorageMySQL.h +++ b/src/Storages/StorageMySQL.h @@ -46,7 +46,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned 
num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index 0011b5c94ad..2270731c0e3 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -35,8 +35,8 @@ public: SelectQueryInfo &, ContextPtr /*context*/, QueryProcessingStage::Enum /*processing_stage*/, - size_t, - unsigned) override + size_t /*max_block_size*/, + size_t /*num_streams*/) override { return Pipe( std::make_shared(storage_snapshot->getSampleBlockForColumns(column_names))); diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index e0c6dbf5463..6cf4e458438 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -81,7 +81,7 @@ Pipe StoragePostgreSQL::read( ContextPtr context_, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size_, - unsigned) + size_t /*num_streams*/) { storage_snapshot->check(column_names_); diff --git a/src/Storages/StoragePostgreSQL.h b/src/Storages/StoragePostgreSQL.h index 0755e33269e..97c62daa50f 100644 --- a/src/Storages/StoragePostgreSQL.h +++ b/src/Storages/StoragePostgreSQL.h @@ -38,7 +38,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 0fabff59db4..2afd9e8a63b 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -50,7 +50,7 @@ public: ContextPtr context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size, - unsigned num_streams) override + size_t num_streams) override { return getNested()->watch(column_names, query_info, context, processed_stage, max_block_size, num_streams); } @@ -63,7 +63,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override + size_t num_streams) override { return getNested()->read(query_plan, column_names, storage_snapshot, query_info, context, processed_stage, max_block_size, num_streams); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index ef2d91fabfc..a8863b53b54 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4369,7 +4369,7 @@ void StorageReplicatedMergeTree::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, const size_t max_block_size, - const unsigned num_streams) + const size_t num_streams) { /// If true, then we will ask initiator if we can read chosen ranges const bool enable_parallel_reading = local_context->getClientInfo().collaborate_with_initiator; @@ -5574,7 +5574,8 @@ void StorageReplicatedMergeTree::getStatus(Status & res, bool with_zk_fields) res.queue = queue.getStatus(); res.absolute_delay = getAbsoluteDelay(); /// NOTE: may be slightly inconsistent with queue status. 
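The read() overrides touched throughout this patch all apply one pattern: the per-storage num_streams parameter is widened from unsigned (32 bits) to size_t so that it matches the width the query pipeline already uses and stops triggering -Wshorten-64-to-32 at every call site. A minimal sketch of the hazard the widening removes, assuming an LP64 target where unsigned is 32 bits and size_t is 64 (illustration only, not ClickHouse code):

    #include <cstddef>
    #include <iostream>

    // A value that fits in size_t but not in a 32-bit unsigned is silently
    // truncated when it crosses an `unsigned` parameter.
    static unsigned throughUnsigned(unsigned num_streams) { return num_streams; }
    static size_t throughSizeT(size_t num_streams) { return num_streams; }

    int main()
    {
        size_t requested = (size_t{1} << 32) + 7; // larger than UINT32_MAX

        // The cast is written out only so this compiles under -Werror; an
        // implicit conversion would truncate in exactly the same way.
        std::cout << throughUnsigned(static_cast<unsigned>(requested)) << '\n'; // prints 7
        std::cout << throughSizeT(requested) << '\n';                           // prints 4294967303
    }

A stream count never comes near 2^32 in practice; the point is that widening the signature once is cheaper than a cast, or a warning, at every caller.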
- res.parts_to_check = part_check_thread.size(); + /// NOTE: consider convert to UInt64 + res.parts_to_check = static_cast(part_check_thread.size()); res.zookeeper_path = zookeeper_path; res.replica_name = replica_name; diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index e10ffcce22c..4cd2ee0a621 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -131,7 +131,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; std::optional totalRows(const Settings & settings) const override; std::optional totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, ContextPtr context) const override; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 675dd548088..9a4e37cde1b 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -457,8 +457,9 @@ bool StorageS3Source::initialize() file_path = fs::path(bucket) / current_key; - auto zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - read_buf = wrapReadBufferWithCompressionMethod(createS3ReadBuffer(current_key), chooseCompressionMethod(current_key, compression_hint), zstd_window_log_max); + int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); + read_buf = wrapReadBufferWithCompressionMethod( + createS3ReadBuffer(current_key), chooseCompressionMethod(current_key, compression_hint), zstd_window_log_max); auto input_format = getContext()->getInputFormat(format, *read_buf, sample_block, max_block_size, format_settings); QueryPipelineBuilder builder; @@ -875,7 +876,7 @@ Pipe StorageS3::read( ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { bool has_wildcards = s3_configuration.uri.bucket.find(PARTITION_ID_WILDCARD) != String::npos || keys.back().find(PARTITION_ID_WILDCARD) != String::npos; @@ -1079,12 +1080,12 @@ void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( upd.auth_settings.region, ctx->getRemoteHostFilter(), - ctx->getGlobalContext()->getSettingsRef().s3_max_redirects, + static_cast(ctx->getGlobalContext()->getSettingsRef().s3_max_redirects), ctx->getGlobalContext()->getSettingsRef().enable_s3_requests_logging, /* for_disk_s3 = */ false); client_configuration.endpointOverride = upd.uri.endpoint; - client_configuration.maxConnections = upd.rw_settings.max_connections; + client_configuration.maxConnections = static_cast(upd.rw_settings.max_connections); auto credentials = Aws::Auth::AWSCredentials(upd.auth_settings.access_key_id, upd.auth_settings.secret_access_key); auto headers = upd.auth_settings.headers; @@ -1254,7 +1255,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( } first = false; - const auto zstd_window_log_max = ctx->getSettingsRef().zstd_window_log_max; + int zstd_window_log_max = static_cast(ctx->getSettingsRef().zstd_window_log_max); return wrapReadBufferWithCompressionMethod( std::make_unique( s3_configuration.client, s3_configuration.uri.bucket, key, s3_configuration.uri.version_id, s3_configuration.rw_settings.max_single_read_retries, ctx->getReadSettings()), diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index c74a8501964..8e79516ba4c 100644 --- 
a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -171,7 +171,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index df927069bb0..0e4e51f7926 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -97,7 +97,7 @@ Pipe StorageS3Cluster::read( ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t /*max_block_size*/, - unsigned /*num_streams*/) + size_t /*num_streams*/) { StorageS3::updateS3Configuration(context, s3_configuration); diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index d2cf1b917a1..194c2ed0103 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -30,7 +30,7 @@ public: std::string getName() const override { return "S3Cluster"; } Pipe read(const Names &, const StorageSnapshotPtr &, SelectQueryInfo &, - ContextPtr, QueryProcessingStage::Enum, size_t /*max_block_size*/, unsigned /*num_streams*/) override; + ContextPtr, QueryProcessingStage::Enum, size_t /*max_block_size*/, size_t /*num_streams*/) override; QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index a86ed7646b3..92f954ebb9d 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -57,7 +57,7 @@ Pipe StorageSQLite::read( ContextPtr context_, QueryProcessingStage::Enum, size_t max_block_size, - unsigned int) + size_t /*num_streams*/) { if (!sqlite_db) sqlite_db = openSQLiteDB(database_path, getContext(), /* throw_on_error */true); diff --git a/src/Storages/StorageSQLite.h b/src/Storages/StorageSQLite.h index b0f209b5bc3..a021c00f627 100644 --- a/src/Storages/StorageSQLite.h +++ b/src/Storages/StorageSQLite.h @@ -38,7 +38,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 0ecbdb0db10..92d53ffc1ac 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -349,7 +349,7 @@ Pipe StorageStripeLog::read( ContextPtr local_context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - unsigned num_streams) + size_t num_streams) { storage_snapshot->check(column_names); diff --git a/src/Storages/StorageStripeLog.h b/src/Storages/StorageStripeLog.h index efdf18c0f7b..3f1b4ed0ad5 100644 --- a/src/Storages/StorageStripeLog.h +++ b/src/Storages/StorageStripeLog.h @@ -47,7 +47,7 @@ public: ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context) override; diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 2a4bfdf304b..b105e50a54f 100644 --- 
a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -101,7 +101,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override + size_t num_streams) override { String cnames; for (const auto & c : column_names) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index f1c924a3448..c38b4313359 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -266,6 +266,7 @@ namespace setCredentials(credentials, request_uri); const auto settings = context->getSettings(); + int zstd_window_log_max = static_cast(settings.zstd_window_log_max); try { if (download_threads > 1) @@ -354,7 +355,7 @@ namespace threadPoolCallbackRunner(IOThreadPool::get(), "URLParallelRead"), download_threads), compression_method, - settings.zstd_window_log_max); + zstd_window_log_max); } } catch (const Poco::Exception & e) @@ -386,7 +387,7 @@ namespace /* use_external_buffer */ false, /* skip_url_not_found_error */ skip_url_not_found_error), compression_method, - settings.zstd_window_log_max); + zstd_window_log_max); } catch (...) { @@ -641,7 +642,7 @@ Pipe IStorageURLBase::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size); @@ -730,7 +731,7 @@ Pipe StorageURLWithFailover::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned /*num_streams*/) + size_t /*num_streams*/) { ColumnsDescription columns_description; Block block_for_format; diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 63c803f2d26..bf8858b8b66 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -35,7 +35,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context) override; @@ -206,7 +206,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; struct Configuration { diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index 2a3e1743983..300b11b7346 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -27,7 +27,7 @@ Pipe StorageValues::read( ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/) + size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/StorageValues.h b/src/Storages/StorageValues.h index bf7bf0466e4..55222903797 100644 --- a/src/Storages/StorageValues.h +++ b/src/Storages/StorageValues.h @@ -23,7 +23,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; /// Why we may have virtual columns in the storage from a single block? 
/// Because it used as tmp storage for pushing blocks into views, and some diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index adaf1c4e404..a55d7ad3c09 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -111,7 +111,7 @@ void StorageView::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { ASTPtr current_inner_query = storage_snapshot->metadata->getSelectQuery().inner_query; diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 31c96addd08..593ac820ad4 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -32,7 +32,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) { diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index aacbb5fa302..5f57d37278b 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -106,7 +106,7 @@ Pipe StorageXDBC::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { storage_snapshot->check(column_names); diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index a2bb9c15baf..aa313e024ca 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -26,7 +26,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; StorageXDBC( const StorageID & table_id_, diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index 2cfe2de05db..63b9a443f95 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -45,7 +45,7 @@ public: ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/) override + size_t /*num_streams*/) override { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemBackups.cpp b/src/Storages/System/StorageSystemBackups.cpp index e7146711c4a..52a26fe0cd6 100644 --- a/src/Storages/System/StorageSystemBackups.cpp +++ b/src/Storages/System/StorageSystemBackups.cpp @@ -51,8 +51,8 @@ void StorageSystemBackups::fillData(MutableColumns & res_columns, ContextPtr con column_uncompressed_size.insertValue(info.uncompressed_size); column_compressed_size.insertValue(info.compressed_size); column_error.insertData(info.error_message.data(), info.error_message.size()); - column_start_time.insertValue(std::chrono::system_clock::to_time_t(info.start_time)); - column_end_time.insertValue(std::chrono::system_clock::to_time_t(info.end_time)); + column_start_time.insertValue(static_cast(std::chrono::system_clock::to_time_t(info.start_time))); + column_end_time.insertValue(static_cast(std::chrono::system_clock::to_time_t(info.end_time))); }; for (const auto & entry : context->getBackupsWorker().getAllInfos()) diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 20cab9fdc47..18e7d269795 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ 
-296,7 +296,7 @@ Pipe StorageSystemColumns::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemColumns.h b/src/Storages/System/StorageSystemColumns.h index 542e4ce9661..7b4b5dd8fb3 100644 --- a/src/Storages/System/StorageSystemColumns.h +++ b/src/Storages/System/StorageSystemColumns.h @@ -24,7 +24,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool isSystemStorage() const override { return true; } }; diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index e725f8a03c6..be04261cc4e 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -171,7 +171,7 @@ Pipe StorageSystemDataSkippingIndices::read( ContextPtr context, QueryProcessingStage::Enum /* processed_stage */, size_t max_block_size, - unsigned int /* num_streams */) + size_t /* num_streams */) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.h b/src/Storages/System/StorageSystemDataSkippingIndices.h index 046855edd5e..8a1e8c159b4 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.h +++ b/src/Storages/System/StorageSystemDataSkippingIndices.h @@ -21,7 +21,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool isSystemStorage() const override { return true; } }; diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 574ce4f44c2..d094fefddcb 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -36,7 +36,7 @@ Pipe StorageSystemDetachedParts::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { StoragesInfoStream stream(query_info, context); diff --git a/src/Storages/System/StorageSystemDetachedParts.h b/src/Storages/System/StorageSystemDetachedParts.h index 23f27816138..20ac69f0eea 100644 --- a/src/Storages/System/StorageSystemDetachedParts.h +++ b/src/Storages/System/StorageSystemDetachedParts.h @@ -27,7 +27,7 @@ protected: ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, - unsigned /*num_streams*/) override; + size_t /*num_streams*/) override; }; } diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 6b50b00dc30..86b5eafdf72 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -37,7 +37,7 @@ Pipe StorageSystemDisks::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemDisks.h b/src/Storages/System/StorageSystemDisks.h index cd1dc1a8bbf..06cc7e8d4e2 100644 --- a/src/Storages/System/StorageSystemDisks.h +++ b/src/Storages/System/StorageSystemDisks.h @@ -27,7 +27,7 @@ 
public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool isSystemStorage() const override { return true; } }; diff --git a/src/Storages/System/StorageSystemErrors.cpp b/src/Storages/System/StorageSystemErrors.cpp index 4c8c8e60d69..bbe67bc0d21 100644 --- a/src/Storages/System/StorageSystemErrors.cpp +++ b/src/Storages/System/StorageSystemErrors.cpp @@ -51,7 +51,7 @@ void StorageSystemErrors::fillData(MutableColumns & res_columns, ContextPtr cont for (size_t i = 0, end = ErrorCodes::end(); i < end; ++i) { const auto & error = ErrorCodes::values[i].get(); - std::string_view name = ErrorCodes::getName(i); + std::string_view name = ErrorCodes::getName(static_cast(i)); if (name.empty()) continue; diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index 523ec25b89c..a4823d7e5e1 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -129,7 +129,7 @@ Pipe StorageSystemNumbers::read( ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemNumbers.h b/src/Storages/System/StorageSystemNumbers.h index 6bb89c0525e..acddac681ef 100644 --- a/src/Storages/System/StorageSystemNumbers.h +++ b/src/Storages/System/StorageSystemNumbers.h @@ -38,7 +38,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool hasEvenlyDistributedRead() const override { return true; } bool isSystemStorage() const override { return true; } diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index f262c981b83..3091ffdb51a 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -27,7 +27,7 @@ Pipe StorageSystemOne::read( ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemOne.h b/src/Storages/System/StorageSystemOne.h index 35dba59a99e..d8a26f1def4 100644 --- a/src/Storages/System/StorageSystemOne.h +++ b/src/Storages/System/StorageSystemOne.h @@ -28,7 +28,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool isSystemStorage() const override { return true; } diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index bcfd670ece9..a0c022f5540 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -247,7 +247,7 @@ Pipe StorageSystemPartsBase::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { bool has_state_column = hasStateColumn(column_names, storage_snapshot); diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 8db96700e1a..cb6265d82df 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ 
b/src/Storages/System/StorageSystemPartsBase.h @@ -63,7 +63,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; NamesAndTypesList getVirtuals() const override; diff --git a/src/Storages/System/StorageSystemQuotaLimits.cpp b/src/Storages/System/StorageSystemQuotaLimits.cpp index 0261d3d2cd9..6cc269130a0 100644 --- a/src/Storages/System/StorageSystemQuotaLimits.cpp +++ b/src/Storages/System/StorageSystemQuotaLimits.cpp @@ -90,7 +90,7 @@ void StorageSystemQuotaLimits::fillData(MutableColumns & res_columns, ContextPtr auto add_row = [&](const String & quota_name, const Quota::Limits & limits) { column_quota_name.insertData(quota_name.data(), quota_name.length()); - column_duration.push_back(limits.duration.count()); + column_duration.push_back(static_cast(limits.duration.count())); column_is_randomized_interval.push_back(limits.randomize_interval); for (auto quota_type : collections::range(QuotaType::MAX)) diff --git a/src/Storages/System/StorageSystemQuotaUsage.cpp b/src/Storages/System/StorageSystemQuotaUsage.cpp index 6ba47a86dbf..5d047dc0359 100644 --- a/src/Storages/System/StorageSystemQuotaUsage.cpp +++ b/src/Storages/System/StorageSystemQuotaUsage.cpp @@ -162,8 +162,8 @@ void StorageSystemQuotaUsage::fillDataImpl( time_t end_time = std::chrono::system_clock::to_time_t(interval->end_of_interval); UInt32 duration = static_cast(std::chrono::duration_cast(interval->duration).count()); time_t start_time = end_time - duration; - column_start_time.getData().push_back(start_time); - column_end_time.getData().push_back(end_time); + column_start_time.getData().push_back(static_cast(start_time)); + column_end_time.getData().push_back(static_cast(end_time)); column_duration.getData().push_back(duration); column_start_time_null_map.push_back(false); column_end_time_null_map.push_back(false); diff --git a/src/Storages/System/StorageSystemQuotas.cpp b/src/Storages/System/StorageSystemQuotas.cpp index 17863fa7326..439883e038a 100644 --- a/src/Storages/System/StorageSystemQuotas.cpp +++ b/src/Storages/System/StorageSystemQuotas.cpp @@ -96,7 +96,10 @@ void StorageSystemQuotas::fillData(MutableColumns & res_columns, ContextPtr cont column_key_types_offsets.push_back(column_key_types.size()); for (const auto & limits : all_limits) - column_durations.push_back(std::chrono::duration_cast(limits.duration).count()); + { + column_durations.push_back( + static_cast(std::chrono::duration_cast(limits.duration).count())); + } column_durations_offsets.push_back(column_durations.size()); auto apply_to_ast = apply_to.toASTWithNames(access_control); diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index de7e1911e44..20076603522 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -38,7 +38,7 @@ Pipe StorageSystemRemoteDataPaths::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.h b/src/Storages/System/StorageSystemRemoteDataPaths.h index f868ae60795..7e883d144ef 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.h +++ b/src/Storages/System/StorageSystemRemoteDataPaths.h @@ -21,7 +21,7 @@ public: ContextPtr context, 
QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool isSystemStorage() const override { return true; } }; diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index e018ccc0733..0f7877a6e41 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -66,7 +66,7 @@ Pipe StorageSystemReplicas::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemReplicas.h b/src/Storages/System/StorageSystemReplicas.h index fc7f8f15861..e9c29dec0fd 100644 --- a/src/Storages/System/StorageSystemReplicas.h +++ b/src/Storages/System/StorageSystemReplicas.h @@ -25,7 +25,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool isSystemStorage() const override { return true; } }; diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index 549ce193137..df3d8b74e6e 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -258,7 +258,7 @@ Pipe StorageSystemStackTrace::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { storage_snapshot->check(column_names); @@ -324,7 +324,7 @@ Pipe StorageSystemStackTrace::read( sigval sig_value{}; sig_value.sival_int = sequence_num.load(std::memory_order_acquire); - if (0 != ::sigqueue(tid, sig, sig_value)) + if (0 != ::sigqueue(static_cast(tid), sig, sig_value)) { /// The thread may have already finished.
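/// (sigqueue() sets errno to ESRCH when the target thread no longer exists,
/// so the check below can tell an already-exited thread from a real failure.)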
if (ESRCH == errno) diff --git a/src/Storages/System/StorageSystemStackTrace.h b/src/Storages/System/StorageSystemStackTrace.h index dd613882e49..9133a86aa55 100644 --- a/src/Storages/System/StorageSystemStackTrace.h +++ b/src/Storages/System/StorageSystemStackTrace.h @@ -33,7 +33,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool isSystemStorage() const override { return true; } diff --git a/src/Storages/System/StorageSystemStoragePolicies.cpp b/src/Storages/System/StorageSystemStoragePolicies.cpp index 832c430e2be..b42bd7859dd 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.cpp +++ b/src/Storages/System/StorageSystemStoragePolicies.cpp @@ -44,7 +44,7 @@ Pipe StorageSystemStoragePolicies::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemStoragePolicies.h b/src/Storages/System/StorageSystemStoragePolicies.h index 3340a4b5e62..afc729c8368 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.h +++ b/src/Storages/System/StorageSystemStoragePolicies.h @@ -27,7 +27,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool isSystemStorage() const override { return true; } }; diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index e36b22a979e..83f922850a3 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -574,7 +574,7 @@ Pipe StorageSystemTables::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemTables.h b/src/Storages/System/StorageSystemTables.h index 11ac75aab08..60b6144f122 100644 --- a/src/Storages/System/StorageSystemTables.h +++ b/src/Storages/System/StorageSystemTables.h @@ -25,7 +25,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool isSystemStorage() const override { return true; } }; diff --git a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp index 9e5836fa358..6c2ddd8d3dd 100644 --- a/src/Storages/System/StorageSystemZeros.cpp +++ b/src/Storages/System/StorageSystemZeros.cpp @@ -97,7 +97,7 @@ Pipe StorageSystemZeros::read( ContextPtr /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - unsigned num_streams) + size_t num_streams) { storage_snapshot->check(column_names); diff --git a/src/Storages/System/StorageSystemZeros.h b/src/Storages/System/StorageSystemZeros.h index 5461feacb6b..64443a3cfd6 100644 --- a/src/Storages/System/StorageSystemZeros.h +++ b/src/Storages/System/StorageSystemZeros.h @@ -29,7 +29,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; bool hasEvenlyDistributedRead() const override { return true; } bool isSystemStorage() const override { return true; } diff --git 
a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index d34066de769..c0bc5ad8da9 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1018,7 +1018,8 @@ void StorageWindowView::threadFuncFireProc() return; std::lock_guard lock(fire_signal_mutex); - UInt32 timestamp_now = std::time(nullptr); + /// TODO: consider using time_t instead (for every timestamp in this class) + UInt32 timestamp_now = static_cast(std::time(nullptr)); while (next_fire_signal <= timestamp_now) { @@ -1078,7 +1079,7 @@ void StorageWindowView::read( ContextPtr local_context, QueryProcessingStage::Enum processed_stage, const size_t max_block_size, - const unsigned num_streams) + const size_t num_streams) { if (target_table_id.empty()) return; @@ -1118,7 +1119,7 @@ Pipe StorageWindowView::watch( ContextPtr local_context, QueryProcessingStage::Enum & processed_stage, size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const size_t /*num_streams*/) { ASTWatchQuery & query = typeid_cast(*query_info.query); @@ -1189,7 +1190,7 @@ StorageWindowView::StorageWindowView( target_table_id = has_inner_target_table ? StorageID(table_id_.database_name, generateTargetTableName(table_id_)) : query.to_table_id; if (is_proctime) - next_fire_signal = getWindowUpperBound(std::time(nullptr)); + next_fire_signal = getWindowUpperBound(static_cast(std::time(nullptr))); std::exchange(has_inner_table, true); if (!attach_) diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 96c034b9590..6da34389e4d 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -150,7 +150,7 @@ public: ContextPtr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; Pipe watch( const Names & column_names, @@ -158,7 +158,7 @@ public: ContextPtr context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size, - unsigned num_streams) override; + size_t num_streams) override; std::pair getNewBlocks(UInt32 watermark); diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index fd474f037b3..4ecf29a05bd 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -43,7 +43,8 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr } else if (type == Field::Types::Int64 || type == Field::Types::UInt64) { - fd = (type == Field::Types::Int64) ? literal->value.get() : literal->value.get(); + fd = static_cast( + (type == Field::Types::Int64) ? 
literal->value.get() : literal->value.get()); if (fd < 0) throw Exception("File descriptor must be non-negative", ErrorCodes::BAD_ARGUMENTS); } diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp index e3210164d79..891a6d3d1dd 100644 --- a/utils/compressor/decompress_perf.cpp +++ b/utils/compressor/decompress_perf.cpp @@ -107,8 +107,12 @@ protected: if (variant == LZ4_REFERENCE) { - if (LZ4_decompress_fast(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_decompressed) < 0) + if (LZ4_decompress_fast( + compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, + static_cast(size_decompressed)) < 0) + { throw Exception("Cannot LZ4_decompress_fast", ErrorCodes::CANNOT_DECOMPRESS); + } } else LZ4::decompress(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_compressed_without_checksum, size_decompressed, perf_stat); From 089bf39132da67905d65ba22e4c43b50f50b5d48 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 15 Oct 2022 23:26:20 +0200 Subject: [PATCH 214/252] Fix -Wshorten-64-to-32 for musl On musl uint_fast32_t is uint32_t, while for glibc it is unsigned long long. Signed-off-by: Azat Khuzhin --- src/Common/PoolWithFailoverBase.h | 2 +- src/Common/mysqlxx/PoolWithFailover.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Common/PoolWithFailoverBase.h b/src/Common/PoolWithFailoverBase.h index f26648bd213..9939a5738da 100644 --- a/src/Common/PoolWithFailoverBase.h +++ b/src/Common/PoolWithFailoverBase.h @@ -353,7 +353,7 @@ struct PoolWithFailoverBase::PoolState } private: - std::minstd_rand rng = std::minstd_rand(randomSeed()); + std::minstd_rand rng = std::minstd_rand(static_cast(randomSeed())); }; template diff --git a/src/Common/mysqlxx/PoolWithFailover.cpp b/src/Common/mysqlxx/PoolWithFailover.cpp index f3dee1a6776..0ed231cdf8d 100644 --- a/src/Common/mysqlxx/PoolWithFailover.cpp +++ b/src/Common/mysqlxx/PoolWithFailover.cpp @@ -42,8 +42,8 @@ PoolWithFailover::PoolWithFailover( /// which triggers massive re-constructing of connection pools. /// The state of PRNGs like std::mt19937 is considered to be quite heavy /// thus here we attempt to optimize its construction. 
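/// (Note: the seed expression below is 64-bit, while std::mt19937 seeds with
/// its result_type, uint_fast32_t, which musl defines as plain uint32_t;
/// hence the explicit narrowing cast added on the replacement line.)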
- static thread_local std::mt19937 rnd_generator( - std::hash{}(std::this_thread::get_id()) + std::clock()); + static thread_local std::mt19937 rnd_generator(static_cast( + std::hash{}(std::this_thread::get_id()) + std::clock())); for (auto & [_, replicas] : replicas_by_priority) { if (replicas.size() > 1) From 3ccd5e8a1897590098fd0c413fbfff7ac46ecb55 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 16 Oct 2022 16:21:10 +0200 Subject: [PATCH 215/252] Fix -Wshorten-64-to-32 with USE_BLAKE3 Signed-off-by: Azat Khuzhin --- src/Functions/FunctionsHashing.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 33b6b075ebe..ec0a489471b 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -636,10 +637,10 @@ struct ImplBLAKE3 static void apply(const char * begin, const size_t size, unsigned char* out_char_data) { #if defined(MEMORY_SANITIZER) - auto err_msg = blake3_apply_shim_msan_compat(begin, size, out_char_data); + auto err_msg = blake3_apply_shim_msan_compat(begin, safe_cast(size), out_char_data); __msan_unpoison(out_char_data, length); #else - auto err_msg = blake3_apply_shim(begin, size, out_char_data); + auto err_msg = blake3_apply_shim(begin, safe_cast(size), out_char_data); #endif if (err_msg != nullptr) { From 8414ea06913c66db7a1ce19afe376d636e515c4f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 16 Oct 2022 16:23:27 +0200 Subject: [PATCH 216/252] Fix -Wshorten-64-to-32 in self extracting executable Signed-off-by: Azat Khuzhin --- utils/self-extracting-executable/compressor.cpp | 6 +++--- utils/self-extracting-executable/decompressor.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/self-extracting-executable/compressor.cpp b/utils/self-extracting-executable/compressor.cpp index d8b4fdbb038..0c0c85838da 100644 --- a/utils/self-extracting-executable/compressor.cpp +++ b/utils/self-extracting-executable/compressor.cpp @@ -356,7 +356,7 @@ int compressFiles(const char* out_name, const char* exec, char* filenames[], int return 0; } -int copy_decompressor(int input_fd, int decompressor_size, int output_fd) +int copy_decompressor(int input_fd, ssize_t decompressor_size, int output_fd) { const ssize_t buf_size = 1ul<<19; auto buf_memory = std::make_unique(buf_size); @@ -411,7 +411,7 @@ int copy_decompressor_self(const char *self, int output_fd) } char * end = nullptr; - int decompressor_size = strtol(size_str, &end, 10); + ssize_t decompressor_size = strtol(size_str, &end, 10); if (*end != 0) { std::cerr << "Error: unable to extract decompressor" << std::endl; @@ -519,7 +519,7 @@ int main(int argc, char* argv[]) if (p[0] != 0) { char * end = nullptr; - level = strtol(p, &end, 10); + level = static_cast(strtol(p, &end, 10)); if (*end != 0) { std::cerr << "Error: level [" << p << "] is not valid" << std::endl; diff --git a/utils/self-extracting-executable/decompressor.cpp b/utils/self-extracting-executable/decompressor.cpp index c997526d38d..ab71c96eca1 100644 --- a/utils/self-extracting-executable/decompressor.cpp +++ b/utils/self-extracting-executable/decompressor.cpp @@ -514,7 +514,7 @@ int main(int/* argc*/, char* argv[]) return 1; } - if (chmod(self, decompressed_umask)) + if (chmod(self, static_cast(decompressed_umask))) { perror("chmod"); return 1; From 15bcd6250aae073012d38fab84c6a927b576fb9f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 16 Oct 2022 
19:25:33 +0200 Subject: [PATCH 217/252] Fix -Wshorten-64-to-32 for darwin builds Signed-off-by: Azat Khuzhin --- src/Server/KeeperTCPHandler.cpp | 2 +- utils/self-extracting-executable/decompressor.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index ea91dbc3450..8abb66ac148 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -157,7 +157,7 @@ struct SocketInterruptablePollWrapper do { Poco::Timestamp start; - rc = ::poll(poll_buf, 2, remaining_time.totalMilliseconds()); + rc = ::poll(poll_buf, 2, static_cast(remaining_time.totalMilliseconds())); if (rc < 0 && errno == POCO_EINTR) { Poco::Timestamp end; diff --git a/utils/self-extracting-executable/decompressor.cpp b/utils/self-extracting-executable/decompressor.cpp index ab71c96eca1..be25d315d68 100644 --- a/utils/self-extracting-executable/decompressor.cpp +++ b/utils/self-extracting-executable/decompressor.cpp @@ -329,7 +329,7 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress int read_exe_path(char *exe, size_t buf_sz) { - uint32_t size = buf_sz; + uint32_t size = static_cast(buf_sz); char apple[size]; if (_NSGetExecutablePath(apple, &size) != 0) return 1; From 7b28a80ad73448f499964e599f9f321c59d999a6 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 16 Oct 2022 19:27:31 +0200 Subject: [PATCH 218/252] Fix -Wshorten-64-to-32 for amd64sse2 build Signed-off-by: Azat Khuzhin --- base/base/StringRef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/base/StringRef.h b/base/base/StringRef.h index 779099a1573..a3e32ff5058 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -302,7 +302,7 @@ struct StringRefHash : CRC32Hash {}; struct CRC32Hash { - size_t operator() (StringRef /* x */) const + unsigned operator() (StringRef /* x */) const { throw std::logic_error{"Not implemented CRC32Hash without SSE"}; } From f14645a94fe4ca575330e589db5c0df568b8c600 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 16 Oct 2022 20:30:16 +0200 Subject: [PATCH 219/252] tests/gtest_DecimalFunctions: fix operator<< Signed-off-by: Azat Khuzhin --- src/Core/tests/gtest_DecimalFunctions.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/Core/tests/gtest_DecimalFunctions.cpp b/src/Core/tests/gtest_DecimalFunctions.cpp index aeaf3a1fac9..bac47855ac4 100644 --- a/src/Core/tests/gtest_DecimalFunctions.cpp +++ b/src/Core/tests/gtest_DecimalFunctions.cpp @@ -17,11 +17,6 @@ struct DecimalUtilsSplitAndCombineTestParam DecimalUtils::DecimalComponents components; }; -std::ostream & operator << (std::ostream & ostr, const DecimalUtilsSplitAndCombineTestParam & param) -{ - return ostr << param.description; -} - class DecimalUtilsSplitAndCombineTest : public ::testing::TestWithParam {}; @@ -151,6 +146,17 @@ TEST_P(DecimalUtilsSplitAndCombineForDateTime64Test, getFractionalPartDateTime64 } +namespace std +{ + +std::ostream & operator << (std::ostream & ostr, const DecimalUtilsSplitAndCombineTestParam & param) +{ + return ostr << param.description; +} + +} + + // Intentionally small values that fit into 32-bit in order to cover Decimal32, Decimal64 and Decimal128 with single set of data. 
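// (Values any wider would overflow Decimal32, whose underlying type is Int32.)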
INSTANTIATE_TEST_SUITE_P(Basic, DecimalUtilsSplitAndCombineTest, From 4412f6670968167ef3f3b3ae6d4553c1a7a12c37 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 16 Oct 2022 21:13:49 +0200 Subject: [PATCH 220/252] tests/gtest_DecimalFunctions: fix due to incorrect cast in wide_integer Signed-off-by: Azat Khuzhin --- src/Core/tests/gtest_DecimalFunctions.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Core/tests/gtest_DecimalFunctions.cpp b/src/Core/tests/gtest_DecimalFunctions.cpp index bac47855ac4..a07240cf4a5 100644 --- a/src/Core/tests/gtest_DecimalFunctions.cpp +++ b/src/Core/tests/gtest_DecimalFunctions.cpp @@ -23,7 +23,7 @@ class DecimalUtilsSplitAndCombineTest : public ::testing::TestWithParam void testSplit(const DecimalUtilsSplitAndCombineTestParam & param) { - const DecimalType decimal_value(static_cast(param.decimal_value)); + const DecimalType decimal_value(static_cast(param.decimal_value.value)); const auto & actual_components = DecimalUtils::split(decimal_value, param.scale); EXPECT_EQ(param.components.whole, actual_components.whole); @@ -45,7 +45,7 @@ void testGetWhole(const DecimalUtilsSplitAndCombineTestParam & param) { EXPECT_EQ(param.components.whole, DecimalUtils::getWholePart( - DecimalType{static_cast(param.decimal_value)}, + DecimalType{static_cast(param.decimal_value.value)}, param.scale)); } @@ -54,7 +54,7 @@ void testGetFractional(const DecimalUtilsSplitAndCombineTestParam & param) { EXPECT_EQ(param.components.fractional, DecimalUtils::getFractionalPart( - DecimalType{static_cast(param.decimal_value)}, + DecimalType{static_cast(param.decimal_value.value)}, param.scale)); } From 0625a866f0b442af70148116437e38d2fde5b125 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 16 Oct 2022 21:18:45 +0200 Subject: [PATCH 221/252] Add a note about incorrect wide_integer with Decimal Signed-off-by: Azat Khuzhin --- base/base/wide_integer_impl.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h index eb2edcb98ff..1b5f502722c 100644 --- a/base/base/wide_integer_impl.h +++ b/base/base/wide_integer_impl.h @@ -227,6 +227,8 @@ struct integer::_impl template __attribute__((no_sanitize("undefined"))) constexpr static auto to_Integral(T f) noexcept { + /// NOTE: this can be called with DB::Decimal, and in this case, result + /// will be wrong if constexpr (std::is_signed_v) return static_cast(f); else From b8ab6865616ec2e7237c6ae8e2a8d0053b23f289 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 17 Oct 2022 11:42:53 +0200 Subject: [PATCH 222/252] tests/gtest_compressionCodec: fix for darwin Signed-off-by: Azat Khuzhin --- src/Compression/tests/gtest_compressionCodec.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Compression/tests/gtest_compressionCodec.cpp b/src/Compression/tests/gtest_compressionCodec.cpp index e5e50cd5320..a8ab53b7089 100644 --- a/src/Compression/tests/gtest_compressionCodec.cpp +++ b/src/Compression/tests/gtest_compressionCodec.cpp @@ -722,7 +722,7 @@ template struct RandomGenerator { explicit RandomGenerator(T seed = 0, T value_min = std::numeric_limits::min(), T value_max = std::numeric_limits::max()) - : random_engine(seed), + : random_engine(static_cast(seed)), distribution(value_min, value_max) { } From 784f7d988c0afd404c160bdaeaa3e5ce30c3db59 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 17 Oct 2022 11:40:54 +0200 Subject: [PATCH 223/252] Fix -Wshorten-64-to-32 in examples Signed-off-by: Azat Khuzhin --- 
src/Common/examples/arena_with_free_lists.cpp | 4 ++-- src/Common/examples/array_cache.cpp | 6 +++--- src/Common/examples/average.cpp | 2 +- src/IO/examples/valid_utf8_perf.cpp | 2 +- src/IO/examples/zlib_ng_bug.cpp | 4 ++-- src/Interpreters/examples/hash_map_string_small.cpp | 2 +- src/Storages/examples/merge_selector.cpp | 2 +- src/Storages/examples/merge_selector2.cpp | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Common/examples/arena_with_free_lists.cpp b/src/Common/examples/arena_with_free_lists.cpp index dcb777abc42..4f209ccb5b2 100644 --- a/src/Common/examples/arena_with_free_lists.cpp +++ b/src/Common/examples/arena_with_free_lists.cpp @@ -176,11 +176,11 @@ struct Dictionary { case AttributeUnderlyingTypeTest::UInt8: std::get>(attribute.arrays)[idx] = value.get(); break; case AttributeUnderlyingTypeTest::UInt16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::UInt32: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeUnderlyingTypeTest::UInt32: std::get>(attribute.arrays)[idx] = static_cast(value.get()); break; case AttributeUnderlyingTypeTest::UInt64: std::get>(attribute.arrays)[idx] = value.get(); break; case AttributeUnderlyingTypeTest::Int8: std::get>(attribute.arrays)[idx] = value.get(); break; case AttributeUnderlyingTypeTest::Int16: std::get>(attribute.arrays)[idx] = value.get(); break; - case AttributeUnderlyingTypeTest::Int32: std::get>(attribute.arrays)[idx] = value.get(); break; + case AttributeUnderlyingTypeTest::Int32: std::get>(attribute.arrays)[idx] = static_cast(value.get()); break; case AttributeUnderlyingTypeTest::Int64: std::get>(attribute.arrays)[idx] = value.get(); break; case AttributeUnderlyingTypeTest::Float32: std::get>(attribute.arrays)[idx] = static_cast(value.get()); break; case AttributeUnderlyingTypeTest::Float64: std::get>(attribute.arrays)[idx] = value.get(); break; diff --git a/src/Common/examples/array_cache.cpp b/src/Common/examples/array_cache.cpp index f6bbbba4cbc..c1267d3c9cf 100644 --- a/src/Common/examples/array_cache.cpp +++ b/src/Common/examples/array_cache.cpp @@ -46,9 +46,9 @@ int main(int argc, char ** argv) size_t cache_size = DB::parse(argv[1]); size_t num_threads = DB::parse(argv[2]); - size_t num_iterations = DB::parse(argv[3]); + int num_iterations = DB::parse(argv[3]); size_t region_max_size = DB::parse(argv[4]); - size_t max_key = DB::parse(argv[5]); + int max_key = DB::parse(argv[5]); using Cache = ArrayCache; Cache cache(cache_size); @@ -60,7 +60,7 @@ int main(int argc, char ** argv) { pcg64 generator(randomSeed()); - for (size_t j = 0; j < num_iterations; ++j) + for (int j = 0; j < num_iterations; ++j) { size_t size = std::uniform_int_distribution(1, region_max_size)(generator); int key = std::uniform_int_distribution(1, max_key)(generator); diff --git a/src/Common/examples/average.cpp b/src/Common/examples/average.cpp index d2802717fc8..749bad848cc 100644 --- a/src/Common/examples/average.cpp +++ b/src/Common/examples/average.cpp @@ -425,7 +425,7 @@ Float NO_INLINE microsort(const PODArray & keys, const PODArray & for (size_t i = 1; i < HISTOGRAM_SIZE; ++i) positions[i] = positions[i - 1] + count[i - 1]; - for (size_t i = 0; i < size; ++i) + for (UInt32 i = 0; i < size; ++i) *positions[keys[i]]++ = i; /// Update states. 
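Several of these example fixes are boundary casts into C structs whose size fields are 32-bit; the zlib_ng_bug.cpp change just below is the clearest case, since z_stream carries avail_in/avail_out as uInt (32 bits on common platforms) while vector sizes are size_t. A sketch of the idiom with a stand-in struct, not the real zlib types:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Stand-in for a C API that keeps buffer sizes in 32-bit fields, the way
    // z_stream does with avail_in/avail_out.
    struct FakeStream
    {
        uint32_t avail_in = 0;
        uint32_t avail_out = 0;
    };

    int main()
    {
        std::vector<unsigned char> in(1024), out(4096);

        FakeStream z;
        // size() returns size_t; the fields are 32-bit, so the narrowing is
        // written once at the API boundary instead of happening implicitly
        // (which -Wshorten-64-to-32 would flag).
        z.avail_in = static_cast<uint32_t>(in.size());
        z.avail_out = static_cast<uint32_t>(out.size());

        std::cout << z.avail_in << ' ' << z.avail_out << '\n';
    }

The same reasoning covers the loop-counter changes in array_cache.cpp and average.cpp above: the counter is given the type the value actually needs, so no implicit narrowing is left for the compiler to warn about.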
diff --git a/src/IO/examples/valid_utf8_perf.cpp b/src/IO/examples/valid_utf8_perf.cpp index b95cdb2c27c..f42251188d9 100644 --- a/src/IO/examples/valid_utf8_perf.cpp +++ b/src/IO/examples/valid_utf8_perf.cpp @@ -10,7 +10,7 @@ int main(int argc, char ** argv) { int repeats = 1; if (argc >= 2) - repeats = std::stol(argv[1]); + repeats = static_cast(std::stol(argv[1])); std::string text((std::istreambuf_iterator(std::cin)), std::istreambuf_iterator()); diff --git a/src/IO/examples/zlib_ng_bug.cpp b/src/IO/examples/zlib_ng_bug.cpp index 9fe3c961913..60b47e2bcc5 100644 --- a/src/IO/examples/zlib_ng_bug.cpp +++ b/src/IO/examples/zlib_ng_bug.cpp @@ -23,9 +23,9 @@ int main(int, char **) throw std::runtime_error("Cannot deflateInit2"); zstr.next_in = in.data(); - zstr.avail_in = in.size(); + zstr.avail_in = static_cast(in.size()); zstr.next_out = out.data(); - zstr.avail_out = out.size(); + zstr.avail_out = static_cast(out.size()); while (zstr.avail_in > 0) if (Z_OK != deflate(&zstr, Z_NO_FLUSH)) diff --git a/src/Interpreters/examples/hash_map_string_small.cpp b/src/Interpreters/examples/hash_map_string_small.cpp index 4a96f717bf7..b58cdfbacd0 100644 --- a/src/Interpreters/examples/hash_map_string_small.cpp +++ b/src/Interpreters/examples/hash_map_string_small.cpp @@ -23,7 +23,7 @@ struct SmallStringRef { - UInt32 size = 0; + size_t size = 0; union { diff --git a/src/Storages/examples/merge_selector.cpp b/src/Storages/examples/merge_selector.cpp index 9433e38c648..a3b0d8a29ef 100644 --- a/src/Storages/examples/merge_selector.cpp +++ b/src/Storages/examples/merge_selector.cpp @@ -66,7 +66,7 @@ int main(int, char **) size_t sum_merged_size = 0; size_t start_index = 0; - size_t max_level = 0; + unsigned max_level = 0; bool in_range = false; for (size_t i = 0, size = parts.size(); i < size; ++i) diff --git a/src/Storages/examples/merge_selector2.cpp b/src/Storages/examples/merge_selector2.cpp index d9d08a84bcf..029da26fad6 100644 --- a/src/Storages/examples/merge_selector2.cpp +++ b/src/Storages/examples/merge_selector2.cpp @@ -72,7 +72,7 @@ int main(int, char **) size_t sum_merged_size = 0; size_t start_index = 0; - size_t max_level = 0; + unsigned max_level = 0; bool in_range = false; for (size_t i = 0, size = parts.size(); i < size; ++i) From 2474303aef2e7a1aa9a8f2e5705f5b868abbe2b5 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 17 Oct 2022 12:54:23 +0200 Subject: [PATCH 224/252] tests/gtest_compressionCodec: Fix UBSAN report about signed integer overflow UBSAN report: $ UBSAN_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer-14 UBSAN_OPTIONS=print_stacktrace=1 ./unit_tests_dbms --gtest_filter=*Gorilla* ../src/Compression/tests/gtest_compressionCodec.cpp:1216:47: runtime error: signed integer overflow: 23 * 100000000 cannot be represented in type 'int' #0 0x14f67fd1 in auto (anonymous namespace)::$_6::operator()(int) const::'lambda'(auto)::operator()(auto) const build_docker/../src/Compression/tests/gtest_compressionCodec.cpp:1216:47 #1 0x14f67fd1 in (anonymous namespace)::CodecTestSequence (anonymous namespace)::generateSeq((anonymous namespace)::$_6::operator()(int) const::'lambda'(auto), char const*, int, int) build_docker/../src/Compression/tests/gtest_compressionCodec.cpp:394:36 #2 0x14f67fd1 in auto (anonymous namespace)::GCompatibilityTestSequence() build_docker/../src/Compression/tests/gtest_compressionCodec.cpp:1224:12 #3 0x14f3c7f3 in (anonymous namespace)::gtest_GorillaCodecTestCompatibility_EvalGenerator_() build_docker/../src/Compression/tests/gtest_compressionCodec.cpp:1227:1 #4 
0x14f6bdb5 in testing::internal::ParameterizedTestSuiteInfo<(anonymous namespace)::CodecTestCompatibility>::RegisterTests() build_docker/../contrib/googletest/googletest/include/gtest/internal/gtest-param-util.h:553:45 #5 0x27d87988 in testing::internal::ParameterizedTestSuiteRegistry::RegisterTests() build_docker/../contrib/googletest/googletest/include/gtest/internal/gtest-param-util.h:726:24 #6 0x27d87988 in testing::internal::UnitTestImpl::RegisterParameterizedTests() build_docker/../contrib/googletest/googletest/src/gtest.cc:2805:34 #7 0x27d87988 in testing::internal::UnitTestImpl::PostFlagParsingInit() build_docker/../contrib/googletest/googletest/src/gtest.cc:5492:5 #8 0x27d9d002 in void testing::internal::InitGoogleTestImpl(int*, char**) build_docker/../contrib/googletest/googletest/src/gtest.cc:6499:22 #9 0x14fd5495 in main build_docker/../src/Coordination/tests/gtest_coordination.cpp:2189:5 #10 0x7f8c29005209 (/lib/x86_64-linux-gnu/libc.so.6+0x29209) (BuildId: 71a7c7b97bc0b3e349a3d8640252655552082bf5) #11 0x7f8c290052bb in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x292bb) (BuildId: 71a7c7b97bc0b3e349a3d8640252655552082bf5) #12 0x14ce356d in _start (/work1/azat/tmp/42190/unit_tests_dbms+0x14ce356d) (BuildId: 482550e3f8d45f06e8c7f8147f427ee798c1f645) SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior ../src/Compression/tests/gtest_compressionCodec.cpp:1216:47 in Signed-off-by: Azat Khuzhin --- src/Compression/tests/gtest_compressionCodec.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Compression/tests/gtest_compressionCodec.cpp b/src/Compression/tests/gtest_compressionCodec.cpp index a8ab53b7089..b1b68057c65 100644 --- a/src/Compression/tests/gtest_compressionCodec.cpp +++ b/src/Compression/tests/gtest_compressionCodec.cpp @@ -1213,7 +1213,13 @@ auto PrimesWithMultiplierGenerator = [](int multiplier = 1) static const size_t count = sizeof(vals)/sizeof(vals[0]); using T = decltype(i); - return static_cast(vals[i % count] * static_cast(multiplier)); + UInt64 ret; + do { + ret = static_cast(vals[i % count]) * multiplier; + --i; + } while (ret > std::numeric_limits::max()); + + return static_cast(ret); }; }; From 3d400068fd96139ace8e7ba26d52de3c85802865 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 17 Oct 2022 13:12:39 +0200 Subject: [PATCH 225/252] tests/gtest_compressionCodec: fix UBSAN report to avoid test failure Signed-off-by: Azat Khuzhin --- src/Compression/tests/gtest_compressionCodec.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/Compression/tests/gtest_compressionCodec.cpp b/src/Compression/tests/gtest_compressionCodec.cpp index b1b68057c65..c203b3e9d5c 100644 --- a/src/Compression/tests/gtest_compressionCodec.cpp +++ b/src/Compression/tests/gtest_compressionCodec.cpp @@ -1212,14 +1212,7 @@ auto PrimesWithMultiplierGenerator = [](int multiplier = 1) }; static const size_t count = sizeof(vals)/sizeof(vals[0]); - using T = decltype(i); - UInt64 ret; - do { - ret = static_cast(vals[i % count]) * multiplier; - --i; - } while (ret > std::numeric_limits::max()); - - return static_cast(ret); + return static_cast(vals[i % count]) * multiplier; }; }; From 5094c0dd6df9749336a362a69c667227bc18a678 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 18 Oct 2022 08:37:36 +0200 Subject: [PATCH 226/252] Fix clang-tidy performance-inefficient-vector-operation By some reason it appears only after static_cast<> was added [1]: /build/src/Processors/Formats/Impl/AvroRowInputFormat.cpp Oct 18 01:03:56 
/build/src/Processors/Formats/Impl/AvroRowInputFormat.cpp:351:21: error: 'push_back' is called inside a loop; consider pre-allocating the container capacity before the loop [performance-inefficient-vector-operation,-warnings-as-errors] Oct 18 01:03:56 symbols.push_back(root_node->nameAt(i)); Oct 18 01:03:56 ^ Oct 18 01:03:56 /build/src/Processors/Formats/Impl/AvroRowInputFormat.cpp:511:17: error: 'push_back' is called inside a loop; consider pre-allocating the container capacity before the loop [performance-inefficient-vector-operation,-warnings-as-errors] Oct 18 01:03:56 union_skip_fns.push_back(createSkipFn(root_node->leafAt(i))); Oct 18 01:03:56 ^ Oct 18 01:03:56 /build/src/Processors/Formats/Impl/AvroRowInputFormat.cpp:552:17: error: 'push_back' is called inside a loop; consider pre-allocating the container capacity before the loop [performance-inefficient-vector-operation,-warnings-as-errors] Oct 18 01:03:56 field_skip_fns.push_back(createSkipFn(root_node->leafAt(i))); Oct 18 01:03:56 ^ Oct 18 01:03:56 197965 warnings generated. [1]: https://s3.amazonaws.com/clickhouse-builds/42190/453d91fa3539882dcef1d5ecd5097747499572d8/clickhouse_special_build_check/report.html Signed-off-by: Azat Khuzhin --- src/Processors/Formats/Impl/AvroRowInputFormat.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index abb23d68334..80183838277 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -346,6 +346,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node if (target.isString()) { std::vector<std::string> symbols; + symbols.reserve(root_node->names()); for (int i = 0; i < static_cast<int>(root_node->names()); ++i) { symbols.push_back(root_node->nameAt(i)); } @@ -506,6 +507,7 @@ AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node) case avro::AVRO_UNION: { std::vector<SkipFn> union_skip_fns; + union_skip_fns.reserve(root_node->leaves()); for (int i = 0; i < static_cast<int>(root_node->leaves()); ++i) { union_skip_fns.push_back(createSkipFn(root_node->leafAt(i))); @@ -547,6 +549,7 @@ AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node) case avro::AVRO_RECORD: { std::vector<SkipFn> field_skip_fns; + field_skip_fns.reserve(root_node->leaves()); for (int i = 0; i < static_cast<int>(root_node->leaves()); ++i) { field_skip_fns.push_back(createSkipFn(root_node->leafAt(i))); From 0365bc56aaef94bfc530121ae894e597d0aae99a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 20 Oct 2022 17:03:07 +0200 Subject: [PATCH 227/252] Add backward compatibility check for some functions * State of aggregate functions This test goes through all aggregate functions that: - take only one argument - support a string as an argument and does a simple check by creating the aggregate state with one string. This does not cover everything (it does not cover functions with a different number of arguments, other types, or different states built from multiple values - uniqCombined, but uniqCombined will be checked via uniqHLL12 anyway), but it is at least something. As for the type, String was selected since it is likely that some hash function is used internally. * Function(String) Check every function that takes only one argument whose type is String.
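To illustrate the point about internal hash functions, here is a standalone sketch (made-up names, not ClickHouse code): if an aggregate function hashes its input internally, any change to that hash between versions shows up as a different serialized state even for a single string, which is exactly what the hex comparison in this test catches.

#include <cstdint>
#include <cstdio>
#include <string_view>

// Toy stand-in for an aggregate state that hashes its input.
static uint64_t fnv1a(std::string_view s, uint64_t seed)
{
    uint64_t h = seed;
    for (unsigned char c : s)
    {
        h ^= c;
        h *= 0x100000001b3ULL;
    }
    return h;
}

int main()
{
    // Two "versions" that differ only in the seed of the internal hash
    // produce different states for the same input value 'foo'.
    std::printf("%016llx\n", static_cast<unsigned long long>(fnv1a("foo", 0xcbf29ce484222325ULL)));
    std::printf("%016llx\n", static_cast<unsigned long long>(fnv1a("foo", 0xcbf29ce484222326ULL)));
}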
Signed-off-by: Azat Khuzhin --- .../test_functions.py | 224 ++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 tests/integration/test_backward_compatibility/test_functions.py diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py new file mode 100644 index 00000000000..fe1c0ea7108 --- /dev/null +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -0,0 +1,224 @@ +# pylint: disable=unused-argument +# pylint: disable=line-too-long +# pylint: disable=cell-var-from-loop +# pylint: disable=redefined-outer-name + +import logging +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException + +cluster = ClickHouseCluster(__file__) +upstream = cluster.add_instance("upstream") +backward = cluster.add_instance( + "backward", + image="clickhouse/clickhouse-server", + tag="22.9", + with_installed_binary=True, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_aggregate_states(start_cluster): + """ + This test goes through all aggregate functions that: + - take only one argument + - support a string as an argument + + and does a simple check by creating the aggregate state with one string. + + This does not cover everything (it does not cover functions with + a different number of arguments, other types, or different states built + from multiple values - uniqCombined, but uniqCombined will be checked + via uniqHLL12 anyway), but it is at least something. + + As for the type, String was selected since it is likely that some + hash function is used internally. + """ + + aggregate_functions = backward.query( + """ + SELECT if(NOT empty(alias_to), alias_to, name) + FROM system.functions + WHERE is_aggregate = 1 + """ + ) + aggregate_functions = aggregate_functions.strip().split("\n") + aggregate_functions = map(lambda x: x.strip(), aggregate_functions) + + aggregate_functions = list(aggregate_functions) + logging.info("Got %s aggregate functions", len(aggregate_functions)) + + skipped = 0 + failed = 0 + passed = 0 + + def get_aggregate_state_hex(node, function_name): + return node.query( + f"select hex(initializeAggregation('{function_name}State', 'foo'))" + ).strip() + + for aggregate_function in aggregate_functions: + logging.info("Checking %s", aggregate_function) + + try: + backward_state = get_aggregate_state_hex(backward, aggregate_function) + except QueryRuntimeException as e: + error_message = str(e) + allowed_errors = [ + "NUMBER_OF_ARGUMENTS_DOESNT_MATCH", + "ILLEGAL_TYPE_OF_ARGUMENT", + # sequenceNextNode() and friends + "UNKNOWN_AGGREGATE_FUNCTION", + # Function X takes exactly one parameter: + # The function 'X' can only be used as a window function + "BAD_ARGUMENTS", + # aggThrow + "AGGREGATE_FUNCTION_THROW", + ] + if any(map(lambda x: x in error_message, allowed_errors)): + logging.info("Skipping %s", aggregate_function) + skipped += 1 + continue + logging.exception("Failed %s", aggregate_function) + failed += 1 + continue + + upstream_state = get_aggregate_state_hex(upstream, aggregate_function) + if upstream_state != backward_state: + logging.info( + "Failed %s, %s (backward) != %s (upstream)", + aggregate_function, + backward_state, + upstream_state, + ) + failed += 1 + else: + logging.info("OK %s", aggregate_function) + passed += 1 + + logging.info( + "Aggregate functions: %s, Failed: %s, skipped: %s, passed: %s",
len(aggregate_functions), + failed, + skipped, + passed, + ) + assert failed == 0 + assert passed > 0 + assert failed + passed + skipped == len(aggregate_functions) + + +def test_string_functions(start_cluster): + functions = backward.query( + """ + SELECT if(NOT empty(alias_to), alias_to, name) + FROM system.functions + WHERE is_aggregate = 0 + """ + ) + functions = functions.strip().split("\n") + functions = map(lambda x: x.strip(), functions) + + excludes = [ + "rand", + "rand64", + "randConstant", + "generateUUIDv4", + # Syntax error otherwise + "position", + "substring", + "CAST", + # NOTE: no need to ignore now()/now64() since they will fail because they don't accept any argument + ] + functions = filter(lambda x: x not in excludes, functions) + + functions = list(functions) + logging.info("Got %s functions", len(functions)) + + skipped = 0 + failed = 0 + passed = 0 + + def get_function_value(node, function_name, value="foo"): + return node.query(f"select {function_name}('{value}')").strip() + + for function in functions: + logging.info("Checking %s", function) + + try: + backward_value = get_function_value(backward, function) + except QueryRuntimeException as e: + error_message = str(e) + allowed_errors = [ + # Messages + "Cannot load time zone ", + "No macro ", + "Should start with ", # POINT/POLYGON/... + "Cannot read input: expected a digit but got something else:", + # ErrorCodes + "NUMBER_OF_ARGUMENTS_DOESNT_MATCH", + "ILLEGAL_TYPE_OF_ARGUMENT", + "TOO_FEW_ARGUMENTS_FOR_FUNCTION", + "DICTIONARIES_WAS_NOT_LOADED", + "CANNOT_PARSE_UUID", + "CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING", + "ILLEGAL_COLUMN", + "TYPE_MISMATCH", + "SUPPORT_IS_DISABLED", + "CANNOT_PARSE_DATE", + "UNKNOWN_SETTING", + "CANNOT_PARSE_BOOL", + "FILE_DOESNT_EXIST", + "NOT_IMPLEMENTED", + "BAD_GET", + "UNKNOWN_TYPE", + # addressToSymbol + "FUNCTION_NOT_ALLOWED", + # Date functions + "CANNOT_PARSE_TEXT", + "CANNOT_PARSE_DATETIME", + # Function X takes exactly one parameter: + # The function 'X' can only be used as a window function + "BAD_ARGUMENTS", + ] + if any(map(lambda x: x in error_message, allowed_errors)): + logging.info("Skipping %s", function) + skipped += 1 + continue + logging.exception("Failed %s", function) + failed += 1 + continue + + upstream_value = get_function_value(upstream, function) + if upstream_value != backward_value: + logging.info( + "Failed %s, %s (backward) != %s (upstream)", + function, + backward_value, + upstream_value, + ) + failed += 1 + else: + logging.info("OK %s", function) + passed += 1 + + logging.info( + "Functions: %s, failed: %s, skipped: %s, passed: %s", + len(functions), + failed, + skipped, + passed, + ) + assert failed == 0 + assert passed > 0 + assert failed + passed + skipped == len(functions) From 1e739791a4c8615962cac943b51e6d7c7f9db083 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 21 Oct 2022 06:20:40 +0200 Subject: [PATCH 228/252] Fix type error in itoa (wrong static_cast<>) Signed-off-by: Azat Khuzhin --- base/base/itoa.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/base/itoa.h b/base/base/itoa.h index 809b7c86c42..dd3e3cc96fe 100644 --- a/base/base/itoa.h +++ b/base/base/itoa.h @@ -122,7 +122,7 @@ QuotientAndRemainder static inline split(UnsignedOfSize value) constexpr DivisionBy10PowN division; UnsignedOfSize quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift; - UnsignedOfSize remainder = static_cast>(value - quotient * pow10>(N)); + UnsignedOfSize remainder = static_cast>(value - 
quotient * pow10>(N)); return {quotient, remainder}; } From 13d31eefbc39ae214f4aea2eb0ecec1d3356a166 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 21 Oct 2022 14:02:52 +0200 Subject: [PATCH 229/252] Replicated merge tree polishing + make read-only members const + reduce memory allocations --- src/Storages/StorageReplicatedMergeTree.cpp | 8 ++++---- src/Storages/StorageReplicatedMergeTree.h | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7a2ff56a782..3db93ac325d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7735,7 +7735,7 @@ std::pair getParentLockedBlobs(zkutil::ZooKeeperPtr zookeeper_ptr std::pair StorageReplicatedMergeTree::unlockSharedDataByID( String part_id, const String & table_uuid, const String & part_name, - const String & replica_name_, std::string disk_type, zkutil::ZooKeeperPtr zookeeper_ptr, const MergeTreeSettings & settings, + const String & replica_name_, const std::string & disk_type, zkutil::ZooKeeperPtr zookeeper_ptr, const MergeTreeSettings & settings, Poco::Logger * logger, const String & zookeeper_path_old, MergeTreeDataFormatVersion data_format_version) { boost::replace_all(part_id, "/", "_"); @@ -7956,7 +7956,7 @@ String StorageReplicatedMergeTree::getSharedDataReplica( Strings StorageReplicatedMergeTree::getZeroCopyPartPath( - const MergeTreeSettings & settings, std::string disk_type, const String & table_uuid, + const MergeTreeSettings & settings, const std::string & disk_type, const String & table_uuid, const String & part_name, const String & zookeeper_path_old) { Strings res; @@ -7964,11 +7964,11 @@ Strings StorageReplicatedMergeTree::getZeroCopyPartPath( String zero_copy = fmt::format("zero_copy_{}", disk_type); String new_path = fs::path(settings.remote_fs_zero_copy_zookeeper_path.toString()) / zero_copy / table_uuid / part_name; - res.push_back(new_path); + res.push_back(std::move(new_path)); if (settings.remote_fs_zero_copy_path_compatible_mode && !zookeeper_path_old.empty()) { /// Compatibility mode for cluster with old and new versions String old_path = fs::path(zookeeper_path_old) / zero_copy / "shared" / part_name; - res.push_back(old_path); + res.push_back(std::move(old_path)); } return res; diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index e10ffcce22c..0e7007c162b 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -279,7 +279,7 @@ public: /// Return true if data unlocked /// Return false if data is still used by another node static std::pair unlockSharedDataByID(String part_id, const String & table_uuid, const String & part_name, const String & replica_name_, - std::string disk_type, zkutil::ZooKeeperPtr zookeeper_, const MergeTreeSettings & settings, Poco::Logger * logger, + const std::string & disk_type, zkutil::ZooKeeperPtr zookeeper_, const MergeTreeSettings & settings, Poco::Logger * logger, const String & zookeeper_path_old, MergeTreeDataFormatVersion data_format_version); /// Fetch part only if some replica has it on shared storage like S3 @@ -288,7 +288,7 @@ public: /// Get best replica having this partition on a same type remote disk String getSharedDataReplica(const IMergeTreeDataPart & part, DataSourceType data_source_type) const; - inline String getReplicaName() const { return replica_name; } + inline const String & 
getReplicaName() const { return replica_name; } /// Restores table metadata if ZooKeeper lost it. /// Used only on restarted readonly replicas (not checked). All active (Active) parts are moved to detached/ @@ -310,9 +310,9 @@ public: bool createEmptyPartInsteadOfLost(zkutil::ZooKeeperPtr zookeeper, const String & lost_part_name); // Return default or custom zookeeper name for table - String getZooKeeperName() const { return zookeeper_name; } + const String & getZooKeeperName() const { return zookeeper_name; } - String getZooKeeperPath() const { return zookeeper_path; } + const String & getZooKeeperPath() const { return zookeeper_path; } // Return table id, common for different replicas String getTableSharedID() const override; @@ -382,10 +382,10 @@ private: std::optional has_metadata_in_zookeeper; static constexpr auto default_zookeeper_name = "default"; - String zookeeper_name; - String zookeeper_path; - String replica_name; - String replica_path; + const String zookeeper_name; + const String zookeeper_path; + const String replica_name; + const String replica_path; /** /replicas/me/is_active. */ @@ -829,7 +829,7 @@ private: PartitionBlockNumbersHolder allocateBlockNumbersInAffectedPartitions( const MutationCommands & commands, ContextPtr query_context, const zkutil::ZooKeeperPtr & zookeeper) const; - static Strings getZeroCopyPartPath(const MergeTreeSettings & settings, std::string disk_type, const String & table_uuid, + static Strings getZeroCopyPartPath(const MergeTreeSettings & settings, const std::string & disk_type, const String & table_uuid, const String & part_name, const String & zookeeper_path_old); static void createZeroCopyLockNode( From 851f556d65a2fb31a551868afd883554d45f745d Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 21 Oct 2022 14:35:37 +0200 Subject: [PATCH 230/252] Remove unused parameter --- src/Storages/Freeze.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 5 +++-- src/Storages/StorageReplicatedMergeTree.h | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Storages/Freeze.cpp b/src/Storages/Freeze.cpp index a2f0395b001..74adf3de0ae 100644 --- a/src/Storages/Freeze.cpp +++ b/src/Storages/Freeze.cpp @@ -194,7 +194,7 @@ bool Unfreezer::removeFreezedPart(DiskPtr disk, const String & path, const Strin if (meta.load(disk, path)) { FreezeMetaData::clean(disk, path); - return StorageReplicatedMergeTree::removeSharedDetachedPart(disk, path, part_name, meta.table_shared_id, meta.zookeeper_name, meta.replica_name, "", local_context, zookeeper); + return StorageReplicatedMergeTree::removeSharedDetachedPart(disk, path, part_name, meta.table_shared_id, meta.replica_name, "", local_context, zookeeper); } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 3db93ac325d..37bed80bfb4 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -179,6 +179,7 @@ namespace ActionLocks static const auto QUEUE_UPDATE_ERROR_SLEEP_MS = 1 * 1000; static const auto MUTATIONS_FINALIZING_SLEEP_MS = 1 * 1000; static const auto MUTATIONS_FINALIZING_IDLE_SLEEP_MS = 5 * 1000; +const String StorageReplicatedMergeTree::default_zookeeper_name = "default"; void StorageReplicatedMergeTree::setZooKeeper() { @@ -8361,7 +8362,7 @@ bool StorageReplicatedMergeTree::removeDetachedPart(DiskPtr disk, const String & if (disk->supportZeroCopyReplication()) { String table_id = getTableSharedID(); - return removeSharedDetachedPart(disk, path, part_name, table_id, 
zookeeper_name, replica_name, zookeeper_path, getContext(), current_zookeeper); + return removeSharedDetachedPart(disk, path, part_name, table_id, replica_name, zookeeper_path, getContext(), current_zookeeper); } disk->removeRecursive(path); @@ -8371,7 +8372,7 @@ bool StorageReplicatedMergeTree::removeDetachedPart(DiskPtr disk, const String & bool StorageReplicatedMergeTree::removeSharedDetachedPart(DiskPtr disk, const String & path, const String & part_name, const String & table_uuid, - const String &, const String & detached_replica_name, const String & detached_zookeeper_path, ContextPtr local_context, const zkutil::ZooKeeperPtr & zookeeper) + const String & detached_replica_name, const String & detached_zookeeper_path, const ContextPtr & local_context, const zkutil::ZooKeeperPtr & zookeeper) { bool keep_shared = false; diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 0e7007c162b..2f332254f2e 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -320,13 +320,13 @@ public: /// Returns the same as getTableSharedID(), but extracts it from a create query. static std::optional tryGetTableSharedIDFromCreateQuery(const IAST & create_query, const ContextPtr & global_context); - static String getDefaultZooKeeperName() { return default_zookeeper_name; } + static const String & getDefaultZooKeeperName() { return default_zookeeper_name; } /// Check if there are new broken disks and enqueue part recovery tasks. void checkBrokenDisks(); static bool removeSharedDetachedPart(DiskPtr disk, const String & path, const String & part_name, const String & table_uuid, - const String & zookeeper_name, const String & replica_name, const String & zookeeper_path, ContextPtr local_context, const zkutil::ZooKeeperPtr & zookeeper); + const String & replica_name, const String & zookeeper_path, const ContextPtr & local_context, const zkutil::ZooKeeperPtr & zookeeper); bool canUseZeroCopyReplication() const; private: @@ -381,7 +381,7 @@ private: /// If false - ZooKeeper is available, but there is no table metadata. It's safe to drop table in this case. 
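// A standalone sketch (illustrative names, not the ClickHouse classes) of
// the reasoning behind the `ContextPtr` -> `const ContextPtr &` change in
// this patch: ContextPtr is a shared_ptr, so borrowing it by const reference
// avoids one atomic refcount increment/decrement per call.
#include <memory>

struct Context {};
using ContextPtr = std::shared_ptr<const Context>;

void byValue(ContextPtr ctx) { (void)ctx; }            // copies: atomic ref++/ref--
void byConstRef(const ContextPtr & ctx) { (void)ctx; } // borrows: no refcount traffic

int main()
{
    ContextPtr ctx = std::make_shared<const Context>();
    byValue(ctx);
    byConstRef(ctx); // safe as long as the caller keeps ctx alive for the call
}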
std::optional has_metadata_in_zookeeper; - static constexpr auto default_zookeeper_name = "default"; + static const String default_zookeeper_name; const String zookeeper_name; const String zookeeper_path; const String replica_name; From 1343f62e8fc4e15efe4dc928f6a14bf86be09c1e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 21 Oct 2022 15:59:27 +0300 Subject: [PATCH 231/252] Update test.py --- .../test_replicated_merge_tree_hdfs_zero_copy/test.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py index 7d65bed3901..1f81421f93c 100644 --- a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py +++ b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py @@ -1,8 +1,14 @@ +import pytest + +# FIXME This test is too flaky +# https://github.com/ClickHouse/ClickHouse/issues/42561 + +pytestmark = pytest.mark.skip + import logging from string import Template import time -import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry From d285c9e7ff9dbce5f94a6910afd3b04cf55752ea Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 21 Oct 2022 14:04:01 +0000 Subject: [PATCH 232/252] move libfuzzer to dedicated contrib/libfuzzer-cmake --- contrib/CMakeLists.txt | 1 + contrib/libfuzzer-cmake/CMakeLists.txt | 35 ++++++++++++++++++++++ contrib/llvm-project-cmake/CMakeLists.txt | 36 ----------------------- 3 files changed, 36 insertions(+), 36 deletions(-) create mode 100644 contrib/libfuzzer-cmake/CMakeLists.txt diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index f914c0d2d3f..c2b16ae6dd6 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -114,6 +114,7 @@ if (ENABLE_TESTS) endif() add_contrib (llvm-project-cmake llvm-project) +add_contrib (libfuzzer-cmake llvm-project) add_contrib (libxml2-cmake libxml2) add_contrib (aws-s3-cmake aws diff --git a/contrib/libfuzzer-cmake/CMakeLists.txt b/contrib/libfuzzer-cmake/CMakeLists.txt new file mode 100644 index 00000000000..ff3a91d828e --- /dev/null +++ b/contrib/libfuzzer-cmake/CMakeLists.txt @@ -0,0 +1,35 @@ +set(COMPILER_RT_FUZZER_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/compiler-rt/lib/fuzzer") + +set(FUZZER_SRCS + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerCrossOver.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerDataFlowTrace.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerDriver.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtFunctionsDlsym.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtFunctionsWeak.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtFunctionsWindows.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtraCounters.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtraCountersDarwin.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtraCountersWindows.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerFork.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerIO.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerIOPosix.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerIOWindows.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerLoop.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerMerge.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerMutate.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerSHA1.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerTracePC.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtil.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilDarwin.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilFuchsia.cpp" + 
"${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilLinux.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilPosix.cpp" + "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilWindows.cpp" +) + +add_library(_fuzzer_no_main STATIC ${FUZZER_SRCS}) +add_library(ch_contrib::fuzzer_no_main ALIAS _fuzzer_no_main) + +add_library(_fuzzer STATIC ${FUZZER_SRCS} "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerMain.cpp") +add_library(ch_contrib::fuzzer ALIAS _fuzzer) + diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index 57995319cdd..6a73ae0f0c6 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -1,39 +1,3 @@ -set(COMPILER_RT_FUZZER_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/compiler-rt/lib/fuzzer") - -set(FUZZER_SRCS - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerCrossOver.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerDataFlowTrace.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerDriver.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtFunctionsDlsym.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtFunctionsWeak.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtFunctionsWindows.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtraCounters.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtraCountersDarwin.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerExtraCountersWindows.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerFork.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerIO.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerIOPosix.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerIOWindows.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerLoop.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerMerge.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerMutate.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerSHA1.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerTracePC.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtil.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilDarwin.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilFuchsia.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilLinux.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilPosix.cpp" - "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerUtilWindows.cpp" -) - -add_library(_fuzzer_no_main STATIC ${FUZZER_SRCS}) -add_library(ch_contrib::fuzzer_no_main ALIAS _fuzzer_no_main) - -add_library(_fuzzer STATIC ${FUZZER_SRCS} "${COMPILER_RT_FUZZER_SRC_DIR}/FuzzerMain.cpp") -add_library(ch_contrib::fuzzer ALIAS _fuzzer) - - if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined") set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) else() From 95943dbdda66382b711e45d70cc529431c9aed74 Mon Sep 17 00:00:00 2001 From: alexeyerm <63284266+alexeyerm@users.noreply.github.com> Date: Fri, 21 Oct 2022 19:44:35 +0300 Subject: [PATCH 233/252] fix transform() example --- docs/en/sql-reference/functions/other-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index b80d75e3611..02149e16988 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -571,7 +571,7 @@ Example: ``` sql SELECT - transform(domain(Referer), ['yandex.ru', 'google.ru', 'vk.com'], ['www.yandex', 'example.com']) AS s, + transform(domain(Referer), ['yandex.ru', 'google.ru', 'vkontakte.ru'], ['www.yandex', 'example.com', 'vk.com'] AS s, count() AS c FROM test.hits GROUP BY domain(Referer) From 930c60d71e24333764ba6d6f04ed785f17457e66 Mon Sep 17 00:00:00 2001 From: alexeyerm <63284266+alexeyerm@users.noreply.github.com> Date: Fri, 21 Oct 
2022 19:45:37 +0300 Subject: [PATCH 234/252] fix transform() example --- docs/ru/sql-reference/functions/other-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index 5c8584cd2a0..b3786287a62 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -568,7 +568,7 @@ ORDER BY c DESC ``` sql SELECT - transform(domain(Referer), ['yandex.ru', 'google.ru', 'vk.com'], ['www.yandex', 'example.com']) AS s, + transform(domain(Referer), ['yandex.ru', 'google.ru', 'vkontakte.ru'], ['www.yandex', 'example.com', 'vk.com'] AS s, count() AS c FROM test.hits GROUP BY domain(Referer) From d25ecb9f0bbc4357cddc7de2709f7bb642f0fba5 Mon Sep 17 00:00:00 2001 From: alexeyerm <63284266+alexeyerm@users.noreply.github.com> Date: Fri, 21 Oct 2022 19:47:38 +0300 Subject: [PATCH 235/252] fix transform() example --- docs/zh/sql-reference/functions/other-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/sql-reference/functions/other-functions.md b/docs/zh/sql-reference/functions/other-functions.md index a475420ba64..1dc8c1c2155 100644 --- a/docs/zh/sql-reference/functions/other-functions.md +++ b/docs/zh/sql-reference/functions/other-functions.md @@ -237,7 +237,7 @@ ORDER BY c DESC ``` sql SELECT - transform(domain(Referer), ['yandex.ru', 'google.ru', 'vk.com'], ['www.yandex', 'example.com']) AS s, + transform(domain(Referer), ['yandex.ru', 'google.ru', 'vkontakte.ru'], ['www.yandex', 'example.com', 'vk.com'] AS s, count() AS c FROM test.hits GROUP BY domain(Referer) From 9abfe76fbbdd7de6967acb26415be644074e35c3 Mon Sep 17 00:00:00 2001 From: alexeyerm <63284266+alexeyerm@users.noreply.github.com> Date: Fri, 21 Oct 2022 20:08:44 +0300 Subject: [PATCH 236/252] doc fix fix --- docs/en/sql-reference/functions/other-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 02149e16988..6490d4c2272 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -571,7 +571,7 @@ Example: ``` sql SELECT - transform(domain(Referer), ['yandex.ru', 'google.ru', 'vkontakte.ru'], ['www.yandex', 'example.com', 'vk.com'] AS s, + transform(domain(Referer), ['yandex.ru', 'google.ru', 'vkontakte.ru'], ['www.yandex', 'example.com', 'vk.com']) AS s, count() AS c FROM test.hits GROUP BY domain(Referer) From 4b4d1e8a5d056c5f39bcc34340bcc3fadde14900 Mon Sep 17 00:00:00 2001 From: alexeyerm <63284266+alexeyerm@users.noreply.github.com> Date: Fri, 21 Oct 2022 20:09:19 +0300 Subject: [PATCH 237/252] doc fix fix --- docs/ru/sql-reference/functions/other-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index b3786287a62..af21ccd6bed 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -568,7 +568,7 @@ ORDER BY c DESC ``` sql SELECT - transform(domain(Referer), ['yandex.ru', 'google.ru', 'vkontakte.ru'], ['www.yandex', 'example.com', 'vk.com'] AS s, + transform(domain(Referer), ['yandex.ru', 'google.ru', 'vkontakte.ru'], ['www.yandex', 'example.com', 'vk.com']) AS s, count() AS c FROM test.hits GROUP BY domain(Referer) From 
e1fccbd1610881edff840f62e7f887d8b158b83b Mon Sep 17 00:00:00 2001 From: alexeyerm <63284266+alexeyerm@users.noreply.github.com> Date: Fri, 21 Oct 2022 20:09:46 +0300 Subject: [PATCH 238/252] doc fix fix --- docs/zh/sql-reference/functions/other-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/sql-reference/functions/other-functions.md b/docs/zh/sql-reference/functions/other-functions.md index 1dc8c1c2155..62d2a377ff1 100644 --- a/docs/zh/sql-reference/functions/other-functions.md +++ b/docs/zh/sql-reference/functions/other-functions.md @@ -237,7 +237,7 @@ ORDER BY c DESC ``` sql SELECT - transform(domain(Referer), ['yandex.ru', 'google.ru', 'vkontakte.ru'], ['www.yandex', 'example.com', 'vk.com'] AS s, + transform(domain(Referer), ['yandex.ru', 'google.ru', 'vkontakte.ru'], ['www.yandex', 'example.com', 'vk.com']) AS s, count() AS c FROM test.hits GROUP BY domain(Referer) From 905a95e1660f05054d0e521563847b73cc4bb4d1 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 21 Oct 2022 21:16:13 +0200 Subject: [PATCH 239/252] Review fixes Signed-off-by: Azat Khuzhin --- programs/odbc-bridge/ColumnInfoHandler.cpp | 2 +- src/Client/ClientBase.cpp | 2 +- src/Client/ClientBase.h | 2 +- src/Columns/ColumnCompressed.cpp | 8 ++++---- src/Columns/ColumnLowCardinality.cpp | 2 +- src/Common/ZooKeeper/ZooKeeperCommon.cpp | 2 +- src/Coordination/KeeperStorage.cpp | 4 ++-- src/Coordination/ZooKeeperDataReader.cpp | 2 +- src/Functions/FunctionsLogical.h | 2 +- src/Functions/ReplaceRegexpImpl.h | 2 +- src/IO/Archives/ZipArchiveReader.cpp | 2 +- src/IO/examples/zlib_ng_bug.cpp | 4 ++-- src/Interpreters/RowRefs.h | 2 +- src/Storages/Hive/StorageHive.cpp | 2 +- 14 files changed, 19 insertions(+), 19 deletions(-) diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index 7fa51fc8fcd..bf11947d436 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -186,7 +186,7 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ WriteBufferFromHTTPServerResponse out( response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, - static_cast(keep_alive_timeout)); + keep_alive_timeout); try { writeStringBinary(columns.toString(), out); diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index ee5c17ee8f2..5d7de8ec799 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -337,7 +337,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu /// Consumes trailing semicolons and tries to consume the same-line trailing comment. -void ClientBase::adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, int max_parser_depth) +void ClientBase::adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth) { // We have to skip the trailing semicolon that might be left // after VALUES parsing or just after a normal semicolon-terminated query. 
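A minimal sketch of the reasoning behind the signature change above (the names here are illustrative assumptions, not the real call site): the parser depth setting is stored as an unsigned integer, so taking the parameter as uint32_t keeps the depth arithmetic unsigned and confines the narrowing to a single explicit cast at the caller.

#include <cstdint>

struct Settings
{
    uint64_t max_parser_depth = 1000; // settings are stored unsigned
};

// Counterpart of adjustQueryEnd() taking the depth as uint32_t.
static void adjustQueryEndSketch(uint32_t max_parser_depth)
{
    (void)max_parser_depth; // depth comparisons stay unsigned inside
}

int main()
{
    Settings settings;
    // The narrowing happens once, explicitly, at the call site.
    adjustQueryEndSketch(static_cast<uint32_t>(settings.max_parser_depth));
}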
diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 01edb65e135..27f29f24949 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -72,7 +72,7 @@ protected: void processParsedSingleQuery(const String & full_query, const String & query_to_execute, ASTPtr parsed_query, std::optional echo_query_ = {}, bool report_error = false); - static void adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, int max_parser_depth); + static void adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth); ASTPtr parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const; static void setupSignalHandler(); diff --git a/src/Columns/ColumnCompressed.cpp b/src/Columns/ColumnCompressed.cpp index 3560e9d7e1c..cdf604d89f7 100644 --- a/src/Columns/ColumnCompressed.cpp +++ b/src/Columns/ColumnCompressed.cpp @@ -27,8 +27,8 @@ std::shared_ptr> ColumnCompressed::compressBuffer(const void * data, si auto compressed_size = LZ4_compress_default( reinterpret_cast(data), compressed.data(), - static_cast(data_size), - static_cast(max_dest_size)); + static_cast(data_size), + static_cast(max_dest_size)); if (compressed_size <= 0) throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column"); @@ -51,8 +51,8 @@ void ColumnCompressed::decompressBuffer( auto processed_size = LZ4_decompress_safe( reinterpret_cast(compressed_data), reinterpret_cast(decompressed_data), - static_cast(compressed_size), - static_cast(decompressed_size)); + static_cast(compressed_size), + static_cast(decompressed_size)); if (processed_size <= 0) throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress column"); diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 0ac5a2c31bb..47c64242896 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -46,7 +46,7 @@ namespace HashMap hash_map; for (auto val : index) - hash_map.insert({val, static_cast(hash_map.size())}); + hash_map.insert({val, static_cast(hash_map.size())}); auto res_col = ColumnVector::create(); auto & data = res_col->getData(); diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index f555ebb132e..d49800b1abe 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -724,7 +724,7 @@ void ZooKeeperRequest::createLogElements(LogElements & elems) const elem.has_watch = has_watch; elem.op_num = static_cast(getOpNum()); elem.path = getPath(); - elem.request_idx = static_cast(elems.size()) - 1; + elem.request_idx = static_cast(elems.size() - 1); } diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index d6b75baa90e..875dccfd705 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -922,7 +922,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr stat.version = 0; stat.aversion = 0; stat.cversion = 0; - stat.dataLength = static_cast(request.data.length()); + stat.dataLength = static_cast(request.data.length()); stat.ephemeralOwner = request.is_ephemeral ? 
session_id : 0; new_deltas.emplace_back( @@ -1222,7 +1222,7 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce value.stat.version++; value.stat.mzxid = zxid; value.stat.mtime = time; - value.stat.dataLength = static_cast(data.length()); + value.stat.dataLength = static_cast(data.length()); value.setData(data); }, request.version}); diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index 82fce5297a1..3b803c18dbf 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -119,7 +119,7 @@ int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in, Poco::L Coordination::read(node.stat.pzxid, in); if (!path.empty()) { - node.stat.dataLength = static_cast(node.getData().length()); + node.stat.dataLength = static_cast(node.getData().length()); node.seq_num = node.stat.cversion; storage.container.insertOrReplace(path, node); diff --git a/src/Functions/FunctionsLogical.h b/src/Functions/FunctionsLogical.h index d2b9a96b4ad..22471a151d2 100644 --- a/src/Functions/FunctionsLogical.h +++ b/src/Functions/FunctionsLogical.h @@ -193,7 +193,7 @@ public: auto * next = b.GetInsertBlock(); auto * stop = llvm::BasicBlock::Create(next->getContext(), "", next->getParent()); b.SetInsertPoint(stop); - auto * phi = b.CreatePHI(b.getInt8Ty(), static_cast(values.size())); + auto * phi = b.CreatePHI(b.getInt8Ty(), static_cast(values.size())); for (size_t i = 0; i < types.size(); ++i) { b.SetInsertPoint(next); diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 3325c7b8eb1..a1d17ce9da1 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -189,7 +189,7 @@ struct ReplaceRegexpImpl /// Cannot perform search for whole columns. Will process each string separately. for (size_t i = 0; i < size; ++i) { - ssize_t from = i > 0 ? offsets[i - 1] : 0; + size_t from = i > 0 ? 
offsets[i - 1] : 0; re2_st::StringPiece input(reinterpret_cast(data.data() + from), offsets[i] - from - 1); processString(input, res_data, res_offset, searcher, num_captures, instructions); diff --git a/src/IO/Archives/ZipArchiveReader.cpp b/src/IO/Archives/ZipArchiveReader.cpp index f48699a8408..a7c72c7b575 100644 --- a/src/IO/Archives/ZipArchiveReader.cpp +++ b/src/IO/Archives/ZipArchiveReader.cpp @@ -281,7 +281,7 @@ private: bool nextImpl() override { RawHandle raw_handle = handle.getRawHandle(); - auto bytes_read = unzReadCurrentFile(raw_handle, internal_buffer.begin(), static_cast(internal_buffer.size())); + auto bytes_read = unzReadCurrentFile(raw_handle, internal_buffer.begin(), static_cast(internal_buffer.size())); if (bytes_read < 0) checkResult(bytes_read); diff --git a/src/IO/examples/zlib_ng_bug.cpp b/src/IO/examples/zlib_ng_bug.cpp index 60b47e2bcc5..f7c3d1eeefe 100644 --- a/src/IO/examples/zlib_ng_bug.cpp +++ b/src/IO/examples/zlib_ng_bug.cpp @@ -23,9 +23,9 @@ int main(int, char **) throw std::runtime_error("Cannot deflateInit2"); zstr.next_in = in.data(); - zstr.avail_in = static_cast(in.size()); + zstr.avail_in = static_cast(in.size()); zstr.next_out = out.data(); - zstr.avail_out = static_cast(out.size()); + zstr.avail_out = static_cast(out.size()); while (zstr.avail_in > 0) if (Z_OK != deflate(&zstr, Z_NO_FLUSH)) diff --git a/src/Interpreters/RowRefs.h b/src/Interpreters/RowRefs.h index 197ac3ca00f..294da1da571 100644 --- a/src/Interpreters/RowRefs.h +++ b/src/Interpreters/RowRefs.h @@ -31,7 +31,7 @@ struct RowRef RowRef() = default; RowRef(const Block * block_, size_t row_num_) : block(block_) - , row_num(static_cast(row_num_)) + , row_num(static_cast(row_num_)) {} }; diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 5bbc71364c6..47d7382f7ca 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -809,7 +809,7 @@ Pipe StorageHive::read( } if (num_streams > sources_info->hive_files.size()) - num_streams = static_cast(sources_info->hive_files.size()); + num_streams = sources_info->hive_files.size(); Pipes pipes; for (size_t i = 0; i < num_streams; ++i) From bbcdedb945064b4e9560377f8d3b368af02689ea Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 21 Oct 2022 22:21:17 +0200 Subject: [PATCH 240/252] Revert changes of hash functions signatures and fix callers Signed-off-by: Azat Khuzhin --- src/Columns/ColumnArray.cpp | 4 ++-- src/Columns/ColumnConst.cpp | 2 +- src/Columns/ColumnDecimal.cpp | 2 +- src/Columns/ColumnLowCardinality.cpp | 2 +- src/Columns/ColumnVector.cpp | 2 +- src/Common/HashTable/Hash.h | 30 ++++++++++++++-------------- 6 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 0346851ee34..d34ae640962 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -277,13 +277,13 @@ void ColumnArray::updateWeakHash32(WeakHash32 & hash) const { /// This row improves hash a little bit according to integration tests. /// It is the same as to use previous hash value as the first element of array. - hash_data[i] = intHashCRC32(hash_data[i]); + hash_data[i] = static_cast(intHashCRC32(hash_data[i])); for (size_t row = prev_offset; row < offsets_data[i]; ++row) /// It is probably not the best way to combine hashes. /// But much better then xor which lead to similar hash for arrays like [1], [1, 1, 1], [1, 1, 1, 1, 1], ... 
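// A standalone demonstration of the comment above (not ClickHouse code):
// XOR-folding per-element hashes makes [1] and [1, 1, 1] collide, because
// x ^ x cancels out, while chaining through a mixing function (CRC32 in
// ClickHouse) keeps arrays of different lengths distinct.
#include <cstdint>
#include <cstdio>

static uint32_t mix(uint32_t value, uint32_t state)
{
    // Any reasonable mixer works for the demonstration.
    uint64_t x = (static_cast<uint64_t>(state) << 32) | value;
    x ^= x >> 33;
    x *= 0xff51afd7ed558ccdULL;
    x ^= x >> 33;
    return static_cast<uint32_t>(x);
}

int main()
{
    const uint32_t elem = 1;          // hash of the single element
    uint32_t chained1 = mix(elem, 0); // [1]
    uint32_t chained3 = 0;
    for (int i = 0; i < 3; ++i)
        chained3 = mix(elem, chained3); // [1, 1, 1]
    std::printf("chained: %08x vs %08x\n", chained1, chained3); // differ

    // XOR folding collapses repeated elements: elem ^ elem ^ elem == elem.
    std::printf("xor:     %08x vs %08x\n", elem, elem ^ elem ^ elem); // equal
}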
/// Much better implementation - to add offsets as an optional argument to updateWeakHash32. - hash_data[i] = intHashCRC32(internal_hash_data[row], hash_data[i]); + hash_data[i] = static_cast(intHashCRC32(internal_hash_data[row], hash_data[i])); prev_offset = offsets_data[i]; } diff --git a/src/Columns/ColumnConst.cpp b/src/Columns/ColumnConst.cpp index 1864c0194f7..ca691d16f36 100644 --- a/src/Columns/ColumnConst.cpp +++ b/src/Columns/ColumnConst.cpp @@ -148,7 +148,7 @@ void ColumnConst::updateWeakHash32(WeakHash32 & hash) const size_t data_hash = element_hash.getData()[0]; for (auto & value : hash.getData()) - value = intHashCRC32(data_hash, value); + value = static_cast(intHashCRC32(data_hash, value)); } void ColumnConst::compareColumn( diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index 63b76dbb230..33efe440220 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -109,7 +109,7 @@ void ColumnDecimal::updateWeakHash32(WeakHash32 & hash) const while (begin < end) { - *hash_data = intHashCRC32(*begin, *hash_data); + *hash_data = static_cast(intHashCRC32(*begin, *hash_data)); ++begin; ++hash_data; } diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 47c64242896..0981a5b01fa 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -789,7 +789,7 @@ void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 & auto size = data.size(); for (size_t i = 0; i < size; ++i) - hash_data[i] = intHashCRC32(dict_hash_data[data[i]], hash_data[i]); + hash_data[i] = static_cast(intHashCRC32(dict_hash_data[data[i]], hash_data[i])); }; callForType(std::move(update_weak_hash), size_of_type); diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index cb570c87498..19a6e6bfa87 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -84,7 +84,7 @@ void ColumnVector::updateWeakHash32(WeakHash32 & hash) const while (begin < end) { - *hash_data = hashCRC32(*begin, *hash_data); + *hash_data = static_cast(hashCRC32(*begin, *hash_data)); ++begin; ++hash_data; } diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h index 1e98b9e4102..f36ab576766 100644 --- a/src/Common/HashTable/Hash.h +++ b/src/Common/HashTable/Hash.h @@ -51,33 +51,33 @@ inline DB::UInt64 intHash64(DB::UInt64 x) /// NOTE: Intel intrinsic can be confusing. /// - https://code.google.com/archive/p/sse-intrinsics/wikis/PmovIntrinsicBug.wiki /// - https://stackoverflow.com/questions/15752770/mm-crc32-u64-poorly-defined -inline DB::UInt32 intHashCRC32(DB::UInt64 x) +inline DB::UInt64 intHashCRC32(DB::UInt64 x) { #ifdef __SSE4_2__ - return static_cast(_mm_crc32_u64(-1ULL, x)); + return _mm_crc32_u64(-1ULL, x); #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) - return static_cast(__crc32cd(-1U, x)); + return __crc32cd(-1U, x); #else /// On other platforms we do not have CRC32. NOTE This can be confusing. /// NOTE: consider using intHash32() - return static_cast(intHash64(x)); + return intHash64(x); #endif } -inline DB::UInt32 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value) +inline DB::UInt64 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value) { #ifdef __SSE4_2__ - return static_cast(_mm_crc32_u64(updated_value, x)); + return _mm_crc32_u64(updated_value, x); #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) return __crc32cd(static_cast(updated_value), x); #else /// On other platforms we do not have CRC32. 
NOTE This can be confusing. - return static_cast(intHash64(x) ^ updated_value); + return intHash64(x) ^ updated_value; #endif } template requires (sizeof(T) > sizeof(DB::UInt64)) -inline DB::UInt32 intHashCRC32(const T & x, DB::UInt64 updated_value) +inline DB::UInt64 intHashCRC32(const T & x, DB::UInt64 updated_value) { const auto * begin = reinterpret_cast(&x); for (size_t i = 0; i < sizeof(T); i += sizeof(UInt64)) @@ -86,7 +86,7 @@ inline DB::UInt32 intHashCRC32(const T & x, DB::UInt64 updated_value) begin += sizeof(DB::UInt64); } - return static_cast(updated_value); + return updated_value; } @@ -126,14 +126,14 @@ inline UInt32 updateWeakHash32(const DB::UInt8 * pos, size_t size, DB::UInt32 up } reinterpret_cast(&value)[7] = size; - return intHashCRC32(value, updated_value); + return static_cast(intHashCRC32(value, updated_value)); } const auto * end = pos + size; while (pos + 8 <= end) { auto word = unalignedLoad(pos); - updated_value = intHashCRC32(word, updated_value); + updated_value = static_cast(intHashCRC32(word, updated_value)); pos += 8; } @@ -151,7 +151,7 @@ inline UInt32 updateWeakHash32(const DB::UInt8 * pos, size_t size, DB::UInt32 up /// Use least byte to store tail length. word |= tail_size; /// Now word is '\3\0\0\0\0XYZ' - updated_value = intHashCRC32(word, updated_value); + updated_value = static_cast(intHashCRC32(word, updated_value)); } return updated_value; @@ -222,7 +222,7 @@ template struct HashCRC32; template requires (sizeof(T) <= sizeof(UInt64)) -inline UInt32 hashCRC32(T key, DB::UInt64 updated_value = -1) +inline size_t hashCRC32(T key, DB::UInt64 updated_value = -1) { union { @@ -236,7 +236,7 @@ inline UInt32 hashCRC32(T key, DB::UInt64 updated_value = -1) template requires (sizeof(T) > sizeof(UInt64)) -inline UInt32 hashCRC32(T key, DB::UInt64 updated_value = -1) +inline size_t hashCRC32(T key, DB::UInt64 updated_value = -1) { return intHashCRC32(key, updated_value); } @@ -244,7 +244,7 @@ inline UInt32 hashCRC32(T key, DB::UInt64 updated_value = -1) #define DEFINE_HASH(T) \ template <> struct HashCRC32\ {\ - UInt32 operator() (T key) const\ + size_t operator() (T key) const\ {\ return hashCRC32(key);\ }\ From 80867cb1fe616607ee985ab429113045f0a2e1f3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 21 Oct 2022 22:19:06 +0000 Subject: [PATCH 241/252] Add more trash. 
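The idea of the change, as a minimal standalone sketch (illustrative names, not the actual ClickHouse classes): grab the global overcommit tracker from the context once in the constructor, so the destructor works off a cached pointer and never has to resolve the context again.

#include <memory>

struct OvercommitTracker
{
    void onQueryStop() {}
};

struct Context
{
    OvercommitTracker * getGlobalOvercommitTracker() { return &tracker; }
    OvercommitTracker tracker;
};

struct QueryStatusSketch
{
    explicit QueryStatusSketch(Context & ctx)
        : global_overcommit_tracker(ctx.getGlobalOvercommitTracker()) // cached at construction
    {
    }

    ~QueryStatusSketch()
    {
        // No context lookup here; the cached pointer is enough.
        if (global_overcommit_tracker)
            global_overcommit_tracker->onQueryStop();
    }

    OvercommitTracker * global_overcommit_tracker = nullptr;
};

int main()
{
    Context ctx;
    QueryStatusSketch query(ctx);
}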
--- src/Interpreters/ProcessList.cpp | 5 +++-- src/Interpreters/ProcessList.h | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index b8a5ef3b537..3c1ebe21c48 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -344,6 +344,7 @@ QueryStatus::QueryStatus( , client_info(client_info_) , thread_group(std::move(thread_group_)) , priority_handle(std::move(priority_handle_)) + , global_overcommit_tracker(context_->getGlobalOvercommitTracker()) , query_kind(query_kind_) , num_queries_increment(CurrentMetrics::Query) { @@ -360,8 +361,8 @@ QueryStatus::~QueryStatus() { if (user_process_list) user_process_list->user_overcommit_tracker.onQueryStop(memory_tracker); - if (auto shared_context = getContext()) - shared_context->getGlobalOvercommitTracker()->onQueryStop(memory_tracker); + if (global_overcommit_tracker) + global_overcommit_tracker->onQueryStop(memory_tracker); } } diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 077ded9d24c..5fbdce358f9 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -133,6 +133,8 @@ protected: ProcessListForUser * user_process_list = nullptr; + OvercommitTracker * global_overcommit_tracker = nullptr; + IAST::QueryKind query_kind; /// This field is unused in this class, but it From ef4b2b3f595de730377fda09398bdabd77402ceb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Oct 2022 07:25:37 +0200 Subject: [PATCH 242/252] Remove trash --- src/Functions/FunctionBinaryArithmetic.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index baa3c65537d..d7f85832d4e 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -898,11 +898,7 @@ class FunctionBinaryArithmetic : public IFunction if (isDateOrDate32(arguments[1].type) || isDateTime(arguments[1].type) || isDateTime64(arguments[1].type)) std::swap(new_arguments[0], new_arguments[1]); - /// Change interval argument type to its representation - new_arguments[1].type = std::make_shared>(); - auto function = function_builder->build(new_arguments); - return function->execute(new_arguments, result_type, input_rows_count); } From cb611e698ac5252e74d637b55e40bbe251c0168e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Oct 2022 07:38:03 +0200 Subject: [PATCH 243/252] Add tests --- .../0_stateless/02469_interval_msan.reference | 8 ++++++++ .../0_stateless/02469_interval_msan.sql | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 tests/queries/0_stateless/02469_interval_msan.reference create mode 100644 tests/queries/0_stateless/02469_interval_msan.sql diff --git a/tests/queries/0_stateless/02469_interval_msan.reference b/tests/queries/0_stateless/02469_interval_msan.reference new file mode 100644 index 00000000000..c18b4e9b082 --- /dev/null +++ b/tests/queries/0_stateless/02469_interval_msan.reference @@ -0,0 +1,8 @@ +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02469_interval_msan.sql b/tests/queries/0_stateless/02469_interval_msan.sql new file mode 100644 index 00000000000..4b4a9f746ea --- /dev/null +++ b/tests/queries/0_stateless/02469_interval_msan.sql @@ -0,0 +1,19 @@ +SELECT now() + 1::Int128; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT now() + 1::Int256; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT now() + 1::UInt128; -- { 
serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT now() + 1::UInt256; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT now() - 1::Int128; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT now() - 1::Int256; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT now() - 1::UInt128; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT now() - 1::UInt256; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT now() + INTERVAL 1::Int128 SECOND - now(); +SELECT now() + INTERVAL 1::Int256 SECOND - now(); +SELECT now() + INTERVAL 1::UInt128 SECOND - now(); +SELECT now() + INTERVAL 1::UInt256 SECOND - now(); + +SELECT today() + INTERVAL 1::Int128 DAY - today(); +SELECT today() + INTERVAL 1::Int256 DAY - today(); +SELECT today() + INTERVAL 1::UInt128 DAY - today(); +SELECT today() + INTERVAL 1::UInt256 DAY - today(); From 7fc3cd7fb02e64170443e85fb5dc554a7d74a762 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Oct 2022 07:38:22 +0200 Subject: [PATCH 244/252] Fix error --- src/Functions/FunctionBinaryArithmetic.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index d7f85832d4e..050234c1265 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -898,6 +898,10 @@ class FunctionBinaryArithmetic : public IFunction if (isDateOrDate32(arguments[1].type) || isDateTime(arguments[1].type) || isDateTime64(arguments[1].type)) std::swap(new_arguments[0], new_arguments[1]); + /// Change interval argument type to its representation + if (WhichDataType(new_arguments[1].type).isInterval()) + new_arguments[1].type = std::make_shared>(); + auto function = function_builder->build(new_arguments); return function->execute(new_arguments, result_type, input_rows_count); } From 0bb91ed3b40ddd9300b3553637682f969ea76a64 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Oct 2022 07:41:08 +0200 Subject: [PATCH 245/252] Add a test --- .../02470_suspicious_low_cardinality_msan.reference | 0 .../0_stateless/02470_suspicious_low_cardinality_msan.sql | 6 ++++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/02470_suspicious_low_cardinality_msan.reference create mode 100644 tests/queries/0_stateless/02470_suspicious_low_cardinality_msan.sql diff --git a/tests/queries/0_stateless/02470_suspicious_low_cardinality_msan.reference b/tests/queries/0_stateless/02470_suspicious_low_cardinality_msan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02470_suspicious_low_cardinality_msan.sql b/tests/queries/0_stateless/02470_suspicious_low_cardinality_msan.sql new file mode 100644 index 00000000000..6969be1ca64 --- /dev/null +++ b/tests/queries/0_stateless/02470_suspicious_low_cardinality_msan.sql @@ -0,0 +1,6 @@ +DROP TABLE IF EXISTS alias_2__fuzz_25; +SET allow_suspicious_low_cardinality_types = 1; +CREATE TABLE alias_2__fuzz_25 (`dt` LowCardinality(Date), `col` DateTime, `col2` Nullable(Int256), `colAlias0` Nullable(DateTime64(3)) ALIAS col, `colAlias3` Nullable(Int32) ALIAS col3 + colAlias0, `colAlias1` LowCardinality(UInt16) ALIAS colAlias0 + col2, `colAlias2` LowCardinality(Int32) ALIAS colAlias0 + colAlias1, `col3` Nullable(UInt8)) ENGINE = MergeTree ORDER BY dt; +insert into alias_2__fuzz_25 (dt, col, col2, col3) values ('2020-02-01', 1, 2, 3); +SELECT colAlias0, colAlias2, colAlias3 FROM alias_2__fuzz_25; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +DROP TABLE alias_2__fuzz_25; From 
7bf1f4cdfabc3c2487d04e6b1ef176c3425d2224 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Oct 2022 09:02:20 +0200 Subject: [PATCH 246/252] Fix strange code in date monotonicity --- src/Functions/FunctionsConversion.h | 17 +++++++++++++---- .../02471_wrong_date_monotonicity.reference | 1 + .../02471_wrong_date_monotonicity.sql | 5 +++++ 3 files changed, 19 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02471_wrong_date_monotonicity.reference create mode 100644 tests/queries/0_stateless/02471_wrong_date_monotonicity.sql diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index f3c9f46097f..bbda45d7c8a 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -2243,15 +2243,24 @@ struct ToDateMonotonicity { auto which = WhichDataType(type); if (which.isDateOrDate32() || which.isDateTime() || which.isDateTime64() || which.isInt8() || which.isInt16() || which.isUInt8() || which.isUInt16()) + { return { .is_monotonic = true, .is_always_monotonic = true }; + } else if ( - (which.isUInt() && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) - || (which.isInt() && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) - || (which.isFloat() && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) - || !type.isValueRepresentedByNumber()) + ((left.getType() == Field::Types::UInt64 || left.isNull()) && (right.getType() == Field::Types::UInt64 || right.isNull()) + && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) + || ((left.getType() == Field::Types::Int64 || left.isNull()) && (right.getType() == Field::Types::Int64 || right.isNull()) + && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) + || (((left.getType() == Field::Types::Float64 || left.isNull()) && (right.getType() == Field::Types::Float64 || right.isNull()) + && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF)))) + || !isNativeNumber(type)) + { return {}; + } else + { return { .is_monotonic = true, .is_always_monotonic = true }; + } } }; diff --git a/tests/queries/0_stateless/02471_wrong_date_monotonicity.reference b/tests/queries/0_stateless/02471_wrong_date_monotonicity.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02471_wrong_date_monotonicity.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02471_wrong_date_monotonicity.sql b/tests/queries/0_stateless/02471_wrong_date_monotonicity.sql new file mode 100644 index 00000000000..40d64e53309 --- /dev/null +++ b/tests/queries/0_stateless/02471_wrong_date_monotonicity.sql @@ -0,0 +1,5 @@ +DROP TABLE IF EXISTS tdm__fuzz_23; +CREATE TABLE tdm__fuzz_23 (`x` UInt256) ENGINE = MergeTree ORDER BY x SETTINGS write_final_mark = 0; +INSERT INTO tdm__fuzz_23 FORMAT Values (1); +SELECT count(x) FROM tdm__fuzz_23 WHERE toDate(x) < toDate(now(), 'Asia/Istanbul') SETTINGS max_rows_to_read = 1; +DROP TABLE tdm__fuzz_23; From e3e30a4457ab94880ae976b2484a6608e0029051 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 22 Oct 2022 13:17:27 +0200 Subject: [PATCH 247/252] Fix one clang-tidy issue Signed-off-by: Azat Khuzhin --- src/DataTypes/getLeastSupertype.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 
From e3e30a4457ab94880ae976b2484a6608e0029051 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Sat, 22 Oct 2022 13:17:27 +0200
Subject: [PATCH 247/252] Fix one clang-tidy issue

Signed-off-by: Azat Khuzhin
---
 src/DataTypes/getLeastSupertype.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp
index 87c6055c35a..82c8cadc6a1 100644
--- a/src/DataTypes/getLeastSupertype.cpp
+++ b/src/DataTypes/getLeastSupertype.cpp
@@ -538,12 +538,12 @@ DataTypePtr getLeastSupertype(const DataTypes & types)
                 TypeIndex::Int32, TypeIndex::UInt32, TypeIndex::Int64, TypeIndex::UInt64};
 
             TypeIndex max_int = TypeIndex::Nothing;
-            for (size_t i = 0; i < int_ids.size(); ++i)
+            for (auto int_id : int_ids)
             {
-                size_t num = type_ids.count(int_ids[i]);
+                size_t num = type_ids.count(int_id);
                 num_supported += num;
                 if (num)
-                    max_int = int_ids[i];
+                    max_int = int_id;
             }
 
             if (num_supported != type_ids.size())

From 9a9bbac19b4f97bcc95aa43c7715a3fa62be719f Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Sat, 22 Oct 2022 21:11:37 +0200
Subject: [PATCH 248/252] tests: avoid model overlap for obfuscator

Under stress tests it is possible for two concurrently running tests to
use the same obfuscator model file; the resulting failure is reported as
LOGICAL_ERROR, which creates a core dump. On CI [1] the error was likely
this:

    stress_test_run_17.txt:/usr/share/clickhouse-test/queries/1_stateful/00175_obfuscator_schema_inference.sh: line 18: /tmp/clickhouse-test/1_stateful/model.bin: No such file or directory

So the file had been removed by another concurrent test.

[1]: https://s3.amazonaws.com/clickhouse-test-reports/42190/56bc85746fa0b553e43c2253250404cfcca46855/stress_test__ubsan_.html

Note that it would be enough just to change the file name in these two
tests; however, let's make them even more error-resistant.

Signed-off-by: Azat Khuzhin
---
 tests/queries/1_stateful/00096_obfuscator_save_load.sh     | 10 ++++++----
 .../1_stateful/00175_obfuscator_schema_inference.sh        | 10 ++++++----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/tests/queries/1_stateful/00096_obfuscator_save_load.sh b/tests/queries/1_stateful/00096_obfuscator_save_load.sh
index c90eee1d0f9..a88dfcdb9b9 100755
--- a/tests/queries/1_stateful/00096_obfuscator_save_load.sh
+++ b/tests/queries/1_stateful/00096_obfuscator_save_load.sh
@@ -4,12 +4,14 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
"$CURDIR"/../shell_config.sh +model=$(mktemp "$CLICKHOUSE_TMP/obfuscator-model-XXXXXX.bin") + $CLICKHOUSE_CLIENT --max_threads 1 --query="SELECT URL, Title, SearchPhrase FROM test.hits LIMIT 1000" > "${CLICKHOUSE_TMP}"/data.tsv -$CLICKHOUSE_OBFUSCATOR --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --seed hello --limit 0 --save "${CLICKHOUSE_TMP}"/model.bin < "${CLICKHOUSE_TMP}"/data.tsv 2>/dev/null -wc -c < "${CLICKHOUSE_TMP}"/model.bin -$CLICKHOUSE_OBFUSCATOR --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --seed hello --limit 2500 --load "${CLICKHOUSE_TMP}"/model.bin < "${CLICKHOUSE_TMP}"/data.tsv > "${CLICKHOUSE_TMP}"/data2500.tsv 2>/dev/null -rm "${CLICKHOUSE_TMP}"/model.bin +$CLICKHOUSE_OBFUSCATOR --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --seed hello --limit 0 --save "$model" < "${CLICKHOUSE_TMP}"/data.tsv 2>/dev/null +wc -c < "$model" +$CLICKHOUSE_OBFUSCATOR --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --seed hello --limit 2500 --load "$model" < "${CLICKHOUSE_TMP}"/data.tsv > "${CLICKHOUSE_TMP}"/data2500.tsv 2>/dev/null +rm "$model" $CLICKHOUSE_LOCAL --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --query "SELECT count(), uniq(URL), uniq(Title), uniq(SearchPhrase) FROM table" < "${CLICKHOUSE_TMP}"/data.tsv $CLICKHOUSE_LOCAL --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --query "SELECT count(), uniq(URL), uniq(Title), uniq(SearchPhrase) FROM table" < "${CLICKHOUSE_TMP}"/data2500.tsv diff --git a/tests/queries/1_stateful/00175_obfuscator_schema_inference.sh b/tests/queries/1_stateful/00175_obfuscator_schema_inference.sh index 8ff0d2fa648..771c7ab5436 100755 --- a/tests/queries/1_stateful/00175_obfuscator_schema_inference.sh +++ b/tests/queries/1_stateful/00175_obfuscator_schema_inference.sh @@ -4,6 +4,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh +model=$(mktemp "$CLICKHOUSE_TMP/obfuscator-model-XXXXXX.bin") + # Compared to explicitly specifying the structure of the input, # schema inference adds Nullable(T) to all types, so the model and the results # are a bit different from test '00175_obfuscator_schema_inference.sh' @@ -14,10 +16,10 @@ $CLICKHOUSE_CLIENT --max_threads 1 --query="SELECT URL, Title, SearchPhrase FROM $CLICKHOUSE_OBFUSCATOR --input-format TSV --output-format TSV --seed hello --limit 2500 < "${CLICKHOUSE_TMP}"/data.tsv > "${CLICKHOUSE_TMP}"/data2500.tsv 2>/dev/null # Test obfuscator with saving the model -$CLICKHOUSE_OBFUSCATOR --input-format TSV --output-format TSV --seed hello --limit 0 --save "${CLICKHOUSE_TMP}"/model.bin < "${CLICKHOUSE_TMP}"/data.tsv 2>/dev/null -wc -c < "${CLICKHOUSE_TMP}"/model.bin -$CLICKHOUSE_OBFUSCATOR --input-format TSV --output-format TSV --seed hello --limit 2500 --load "${CLICKHOUSE_TMP}"/model.bin < "${CLICKHOUSE_TMP}"/data.tsv > "${CLICKHOUSE_TMP}"/data2500_load_from_model.tsv 2>/dev/null -rm "${CLICKHOUSE_TMP}"/model.bin +$CLICKHOUSE_OBFUSCATOR --input-format TSV --output-format TSV --seed hello --limit 0 --save "$model" < "${CLICKHOUSE_TMP}"/data.tsv 2>/dev/null +wc -c < "$model" +$CLICKHOUSE_OBFUSCATOR --input-format TSV --output-format TSV --seed hello --limit 2500 --load "$model" < "${CLICKHOUSE_TMP}"/data.tsv > "${CLICKHOUSE_TMP}"/data2500_load_from_model.tsv 2>/dev/null +rm "$model" $CLICKHOUSE_LOCAL --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --query "SELECT count(), uniq(URL), uniq(Title), uniq(SearchPhrase) FROM table" < "${CLICKHOUSE_TMP}"/data.tsv $CLICKHOUSE_LOCAL --structure "URL String, Title String, SearchPhrase String" --input-format TSV --output-format TSV --query "SELECT count(), uniq(URL), uniq(Title), uniq(SearchPhrase) FROM table" < "${CLICKHOUSE_TMP}"/data2500.tsv From 3e5a90478ef39343e10f9a16851b200e24d29438 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 22 Oct 2022 23:32:27 +0200 Subject: [PATCH 249/252] Fix clang-tidy issue in gtest_DecimalFunctions Signed-off-by: Azat Khuzhin --- src/Core/tests/gtest_DecimalFunctions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/tests/gtest_DecimalFunctions.cpp b/src/Core/tests/gtest_DecimalFunctions.cpp index a07240cf4a5..ca99ad65863 100644 --- a/src/Core/tests/gtest_DecimalFunctions.cpp +++ b/src/Core/tests/gtest_DecimalFunctions.cpp @@ -146,7 +146,7 @@ TEST_P(DecimalUtilsSplitAndCombineForDateTime64Test, getFractionalPartDateTime64 } -namespace std +namespace std // NOLINT(cert-dcl58-cpp) { std::ostream & operator << (std::ostream & ostr, const DecimalUtilsSplitAndCombineTestParam & param) From f18ac40be505b2bf8b6798791c738a3bdd5372f0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 23 Oct 2022 07:44:24 +0200 Subject: [PATCH 250/252] Fix clang-tidy cert-dcl58-cpp in gtest_DecimalFunctions Signed-off-by: Azat Khuzhin --- src/Core/tests/gtest_DecimalFunctions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/tests/gtest_DecimalFunctions.cpp b/src/Core/tests/gtest_DecimalFunctions.cpp index ca99ad65863..08f25ddd97c 100644 --- a/src/Core/tests/gtest_DecimalFunctions.cpp +++ b/src/Core/tests/gtest_DecimalFunctions.cpp @@ -149,7 +149,7 @@ TEST_P(DecimalUtilsSplitAndCombineForDateTime64Test, getFractionalPartDateTime64 namespace std // NOLINT(cert-dcl58-cpp) { -std::ostream & operator << (std::ostream & ostr, const DecimalUtilsSplitAndCombineTestParam & param) 
+std::ostream & operator << (std::ostream & ostr, const DecimalUtilsSplitAndCombineTestParam & param) // NOLINT(cert-dcl58-cpp) { return ostr << param.description; } From dd98608b78fcd2bebee2dd949dcdca064759b0f1 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Sun, 23 Oct 2022 19:49:00 +0200 Subject: [PATCH 251/252] Fix segfault in expression parser --- src/Parsers/ExpressionListParsers.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index f7a016a59e4..1249baf1859 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -1582,6 +1582,9 @@ public: { if (parsed_interval_kind) { + if (elements.size() < 2) + return false; + elements[0] = makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), elements[0]); node = makeASTFunction(function_name, elements[1], elements[0]); } From 8b6f84cbc6d076e3535c43d22a7498aa3d889c18 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 24 Oct 2022 01:25:19 +0000 Subject: [PATCH 252/252] Add test --- .../0_stateless/02472_segfault_expression_parser.reference | 0 tests/queries/0_stateless/02472_segfault_expression_parser.sql | 1 + 2 files changed, 1 insertion(+) create mode 100644 tests/queries/0_stateless/02472_segfault_expression_parser.reference create mode 100644 tests/queries/0_stateless/02472_segfault_expression_parser.sql diff --git a/tests/queries/0_stateless/02472_segfault_expression_parser.reference b/tests/queries/0_stateless/02472_segfault_expression_parser.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02472_segfault_expression_parser.sql b/tests/queries/0_stateless/02472_segfault_expression_parser.sql new file mode 100644 index 00000000000..285de80a64a --- /dev/null +++ b/tests/queries/0_stateless/02472_segfault_expression_parser.sql @@ -0,0 +1 @@ +SELECT TIMESTAMP_SUB (SELECT ILIKE INTO OUTFILE , accurateCast ) FROM TIMESTAMP_SUB ( MINUTE , ) GROUP BY accurateCast; -- { clientError 62 }