From 77c22dce3c63436a2b241cf5e144b4f6cafebcd8 Mon Sep 17 00:00:00 2001 From: zhangxiao871 <821008736@qq.com> Date: Mon, 18 Oct 2021 18:53:42 +0800 Subject: [PATCH 001/132] Zookeeper load balancing settings --- src/Common/ZooKeeper/ZooKeeper.cpp | 95 +++++++++++++++++++++++++---- src/Common/ZooKeeper/ZooKeeper.h | 37 ++++++++++- src/Core/SettingsEnums.cpp | 8 +++ src/Core/SettingsEnums.h | 19 ++++++ tests/config/config.d/zookeeper.xml | 2 + 5 files changed, 145 insertions(+), 16 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 3d505c088db..65295a96d3e 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -5,15 +5,16 @@ #include #include -#include #include #include -#include +#include #include #include +#include #include +#include #define ZOOKEEPER_CONNECTION_TIMEOUT_MS 1000 @@ -47,7 +48,7 @@ static void check(Coordination::Error code, const std::string & path) void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_, - int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_) + int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, ZooKeeperLoadBalancing zookeeper_load_balancing_) { log = &Poco::Logger::get("ZooKeeper"); hosts = hosts_; @@ -56,6 +57,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ operation_timeout_ms = operation_timeout_ms_; chroot = chroot_; implementation = implementation_; + zookeeper_load_balancing = zookeeper_load_balancing_; if (implementation == "zookeeper") { @@ -65,14 +67,13 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ Coordination::ZooKeeper::Nodes nodes; nodes.reserve(hosts.size()); - Strings shuffled_hosts = hosts; /// Shuffle the hosts to distribute the load among ZooKeeper nodes. - pcg64 generator(randomSeed()); - std::shuffle(shuffled_hosts.begin(), shuffled_hosts.end(), generator); + std::vector shuffled_hosts = shuffleHosts(); bool dns_error = false; - for (auto & host_string : shuffled_hosts) + for (auto & host : shuffled_hosts) { + auto & host_string = host.host; try { bool secure = bool(startsWith(host_string, "secure://")); @@ -153,23 +154,85 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ } } +std::vector ZooKeeper::shuffleHosts() const +{ + std::vector hostname_differences; + hostname_differences.resize(hosts.size()); + String local_hostname = getFQDNOrHostName(); + for (size_t i = 0; i < hosts.size(); ++i) + { + String ip_or_hostname = hosts[i].substr(0, hosts[i].find_last_of(":")); + hostname_differences[i] = DB::getHostNameDifference(local_hostname, Poco::Net::DNS::resolve(ip_or_hostname).name()); + } + + size_t offset = 0; + std::function get_priority; + switch (ZooKeeperLoadBalancing(zookeeper_load_balancing)) + { + case ZooKeeperLoadBalancing::NEAREST_HOSTNAME: + get_priority = [&](size_t i) { return hostname_differences[i]; }; + break; + case ZooKeeperLoadBalancing::IN_ORDER: + get_priority = [](size_t i) { return i; }; + break; + case ZooKeeperLoadBalancing::RANDOM: + break; + case ZooKeeperLoadBalancing::FIRST_OR_RANDOM: + get_priority = [offset](size_t i) -> size_t { return i != offset; }; + break; + case ZooKeeperLoadBalancing::ROUND_ROBIN: + static size_t last_used = 0; + if (last_used >= hosts.size()) + last_used = 0; + ++last_used; + /* Consider hosts.size() equals to 5 + * last_used = 1 -> get_priority: 0 1 2 3 4 + * last_used = 2 -> get_priority: 4 0 1 2 3 + * last_used = 3 -> get_priority: 4 3 0 1 2 + * ... + * */ + get_priority = [&](size_t i) { ++i; return i < last_used ? hosts.size() - i : i - last_used; }; + break; + } + + std::vector shuffle_hosts; + for (size_t i = 0; i < hosts.size(); ++i) + { + ShuffleHost shuffle_host; + shuffle_host.host = hosts[i]; + if (get_priority) + shuffle_host.priority = get_priority(i); + shuffle_host.randomize(); + shuffle_hosts.emplace_back(shuffle_host); + } + + std::sort( + shuffle_hosts.begin(), shuffle_hosts.end(), + [](const ShuffleHost & lhs, const ShuffleHost & rhs) + { + return ShuffleHost::compare(lhs, rhs); + }); + + return shuffle_hosts; +} + ZooKeeper::ZooKeeper(const std::string & hosts_string, const std::string & identity_, int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_, - std::shared_ptr zk_log_) + std::shared_ptr zk_log_, ZooKeeperLoadBalancing zookeeper_load_balancing_) { zk_log = std::move(zk_log_); Strings hosts_strings; splitInto<','>(hosts_strings, hosts_string); - init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_); + init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, zookeeper_load_balancing_); } ZooKeeper::ZooKeeper(const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_, - std::shared_ptr zk_log_) + std::shared_ptr zk_log_, ZooKeeperLoadBalancing zookeeper_load_balancing_) { zk_log = std::move(zk_log_); - init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_); + init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, zookeeper_load_balancing_); } struct ZooKeeperArgs @@ -182,6 +245,7 @@ struct ZooKeeperArgs session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS; operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS; implementation = "zookeeper"; + zookeeper_load_balancing = ZooKeeperLoadBalancing::RANDOM; for (const auto & key : keys) { if (startsWith(key, "node")) @@ -212,6 +276,10 @@ struct ZooKeeperArgs { implementation = config.getString(config_name + "." + key); } + else if (key == "zookeeper_load_balancing") + { + zookeeper_load_balancing = DB::SettingFieldZooKeeperLoadBalancingTraits::fromString(config.getString(config_name + "." + key)); + } else throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS); } @@ -231,13 +299,14 @@ struct ZooKeeperArgs int operation_timeout_ms; std::string chroot; std::string implementation; + ZooKeeperLoadBalancing zookeeper_load_balancing; }; ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr zk_log_) : zk_log(std::move(zk_log_)) { ZooKeeperArgs args(config, config_name); - init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot); + init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.zookeeper_load_balancing); } bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config, const std::string & config_name) const @@ -752,7 +821,7 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition & ZooKeeperPtr ZooKeeper::startNewSession() const { - return std::make_shared(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log); + return std::make_shared(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log, zookeeper_load_balancing); } diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 8e015b1f331..4ad19eb3a4c 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -13,7 +13,10 @@ #include #include #include +#include +#include #include +#include namespace ProfileEvents @@ -37,6 +40,29 @@ namespace zkutil /// Preferred size of multi() command (in number of ops) constexpr size_t MULTI_BATCH_SIZE = 100; +struct ShuffleHost +{ + String host; + /// Priority from the GetPriorityFunc. + Int64 priority = 0; + UInt32 random = 0; + + void randomize() + { + random = rng(); + } + + static bool compare(const ShuffleHost & lhs, const ShuffleHost & rhs) + { + return std::forward_as_tuple(lhs.priority, lhs.random) + < std::forward_as_tuple(rhs.priority, rhs.random); + } + +private: + std::minstd_rand rng = std::minstd_rand(randomSeed()); +}; + +using ZooKeeperLoadBalancing = DB::ZooKeeperLoadBalancing; /// ZooKeeper session. The interface is substantially different from the usual libzookeeper API. /// @@ -58,14 +84,16 @@ public: int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS, const std::string & chroot_ = "", const std::string & implementation_ = "zookeeper", - std::shared_ptr zk_log_ = nullptr); + std::shared_ptr zk_log_ = nullptr, + ZooKeeperLoadBalancing zookeeper_load_balancing_ = ZooKeeperLoadBalancing::RANDOM); ZooKeeper(const Strings & hosts_, const std::string & identity_ = "", int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS, int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS, const std::string & chroot_ = "", const std::string & implementation_ = "zookeeper", - std::shared_ptr zk_log_ = nullptr); + std::shared_ptr zk_log_ = nullptr, + ZooKeeperLoadBalancing zookeeper_load_balancing_ = ZooKeeperLoadBalancing::RANDOM); /** Config of the form: @@ -91,6 +119,8 @@ public: */ ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr zk_log_); + std::vector shuffleHosts() const; + /// Creates a new session with the same parameters. This method can be used for reconnecting /// after the session has expired. /// This object remains unchanged, and the new session is returned. @@ -284,7 +314,7 @@ private: friend class EphemeralNodeHolder; void init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_, - int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_); + int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, ZooKeeperLoadBalancing zookeeper_load_balancing_); /// The following methods don't any throw exceptions but return error codes. Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created); @@ -310,6 +340,7 @@ private: Poco::Logger * log = nullptr; std::shared_ptr zk_log; + ZooKeeperLoadBalancing zookeeper_load_balancing; AtomicStopwatch session_uptime; }; diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 8e588b62326..2aa296533fe 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -116,4 +116,12 @@ IMPLEMENT_SETTING_ENUM(ShortCircuitFunctionEvaluation, ErrorCodes::BAD_ARGUMENTS {{"enable", ShortCircuitFunctionEvaluation::ENABLE}, {"force_enable", ShortCircuitFunctionEvaluation::FORCE_ENABLE}, {"disable", ShortCircuitFunctionEvaluation::DISABLE}}) + +IMPLEMENT_SETTING_ENUM(ZooKeeperLoadBalancing, ErrorCodes::UNKNOWN_LOAD_BALANCING, + {{"random", ZooKeeperLoadBalancing::RANDOM}, + {"nearest_hostname", ZooKeeperLoadBalancing::NEAREST_HOSTNAME}, + {"in_order", ZooKeeperLoadBalancing::IN_ORDER}, + {"first_or_random", ZooKeeperLoadBalancing::FIRST_OR_RANDOM}, + {"round_robin", ZooKeeperLoadBalancing::ROUND_ROBIN}}) + } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 33c5a6d8645..a308fa1745b 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -168,4 +168,23 @@ enum class ShortCircuitFunctionEvaluation DECLARE_SETTING_ENUM(ShortCircuitFunctionEvaluation) +enum class ZooKeeperLoadBalancing +{ + /// among replicas with a minimum number of errors selected randomly + RANDOM = 0, + /// a replica is selected among the replicas with the minimum number of errors + /// with the minimum number of distinguished characters in the replica name and local hostname + NEAREST_HOSTNAME, + // replicas with the same number of errors are accessed in the same order + // as they are specified in the configuration. + IN_ORDER, + /// if first replica one has higher number of errors, + /// pick a random one from replicas with minimum number of errors + FIRST_OR_RANDOM, + // round robin across replicas with the same number of errors. + ROUND_ROBIN, +}; + +DECLARE_SETTING_ENUM(ZooKeeperLoadBalancing) + } diff --git a/tests/config/config.d/zookeeper.xml b/tests/config/config.d/zookeeper.xml index 4fa529a6180..63057224ef9 100644 --- a/tests/config/config.d/zookeeper.xml +++ b/tests/config/config.d/zookeeper.xml @@ -1,5 +1,7 @@ + + random localhost 9181 From fed7bb594179257373aaf1b2109bcd1da6dd3bb8 Mon Sep 17 00:00:00 2001 From: zhangxiao871 <821008736@qq.com> Date: Mon, 18 Oct 2021 19:10:53 +0800 Subject: [PATCH 002/132] Update comments. --- src/Core/SettingsEnums.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index a308fa1745b..b7506e52176 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -170,18 +170,17 @@ DECLARE_SETTING_ENUM(ShortCircuitFunctionEvaluation) enum class ZooKeeperLoadBalancing { - /// among replicas with a minimum number of errors selected randomly + /// Randomly select one from the zookeeper nodes. RANDOM = 0, - /// a replica is selected among the replicas with the minimum number of errors - /// with the minimum number of distinguished characters in the replica name and local hostname + /// Choose one from the zookeeper node that has the least + /// number of characters different from the hostname of the local host NEAREST_HOSTNAME, - // replicas with the same number of errors are accessed in the same order - // as they are specified in the configuration. + /// Select one from the zookeeper node configuration in order. IN_ORDER, - /// if first replica one has higher number of errors, - /// pick a random one from replicas with minimum number of errors + /// If the first node cannot be connected, + /// one will be randomly selected from other nodes. FIRST_OR_RANDOM, - // round robin across replicas with the same number of errors. + /// Round robin from the node configured by zookeeper. ROUND_ROBIN, }; From 62a15c1c1a7077539d9faafd4615b9f3a755af75 Mon Sep 17 00:00:00 2001 From: zhangxiao871 <821008736@qq.com> Date: Tue, 19 Oct 2021 12:43:54 +0800 Subject: [PATCH 003/132] Fix some build error and try fix undefined symbol: DB::SettingFieldZooKeeperLoadBalancingTraits::fromString build error. --- src/Common/ZooKeeper/ZooKeeper.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 65295a96d3e..3bff19fc2d9 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -158,10 +158,10 @@ std::vector ZooKeeper::shuffleHosts() const { std::vector hostname_differences; hostname_differences.resize(hosts.size()); - String local_hostname = getFQDNOrHostName(); + const String & local_hostname = getFQDNOrHostName(); for (size_t i = 0; i < hosts.size(); ++i) { - String ip_or_hostname = hosts[i].substr(0, hosts[i].find_last_of(":")); + const String & ip_or_hostname = hosts[i].substr(0, hosts[i].find_last_of(':')); hostname_differences[i] = DB::getHostNameDifference(local_hostname, Poco::Net::DNS::resolve(ip_or_hostname).name()); } @@ -278,7 +278,9 @@ struct ZooKeeperArgs } else if (key == "zookeeper_load_balancing") { - zookeeper_load_balancing = DB::SettingFieldZooKeeperLoadBalancingTraits::fromString(config.getString(config_name + "." + key)); + DB::SettingFieldZooKeeperLoadBalancing setting_field; + setting_field.parseFromString(config.getString(config_name + "." + key)); + zookeeper_load_balancing = setting_field.value; } else throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS); @@ -317,8 +319,8 @@ bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config, if (args.implementation == implementation && implementation == "testkeeper") return false; - return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot) - != std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot); + return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.zookeeper_load_balancing) + != std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, zookeeper_load_balancing); } From a6ae846f02c32a2c91728ade3b4ff58c86ca85c2 Mon Sep 17 00:00:00 2001 From: zhangxiao871 <821008736@qq.com> Date: Tue, 19 Oct 2021 17:39:03 +0800 Subject: [PATCH 004/132] fix biuld and PVS check --- src/Common/ZooKeeper/ZooKeeper.cpp | 40 ++++++++++--- src/Common/ZooKeeper/ZooKeeper.h | 18 +++++- src/Core/SettingsEnums.cpp | 7 --- src/Core/SettingsEnums.h | 18 ------ .../__init__.py | 0 .../configs/remote_servers.xml | 23 ++++++++ .../configs/zookeeper_config_in_order.xml | 20 +++++++ .../test.py | 57 +++++++++++++++++++ 8 files changed, 149 insertions(+), 34 deletions(-) create mode 100644 tests/integration/test_zookeeper_config_load_balancing/__init__.py create mode 100644 tests/integration/test_zookeeper_config_load_balancing/configs/remote_servers.xml create mode 100644 tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_in_order.xml create mode 100644 tests/integration/test_zookeeper_config_load_balancing/test.py diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 3bff19fc2d9..2f33888846b 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -6,7 +6,6 @@ #include #include -#include #include #include #include @@ -27,6 +26,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; + extern const int UNKNOWN_LOAD_BALANCING; } } @@ -46,6 +46,35 @@ static void check(Coordination::Error code, const std::string & path) throw KeeperException(code, path); } +static ZooKeeperLoadBalancing fromString(const std::string_view & str) +{ + static const std::unordered_map map = [] { + std::unordered_map res; + constexpr std::pair pairs[] + = {{"random", ZooKeeperLoadBalancing::RANDOM}, + {"nearest_hostname", ZooKeeperLoadBalancing::NEAREST_HOSTNAME}, + {"in_order", ZooKeeperLoadBalancing::IN_ORDER}, + {"first_or_random", ZooKeeperLoadBalancing::FIRST_OR_RANDOM}, + {"round_robin", ZooKeeperLoadBalancing::ROUND_ROBIN}}; + for (const auto & [name, val] : pairs) + res.emplace(name, val); + return res; + }(); + auto it = map.find(str); + if (it != map.end()) + return it->second; + String msg = "Unexpected value of ZooKeeperLoadBalancing: '" + String{str} + "'. Must be one of ["; + bool need_comma = false; + for (auto & name : map | boost::adaptors::map_keys) + { + if (std::exchange(need_comma, true)) + msg += ", "; + msg += "'" + String{name} + "'"; + } + msg += "]"; + throw DB::Exception(msg, DB::ErrorCodes::UNKNOWN_LOAD_BALANCING); +} + void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, ZooKeeperLoadBalancing zookeeper_load_balancing_) @@ -165,7 +194,6 @@ std::vector ZooKeeper::shuffleHosts() const hostname_differences[i] = DB::getHostNameDifference(local_hostname, Poco::Net::DNS::resolve(ip_or_hostname).name()); } - size_t offset = 0; std::function get_priority; switch (ZooKeeperLoadBalancing(zookeeper_load_balancing)) { @@ -178,7 +206,7 @@ std::vector ZooKeeper::shuffleHosts() const case ZooKeeperLoadBalancing::RANDOM: break; case ZooKeeperLoadBalancing::FIRST_OR_RANDOM: - get_priority = [offset](size_t i) -> size_t { return i != offset; }; + get_priority = [](size_t i) -> size_t { return i != 0; }; break; case ZooKeeperLoadBalancing::ROUND_ROBIN: static size_t last_used = 0; @@ -191,7 +219,7 @@ std::vector ZooKeeper::shuffleHosts() const * last_used = 3 -> get_priority: 4 3 0 1 2 * ... * */ - get_priority = [&](size_t i) { ++i; return i < last_used ? hosts.size() - i : i - last_used; }; + get_priority = [this, last_used_value = last_used](size_t i) { ++i; return i < last_used_value ? hosts.size() - i : i - last_used_value; }; break; } @@ -278,9 +306,7 @@ struct ZooKeeperArgs } else if (key == "zookeeper_load_balancing") { - DB::SettingFieldZooKeeperLoadBalancing setting_field; - setting_field.parseFromString(config.getString(config_name + "." + key)); - zookeeper_load_balancing = setting_field.value; + zookeeper_load_balancing = fromString(config.getString(config_name + "." + key)); } else throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS); diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 4ad19eb3a4c..c992ffe3a43 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -14,7 +14,6 @@ #include #include #include -#include #include #include @@ -62,7 +61,22 @@ private: std::minstd_rand rng = std::minstd_rand(randomSeed()); }; -using ZooKeeperLoadBalancing = DB::ZooKeeperLoadBalancing; +enum class ZooKeeperLoadBalancing +{ + /// Randomly select one from the zookeeper nodes. + RANDOM = 0, + /// Choose one from the zookeeper node that has the least + /// number of characters different from the hostname of the local host + NEAREST_HOSTNAME, + /// Select one from the zookeeper node configuration in order. + IN_ORDER, + /// If the first node cannot be connected, + /// one will be randomly selected from other nodes. + FIRST_OR_RANDOM, + /// Round robin from the node configured by zookeeper. + ROUND_ROBIN, +}; + /// ZooKeeper session. The interface is substantially different from the usual libzookeeper API. /// diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 2aa296533fe..6a5d8136227 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -117,11 +117,4 @@ IMPLEMENT_SETTING_ENUM(ShortCircuitFunctionEvaluation, ErrorCodes::BAD_ARGUMENTS {"force_enable", ShortCircuitFunctionEvaluation::FORCE_ENABLE}, {"disable", ShortCircuitFunctionEvaluation::DISABLE}}) -IMPLEMENT_SETTING_ENUM(ZooKeeperLoadBalancing, ErrorCodes::UNKNOWN_LOAD_BALANCING, - {{"random", ZooKeeperLoadBalancing::RANDOM}, - {"nearest_hostname", ZooKeeperLoadBalancing::NEAREST_HOSTNAME}, - {"in_order", ZooKeeperLoadBalancing::IN_ORDER}, - {"first_or_random", ZooKeeperLoadBalancing::FIRST_OR_RANDOM}, - {"round_robin", ZooKeeperLoadBalancing::ROUND_ROBIN}}) - } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index b7506e52176..33c5a6d8645 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -168,22 +168,4 @@ enum class ShortCircuitFunctionEvaluation DECLARE_SETTING_ENUM(ShortCircuitFunctionEvaluation) -enum class ZooKeeperLoadBalancing -{ - /// Randomly select one from the zookeeper nodes. - RANDOM = 0, - /// Choose one from the zookeeper node that has the least - /// number of characters different from the hostname of the local host - NEAREST_HOSTNAME, - /// Select one from the zookeeper node configuration in order. - IN_ORDER, - /// If the first node cannot be connected, - /// one will be randomly selected from other nodes. - FIRST_OR_RANDOM, - /// Round robin from the node configured by zookeeper. - ROUND_ROBIN, -}; - -DECLARE_SETTING_ENUM(ZooKeeperLoadBalancing) - } diff --git a/tests/integration/test_zookeeper_config_load_balancing/__init__.py b/tests/integration/test_zookeeper_config_load_balancing/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/remote_servers.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/remote_servers.xml new file mode 100644 index 00000000000..63fdcea5dab --- /dev/null +++ b/tests/integration/test_zookeeper_config_load_balancing/configs/remote_servers.xml @@ -0,0 +1,23 @@ + + + + + + node1 + 9000 + + + + node2 + 9000 + + + + node3 + 9000 + + + + + + diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_in_order.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_in_order.xml new file mode 100644 index 00000000000..bbed71532aa --- /dev/null +++ b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_in_order.xml @@ -0,0 +1,20 @@ + + + + random + + zoo1 + 2181 + + + zoo2 + 2181 + + + zoo3 + 2181 + + 3000 + /root_a + + diff --git a/tests/integration/test_zookeeper_config_load_balancing/test.py b/tests/integration/test_zookeeper_config_load_balancing/test.py new file mode 100644 index 00000000000..95d9db27a7d --- /dev/null +++ b/tests/integration/test_zookeeper_config_load_balancing/test.py @@ -0,0 +1,57 @@ +import time +import pytest +import logging +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_root_a.xml') + +node1 = cluster.add_instance('node1', with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_root_a.xml"]) +node2 = cluster.add_instance('node2', with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_root_a.xml"]) +node3 = cluster.add_instance('node3', with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_root_b.xml"]) + +def create_zk_roots(zk): + zk.ensure_path('/root_a') + zk.ensure_path('/root_b') + logging.debug(f"Create ZK roots:{zk.get_children('/')}") + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.add_zookeeper_startup_command(create_zk_roots) + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + +def test_chroot_with_same_root(started_cluster): + for i, node in enumerate([node1, node2]): + node.query('DROP TABLE IF EXISTS simple SYNC') + node.query(''' + CREATE TABLE simple (date Date, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192); + '''.format(replica=node.name)) + for j in range(2): # Second insert to test deduplication + node.query("INSERT INTO simple VALUES ({0}, {0})".format(i)) + + time.sleep(1) + + assert node1.query('select count() from simple').strip() == '2' + assert node2.query('select count() from simple').strip() == '2' + +def test_chroot_with_different_root(started_cluster): + for i, node in [(1, node1), (3, node3)]: + node.query('DROP TABLE IF EXISTS simple_different SYNC') + node.query(''' + CREATE TABLE simple_different (date Date, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple_different', '{replica}', date, id, 8192); + '''.format(replica=node.name)) + for j in range(2): # Second insert to test deduplication + node.query("INSERT INTO simple_different VALUES ({0}, {0})".format(i)) + + assert node1.query('select count() from simple_different').strip() == '1' + assert node3.query('select count() from simple_different').strip() == '1' From 8480ae631acda0caa381141c4204583a6e1a9150 Mon Sep 17 00:00:00 2001 From: zhangxiao871 <821008736@qq.com> Date: Wed, 20 Oct 2021 16:35:37 +0800 Subject: [PATCH 005/132] Refactor and add test. --- src/Client/ConnectionPoolWithFailover.cpp | 69 ++-------- src/Client/ConnectionPoolWithFailover.h | 8 +- src/Common/GetPriorityForLoadBalancing.cpp | 41 ++++++ src/Common/GetPriorityForLoadBalancing.h | 32 +++++ src/Common/ZooKeeper/ZooKeeper.cpp | 114 +++++---------- src/Common/ZooKeeper/ZooKeeper.h | 27 +--- .../configs/zookeeper_config_in_order.xml | 3 +- .../configs/zookeeper_config_round_robin.xml | 19 +++ .../configs/zookeeper_log.xml | 7 + .../test.py | 130 ++++++++++++++---- .../test_round_robin.py | 100 ++++++++++++++ 11 files changed, 357 insertions(+), 193 deletions(-) create mode 100644 src/Common/GetPriorityForLoadBalancing.cpp create mode 100644 src/Common/GetPriorityForLoadBalancing.h create mode 100644 tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_round_robin.xml create mode 100644 tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_log.xml create mode 100644 tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index aaffe85ae2e..ecfc6bd5c08 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -29,15 +29,15 @@ ConnectionPoolWithFailover::ConnectionPoolWithFailover( time_t decrease_error_period_, size_t max_error_cap_) : Base(std::move(nested_pools_), decrease_error_period_, max_error_cap_, &Poco::Logger::get("ConnectionPoolWithFailover")) - , default_load_balancing(load_balancing) + , get_priority_load_balancing(load_balancing) { const std::string & local_hostname = getFQDNOrHostName(); - hostname_differences.resize(nested_pools.size()); + get_priority_load_balancing.hostname_differences.resize(nested_pools.size()); for (size_t i = 0; i < nested_pools.size(); ++i) { ConnectionPool & connection_pool = dynamic_cast(*nested_pools[i]); - hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost()); + get_priority_load_balancing.hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost()); } } @@ -50,36 +50,12 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts return tryGetEntry(pool, timeouts, fail_message, settings); }; - size_t offset = 0; if (settings) - offset = settings->load_balancing_first_offset % nested_pools.size(); - GetPriorityFunc get_priority; - switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing) { - case LoadBalancing::NEAREST_HOSTNAME: - get_priority = [&](size_t i) { return hostname_differences[i]; }; - break; - case LoadBalancing::IN_ORDER: - get_priority = [](size_t i) { return i; }; - break; - case LoadBalancing::RANDOM: - break; - case LoadBalancing::FIRST_OR_RANDOM: - get_priority = [offset](size_t i) -> size_t { return i != offset; }; - break; - case LoadBalancing::ROUND_ROBIN: - if (last_used >= nested_pools.size()) - last_used = 0; - ++last_used; - /* Consider nested_pools.size() equals to 5 - * last_used = 1 -> get_priority: 0 1 2 3 4 - * last_used = 2 -> get_priority: 4 0 1 2 3 - * last_used = 3 -> get_priority: 4 3 0 1 2 - * ... - * */ - get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; }; - break; + get_priority_load_balancing.offset = settings->load_balancing_first_offset % nested_pools.size(); + get_priority_load_balancing.load_balancing = settings->load_balancing; } + GetPriorityFunc get_priority = get_priority_load_balancing.getPriorityFunc(); UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0; bool fallback_to_stale_replicas = settings ? settings->fallback_to_stale_replicas_for_distributed_queries.value : true; @@ -172,39 +148,12 @@ std::vector ConnectionPoolWithFailover::g ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::makeGetPriorityFunc(const Settings * settings) { - size_t offset = 0; if (settings) - offset = settings->load_balancing_first_offset % nested_pools.size(); - - GetPriorityFunc get_priority; - switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing) { - case LoadBalancing::NEAREST_HOSTNAME: - get_priority = [&](size_t i) { return hostname_differences[i]; }; - break; - case LoadBalancing::IN_ORDER: - get_priority = [](size_t i) { return i; }; - break; - case LoadBalancing::RANDOM: - break; - case LoadBalancing::FIRST_OR_RANDOM: - get_priority = [offset](size_t i) -> size_t { return i != offset; }; - break; - case LoadBalancing::ROUND_ROBIN: - if (last_used >= nested_pools.size()) - last_used = 0; - ++last_used; - /* Consider nested_pools.size() equals to 5 - * last_used = 1 -> get_priority: 0 1 2 3 4 - * last_used = 2 -> get_priority: 5 0 1 2 3 - * last_used = 3 -> get_priority: 5 4 0 1 2 - * ... - * */ - get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; }; - break; + get_priority_load_balancing.offset = settings->load_balancing_first_offset % nested_pools.size(); + get_priority_load_balancing.load_balancing = settings->load_balancing; } - - return get_priority; + return get_priority_load_balancing.getPriorityFunc(); } std::vector ConnectionPoolWithFailover::getManyImpl( diff --git a/src/Client/ConnectionPoolWithFailover.h b/src/Client/ConnectionPoolWithFailover.h index ce70c27838b..3c838459733 100644 --- a/src/Client/ConnectionPoolWithFailover.h +++ b/src/Client/ConnectionPoolWithFailover.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -110,9 +111,10 @@ private: GetPriorityFunc makeGetPriorityFunc(const Settings * settings); private: - std::vector hostname_differences; /// Distances from name of this host to the names of hosts of pools. - size_t last_used = 0; /// Last used for round_robin policy. - LoadBalancing default_load_balancing; + GetPriorityForLoadBalancing get_priority_load_balancing; +// std::vector hostname_differences; /// Distances from name of this host to the names of hosts of pools. +// size_t last_used = 0; /// Last used for round_robin policy. +// LoadBalancing default_load_balancing; }; using ConnectionPoolWithFailoverPtr = std::shared_ptr; diff --git a/src/Common/GetPriorityForLoadBalancing.cpp b/src/Common/GetPriorityForLoadBalancing.cpp new file mode 100644 index 00000000000..ae621d9e75c --- /dev/null +++ b/src/Common/GetPriorityForLoadBalancing.cpp @@ -0,0 +1,41 @@ +#include + +namespace DB +{ + +std::function GetPriorityForLoadBalancing::getPriorityFunc() const +{ + std::function get_priority; + switch (load_balancing) + { + case LoadBalancing::NEAREST_HOSTNAME: + get_priority = [&](size_t i) { return hostname_differences[i]; }; + break; + case LoadBalancing::IN_ORDER: + get_priority = [](size_t i) { return i; }; + break; + case LoadBalancing::RANDOM: + break; + case LoadBalancing::FIRST_OR_RANDOM: + get_priority = [&](size_t i) -> size_t { return i != offset; }; + break; + case LoadBalancing::ROUND_ROBIN: + if (last_used >= pool_size) + last_used = 0; + ++last_used; + /* Consider pool_size equals to 5 + * last_used = 1 -> get_priority: 0 1 2 3 4 + * last_used = 2 -> get_priority: 4 0 1 2 3 + * last_used = 3 -> get_priority: 4 3 0 1 2 + * ... + * */ + get_priority = [&](size_t i) { + ++i; + return i < last_used ? pool_size - i : i - last_used; + }; + break; + } + return get_priority; +} + +} diff --git a/src/Common/GetPriorityForLoadBalancing.h b/src/Common/GetPriorityForLoadBalancing.h new file mode 100644 index 00000000000..b845c2e7616 --- /dev/null +++ b/src/Common/GetPriorityForLoadBalancing.h @@ -0,0 +1,32 @@ +#pragma once + +#include + +namespace DB +{ + +class GetPriorityForLoadBalancing +{ +public: + GetPriorityForLoadBalancing(LoadBalancing load_balancing_) : load_balancing(load_balancing_) {} + GetPriorityForLoadBalancing(){} + + bool operator!=(const GetPriorityForLoadBalancing & other) + { + return offset != other.offset || pool_size != other.pool_size || load_balancing != other.load_balancing + || hostname_differences != other.hostname_differences; + } + + std::function getPriorityFunc() const; + + std::vector hostname_differences; /// Distances from name of this host to the names of hosts of pools. + size_t offset = 0; /// for first_or_random policy. + size_t pool_size; /// for round_robin policy. + + LoadBalancing load_balancing = LoadBalancing::RANDOM; + +private: + mutable size_t last_used = 0; /// Last used for round_robin policy. +}; + +} diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 2f33888846b..b1f6269d128 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -46,38 +46,9 @@ static void check(Coordination::Error code, const std::string & path) throw KeeperException(code, path); } -static ZooKeeperLoadBalancing fromString(const std::string_view & str) -{ - static const std::unordered_map map = [] { - std::unordered_map res; - constexpr std::pair pairs[] - = {{"random", ZooKeeperLoadBalancing::RANDOM}, - {"nearest_hostname", ZooKeeperLoadBalancing::NEAREST_HOSTNAME}, - {"in_order", ZooKeeperLoadBalancing::IN_ORDER}, - {"first_or_random", ZooKeeperLoadBalancing::FIRST_OR_RANDOM}, - {"round_robin", ZooKeeperLoadBalancing::ROUND_ROBIN}}; - for (const auto & [name, val] : pairs) - res.emplace(name, val); - return res; - }(); - auto it = map.find(str); - if (it != map.end()) - return it->second; - String msg = "Unexpected value of ZooKeeperLoadBalancing: '" + String{str} + "'. Must be one of ["; - bool need_comma = false; - for (auto & name : map | boost::adaptors::map_keys) - { - if (std::exchange(need_comma, true)) - msg += ", "; - msg += "'" + String{name} + "'"; - } - msg += "]"; - throw DB::Exception(msg, DB::ErrorCodes::UNKNOWN_LOAD_BALANCING); -} - void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_, - int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, ZooKeeperLoadBalancing zookeeper_load_balancing_) + int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_) { log = &Poco::Logger::get("ZooKeeper"); hosts = hosts_; @@ -86,7 +57,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ operation_timeout_ms = operation_timeout_ms_; chroot = chroot_; implementation = implementation_; - zookeeper_load_balancing = zookeeper_load_balancing_; + get_priority_load_balancing = get_priority_load_balancing_; if (implementation == "zookeeper") { @@ -185,44 +156,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ std::vector ZooKeeper::shuffleHosts() const { - std::vector hostname_differences; - hostname_differences.resize(hosts.size()); - const String & local_hostname = getFQDNOrHostName(); - for (size_t i = 0; i < hosts.size(); ++i) - { - const String & ip_or_hostname = hosts[i].substr(0, hosts[i].find_last_of(':')); - hostname_differences[i] = DB::getHostNameDifference(local_hostname, Poco::Net::DNS::resolve(ip_or_hostname).name()); - } - - std::function get_priority; - switch (ZooKeeperLoadBalancing(zookeeper_load_balancing)) - { - case ZooKeeperLoadBalancing::NEAREST_HOSTNAME: - get_priority = [&](size_t i) { return hostname_differences[i]; }; - break; - case ZooKeeperLoadBalancing::IN_ORDER: - get_priority = [](size_t i) { return i; }; - break; - case ZooKeeperLoadBalancing::RANDOM: - break; - case ZooKeeperLoadBalancing::FIRST_OR_RANDOM: - get_priority = [](size_t i) -> size_t { return i != 0; }; - break; - case ZooKeeperLoadBalancing::ROUND_ROBIN: - static size_t last_used = 0; - if (last_used >= hosts.size()) - last_used = 0; - ++last_used; - /* Consider hosts.size() equals to 5 - * last_used = 1 -> get_priority: 0 1 2 3 4 - * last_used = 2 -> get_priority: 4 0 1 2 3 - * last_used = 3 -> get_priority: 4 3 0 1 2 - * ... - * */ - get_priority = [this, last_used_value = last_used](size_t i) { ++i; return i < last_used_value ? hosts.size() - i : i - last_used_value; }; - break; - } - + std::function get_priority = get_priority_load_balancing.getPriorityFunc(); std::vector shuffle_hosts; for (size_t i = 0; i < hosts.size(); ++i) { @@ -246,21 +180,21 @@ std::vector ZooKeeper::shuffleHosts() const ZooKeeper::ZooKeeper(const std::string & hosts_string, const std::string & identity_, int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_, - std::shared_ptr zk_log_, ZooKeeperLoadBalancing zookeeper_load_balancing_) + std::shared_ptr zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_) { zk_log = std::move(zk_log_); Strings hosts_strings; splitInto<','>(hosts_strings, hosts_string); - init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, zookeeper_load_balancing_); + init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_); } ZooKeeper::ZooKeeper(const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_, - std::shared_ptr zk_log_, ZooKeeperLoadBalancing zookeeper_load_balancing_) + std::shared_ptr zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_) { zk_log = std::move(zk_log_); - init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, zookeeper_load_balancing_); + init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_); } struct ZooKeeperArgs @@ -273,7 +207,6 @@ struct ZooKeeperArgs session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS; operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS; implementation = "zookeeper"; - zookeeper_load_balancing = ZooKeeperLoadBalancing::RANDOM; for (const auto & key : keys) { if (startsWith(key, "node")) @@ -306,7 +239,7 @@ struct ZooKeeperArgs } else if (key == "zookeeper_load_balancing") { - zookeeper_load_balancing = fromString(config.getString(config_name + "." + key)); + get_priority_load_balancing.load_balancing = DB::SettingFieldLoadBalancingTraits::fromString(config.getString(config_name + "." + key)); } else throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS); @@ -319,6 +252,24 @@ struct ZooKeeperArgs if (chroot.back() == '/') chroot.pop_back(); } + + /// init get_priority_load_balancing + get_priority_load_balancing.hostname_differences.resize(hosts.size()); + const String & local_hostname = getFQDNOrHostName(); + for (size_t i = 0; i < hosts.size(); ++i) + { + const String & ip_or_hostname = hosts[i].substr(0, hosts[i].find_last_of(':')); + try + { + get_priority_load_balancing.hostname_differences[i] = DB::getHostNameDifference(local_hostname, Poco::Net::DNS::resolve(ip_or_hostname).name()); + } + catch (...) + { + /// There may be HostNotFoundException or DNSException, these exceptions will be processed later. + LOG_ERROR(&Poco::Logger::get("ZooKeeperArgs"), "Cannot use ZooKeeper host {}, hostname differences will be set to the maximum value", hosts[i]); + } + } + get_priority_load_balancing.pool_size = hosts.size(); } Strings hosts; @@ -327,14 +278,14 @@ struct ZooKeeperArgs int operation_timeout_ms; std::string chroot; std::string implementation; - ZooKeeperLoadBalancing zookeeper_load_balancing; + GetPriorityForLoadBalancing get_priority_load_balancing; }; ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr zk_log_) : zk_log(std::move(zk_log_)) { ZooKeeperArgs args(config, config_name); - init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.zookeeper_load_balancing); + init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing); } bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config, const std::string & config_name) const @@ -345,8 +296,11 @@ bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config, if (args.implementation == implementation && implementation == "testkeeper") return false; - return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.zookeeper_load_balancing) - != std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, zookeeper_load_balancing); + if (args.get_priority_load_balancing != get_priority_load_balancing) + return true; + + return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot) + != std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot); } @@ -849,7 +803,7 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition & ZooKeeperPtr ZooKeeper::startNewSession() const { - return std::make_shared(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log, zookeeper_load_balancing); + return std::make_shared(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log, get_priority_load_balancing); } diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index c992ffe3a43..94232aeac86 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -61,22 +62,7 @@ private: std::minstd_rand rng = std::minstd_rand(randomSeed()); }; -enum class ZooKeeperLoadBalancing -{ - /// Randomly select one from the zookeeper nodes. - RANDOM = 0, - /// Choose one from the zookeeper node that has the least - /// number of characters different from the hostname of the local host - NEAREST_HOSTNAME, - /// Select one from the zookeeper node configuration in order. - IN_ORDER, - /// If the first node cannot be connected, - /// one will be randomly selected from other nodes. - FIRST_OR_RANDOM, - /// Round robin from the node configured by zookeeper. - ROUND_ROBIN, -}; - +using GetPriorityForLoadBalancing = DB::GetPriorityForLoadBalancing; /// ZooKeeper session. The interface is substantially different from the usual libzookeeper API. /// @@ -99,7 +85,7 @@ public: const std::string & chroot_ = "", const std::string & implementation_ = "zookeeper", std::shared_ptr zk_log_ = nullptr, - ZooKeeperLoadBalancing zookeeper_load_balancing_ = ZooKeeperLoadBalancing::RANDOM); + const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {}); ZooKeeper(const Strings & hosts_, const std::string & identity_ = "", int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS, @@ -107,7 +93,7 @@ public: const std::string & chroot_ = "", const std::string & implementation_ = "zookeeper", std::shared_ptr zk_log_ = nullptr, - ZooKeeperLoadBalancing zookeeper_load_balancing_ = ZooKeeperLoadBalancing::RANDOM); + const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {}); /** Config of the form: @@ -328,7 +314,7 @@ private: friend class EphemeralNodeHolder; void init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_, - int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, ZooKeeperLoadBalancing zookeeper_load_balancing_); + int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_); /// The following methods don't any throw exceptions but return error codes. Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created); @@ -354,7 +340,8 @@ private: Poco::Logger * log = nullptr; std::shared_ptr zk_log; - ZooKeeperLoadBalancing zookeeper_load_balancing; + + GetPriorityForLoadBalancing get_priority_load_balancing; AtomicStopwatch session_uptime; }; diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_in_order.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_in_order.xml index bbed71532aa..ebd266d80b0 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_in_order.xml +++ b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_in_order.xml @@ -1,7 +1,7 @@ - random + in_order zoo1 2181 @@ -15,6 +15,5 @@ 2181 3000 - /root_a diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_round_robin.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_round_robin.xml new file mode 100644 index 00000000000..3b64d629e6e --- /dev/null +++ b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_round_robin.xml @@ -0,0 +1,19 @@ + + + + round_robin + + zoo1 + 2181 + + + zoo2 + 2181 + + + zoo3 + 2181 + + 3000 + + diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_log.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_log.xml new file mode 100644 index 00000000000..a70cbc3ecc2 --- /dev/null +++ b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_log.xml @@ -0,0 +1,7 @@ + + + system + zookeeper_log
+ 7500 +
+
diff --git a/tests/integration/test_zookeeper_config_load_balancing/test.py b/tests/integration/test_zookeeper_config_load_balancing/test.py index 95d9db27a7d..951dd7f12b4 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test.py @@ -3,24 +3,19 @@ import pytest import logging from helpers.cluster import ClickHouseCluster -cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_root_a.xml') +cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_in_order.xml') node1 = cluster.add_instance('node1', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_root_a.xml"]) + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml", "configs/zookeeper_log.xml"]) node2 = cluster.add_instance('node2', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_root_a.xml"]) + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml", "configs/zookeeper_log.xml"]) node3 = cluster.add_instance('node3', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_root_b.xml"]) + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml", "configs/zookeeper_log.xml"]) -def create_zk_roots(zk): - zk.ensure_path('/root_a') - zk.ensure_path('/root_b') - logging.debug(f"Create ZK roots:{zk.get_children('/')}") @pytest.fixture(scope="module", autouse=True) def started_cluster(): try: - cluster.add_zookeeper_startup_command(create_zk_roots) cluster.start() yield cluster @@ -28,30 +23,109 @@ def started_cluster(): finally: cluster.shutdown() -def test_chroot_with_same_root(started_cluster): - for i, node in enumerate([node1, node2]): +def wait_zookeeper_node_to_start(started_cluster, zk_nodes, timeout=60): + start = time.time() + while time.time() - start < timeout: + try: + for instance in zk_nodes: + conn = started_cluster.get_kazoo_client(instance) + conn.get_children('/') + print("All instances of ZooKeeper started") + return + except Exception as ex: + print(("Can't connect to ZooKeeper " + str(ex))) + time.sleep(0.5) + +def test_in_order(started_cluster): + zoo1_ip = started_cluster.get_instance_ip("zoo1") + for i, node in enumerate([node1, node3]): node.query('DROP TABLE IF EXISTS simple SYNC') node.query(''' CREATE TABLE simple (date Date, id UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192); '''.format(replica=node.name)) - for j in range(2): # Second insert to test deduplication - node.query("INSERT INTO simple VALUES ({0}, {0})".format(i)) - time.sleep(1) + time.sleep(5) + assert '::ffff:' + str(zoo1_ip) + '\n' == node1.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') + assert '::ffff:' + str(zoo1_ip) + '\n' == node2.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') + assert '::ffff:' + str(zoo1_ip) + '\n' == node3.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') - assert node1.query('select count() from simple').strip() == '2' - assert node2.query('select count() from simple').strip() == '2' -def test_chroot_with_different_root(started_cluster): - for i, node in [(1, node1), (3, node3)]: - node.query('DROP TABLE IF EXISTS simple_different SYNC') - node.query(''' - CREATE TABLE simple_different (date Date, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple_different', '{replica}', date, id, 8192); - '''.format(replica=node.name)) - for j in range(2): # Second insert to test deduplication - node.query("INSERT INTO simple_different VALUES ({0}, {0})".format(i)) - - assert node1.query('select count() from simple_different').strip() == '1' - assert node3.query('select count() from simple_different').strip() == '1' +# def test_round_robin(started_cluster): +# new_config = """ +# +# +# round_robin +# +# zoo1 +# 2181 +# +# +# zoo2 +# 2181 +# +# +# zoo3 +# 2181 +# +# 3000 +# +# +# """ +# for i, node in enumerate([node1, node3]): +# node.replace_config("/etc/clickhouse-server/conf.d/zookeeper.xml", new_config) +# node.query("SYSTEM RELOAD CONFIG") +# +# started_cluster.stop_zookeeper_nodes(["zoo1"]) +# zoo2_ip = started_cluster.get_instance_ip("zoo2") +# for i, node in enumerate([node1, node3]): +# node.query('DROP TABLE IF EXISTS simple SYNC') +# node.query(''' +# CREATE TABLE simple (date Date, id UInt32) +# ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192); +# '''.format(replica=node.name)) +# assert '::ffff:' + str(zoo2_ip) + '\n' == node.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') +# ## start zoo2, zoo3, table will be readonly too, because it only connect to zoo1 +# started_cluster.start_zookeeper_nodes(["zoo1"]) +# wait_zookeeper_node_to_start(started_cluster, ["zoo1"]) +# +# +# def test_nearest_hostname(started_cluster): +# new_config = """ +# +# +# nearest_hostname +# +# zoo1 +# 2181 +# +# +# zoo2 +# 2181 +# +# +# zoo3 +# 2181 +# +# 3000 +# +# +# """ +# for i, node in enumerate([node1, node3]): +# node.replace_config("/etc/clickhouse-server/conf.d/zookeeper.xml", new_config) +# node.query("SYSTEM RELOAD CONFIG") +# +# zoo1_ip = started_cluster.get_instance_ip("zoo1") +# zoo2_ip = started_cluster.get_instance_ip("zoo2") +# zoo3_ip = started_cluster.get_instance_ip("zoo3") +# +# for i, node in enumerate([node1, node3]): +# node.query('DROP TABLE IF EXISTS simple SYNC') +# node.query(''' +# CREATE TABLE simple (date Date, id UInt32) +# ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192); +# '''.format(replica=node.name)) +# +# assert '::ffff:' + str(zoo1_ip) + '\n' == node1.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') +# assert '::ffff:' + str(zoo2_ip) + '\n' == node2.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') +# assert '::ffff:' + str(zoo3_ip) + '\n' == node3.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py b/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py new file mode 100644 index 00000000000..bee32205499 --- /dev/null +++ b/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py @@ -0,0 +1,100 @@ +import time +import pytest +import logging +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_in_order.xml') + +node1 = cluster.add_instance('node1', with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_round_robin.xml", "configs/zookeeper_log.xml"]) +node2 = cluster.add_instance('node2', with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_round_robin.xml", "configs/zookeeper_log.xml"]) +node3 = cluster.add_instance('node3', with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_round_robin.xml", "configs/zookeeper_log.xml"]) + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + +def wait_zookeeper_node_to_start(started_cluster, zk_nodes, timeout=60): + start = time.time() + while time.time() - start < timeout: + try: + for instance in zk_nodes: + conn = started_cluster.get_kazoo_client(instance) + conn.get_children('/') + print("All instances of ZooKeeper started") + return + except Exception as ex: + print(("Can't connect to ZooKeeper " + str(ex))) + time.sleep(0.5) + + + +def test_round_robin(started_cluster): + + started_cluster.stop_zookeeper_nodes(["zoo1"]) + zoo2_ip = started_cluster.get_instance_ip("zoo2") + for i, node in enumerate([node1, node3]): + node.query('DROP TABLE IF EXISTS simple SYNC') + node.query(''' + CREATE TABLE simple (date Date, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192); + '''.format(replica=node.name)) + + time.sleep(5) + assert '::ffff:' + str(zoo2_ip) + '\n' == node1.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') + assert '::ffff:' + str(zoo2_ip) + '\n' == node2.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') + assert '::ffff:' + str(zoo2_ip) + '\n' == node3.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') + + ## start zoo2, zoo3, table will be readonly too, because it only connect to zoo1 + started_cluster.start_zookeeper_nodes(["zoo1"]) + wait_zookeeper_node_to_start(started_cluster, ["zoo1"]) + + +# def test_nearest_hostname(started_cluster): +# new_config = """ +# +# +# nearest_hostname +# +# zoo1 +# 2181 +# +# +# zoo2 +# 2181 +# +# +# zoo3 +# 2181 +# +# 3000 +# +# +# """ +# for i, node in enumerate([node1, node3]): +# node.replace_config("/etc/clickhouse-server/conf.d/zookeeper.xml", new_config) +# node.query("SYSTEM RELOAD CONFIG") +# +# zoo1_ip = started_cluster.get_instance_ip("zoo1") +# zoo2_ip = started_cluster.get_instance_ip("zoo2") +# zoo3_ip = started_cluster.get_instance_ip("zoo3") +# +# for i, node in enumerate([node1, node3]): +# node.query('DROP TABLE IF EXISTS simple SYNC') +# node.query(''' +# CREATE TABLE simple (date Date, id UInt32) +# ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192); +# '''.format(replica=node.name)) +# +# assert '::ffff:' + str(zoo1_ip) + '\n' == node1.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') +# assert '::ffff:' + str(zoo2_ip) + '\n' == node2.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') +# assert '::ffff:' + str(zoo3_ip) + '\n' == node3.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') From e6cf9605a5b88345b8387647dc893bfdbb8d5a3a Mon Sep 17 00:00:00 2001 From: zhangxiao871 <821008736@qq.com> Date: Thu, 21 Oct 2021 15:46:34 +0800 Subject: [PATCH 006/132] Refactor and add test. --- src/Client/ConnectionPoolWithFailover.h | 3 - src/Common/ZooKeeper/ZooKeeper.cpp | 12 +- .../remote_servers_nearest_hostname.xml | 23 +++ .../zookeeper_config_first_or_random.xml | 19 +++ .../zookeeper_config_nearest_hostname.xml | 19 +++ .../test.py | 131 ------------------ .../test_first_or_random.py | 53 +++++++ .../test_in_order.py | 53 +++++++ .../test_nearest_hostname.py | 56 ++++++++ .../test_round_robin.py | 47 +------ 10 files changed, 229 insertions(+), 187 deletions(-) create mode 100644 tests/integration/test_zookeeper_config_load_balancing/configs/remote_servers_nearest_hostname.xml create mode 100644 tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_first_or_random.xml create mode 100644 tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_nearest_hostname.xml delete mode 100644 tests/integration/test_zookeeper_config_load_balancing/test.py create mode 100644 tests/integration/test_zookeeper_config_load_balancing/test_first_or_random.py create mode 100644 tests/integration/test_zookeeper_config_load_balancing/test_in_order.py create mode 100644 tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py diff --git a/src/Client/ConnectionPoolWithFailover.h b/src/Client/ConnectionPoolWithFailover.h index 3c838459733..0ec02cc48de 100644 --- a/src/Client/ConnectionPoolWithFailover.h +++ b/src/Client/ConnectionPoolWithFailover.h @@ -112,9 +112,6 @@ private: private: GetPriorityForLoadBalancing get_priority_load_balancing; -// std::vector hostname_differences; /// Distances from name of this host to the names of hosts of pools. -// size_t last_used = 0; /// Last used for round_robin policy. -// LoadBalancing default_load_balancing; }; using ConnectionPoolWithFailoverPtr = std::shared_ptr; diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index b1f6269d128..30e13c982ec 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -258,16 +258,8 @@ struct ZooKeeperArgs const String & local_hostname = getFQDNOrHostName(); for (size_t i = 0; i < hosts.size(); ++i) { - const String & ip_or_hostname = hosts[i].substr(0, hosts[i].find_last_of(':')); - try - { - get_priority_load_balancing.hostname_differences[i] = DB::getHostNameDifference(local_hostname, Poco::Net::DNS::resolve(ip_or_hostname).name()); - } - catch (...) - { - /// There may be HostNotFoundException or DNSException, these exceptions will be processed later. - LOG_ERROR(&Poco::Logger::get("ZooKeeperArgs"), "Cannot use ZooKeeper host {}, hostname differences will be set to the maximum value", hosts[i]); - } + const String & node_host = hosts[i].substr(0, hosts[i].find_last_of(':')); + get_priority_load_balancing.hostname_differences[i] = DB::getHostNameDifference(local_hostname, node_host); } get_priority_load_balancing.pool_size = hosts.size(); } diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/remote_servers_nearest_hostname.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/remote_servers_nearest_hostname.xml new file mode 100644 index 00000000000..62f361049c9 --- /dev/null +++ b/tests/integration/test_zookeeper_config_load_balancing/configs/remote_servers_nearest_hostname.xml @@ -0,0 +1,23 @@ + + + + + + nod1 + 9000 + + + + nod2 + 9000 + + + + nod3 + 9000 + + + + + + diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_first_or_random.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_first_or_random.xml new file mode 100644 index 00000000000..9688480fa90 --- /dev/null +++ b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_first_or_random.xml @@ -0,0 +1,19 @@ + + + + first_or_random + + zoo1 + 2181 + + + zoo2 + 2181 + + + zoo3 + 2181 + + 3000 + + diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_nearest_hostname.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_nearest_hostname.xml new file mode 100644 index 00000000000..265ebe05fab --- /dev/null +++ b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_nearest_hostname.xml @@ -0,0 +1,19 @@ + + + + nearest_hostname + + zoo1 + 2181 + + + zoo2 + 2181 + + + zoo3 + 2181 + + 3000 + + diff --git a/tests/integration/test_zookeeper_config_load_balancing/test.py b/tests/integration/test_zookeeper_config_load_balancing/test.py deleted file mode 100644 index 951dd7f12b4..00000000000 --- a/tests/integration/test_zookeeper_config_load_balancing/test.py +++ /dev/null @@ -1,131 +0,0 @@ -import time -import pytest -import logging -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_in_order.xml') - -node1 = cluster.add_instance('node1', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml", "configs/zookeeper_log.xml"]) -node2 = cluster.add_instance('node2', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml", "configs/zookeeper_log.xml"]) -node3 = cluster.add_instance('node3', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml", "configs/zookeeper_log.xml"]) - - -@pytest.fixture(scope="module", autouse=True) -def started_cluster(): - try: - cluster.start() - - yield cluster - - finally: - cluster.shutdown() - -def wait_zookeeper_node_to_start(started_cluster, zk_nodes, timeout=60): - start = time.time() - while time.time() - start < timeout: - try: - for instance in zk_nodes: - conn = started_cluster.get_kazoo_client(instance) - conn.get_children('/') - print("All instances of ZooKeeper started") - return - except Exception as ex: - print(("Can't connect to ZooKeeper " + str(ex))) - time.sleep(0.5) - -def test_in_order(started_cluster): - zoo1_ip = started_cluster.get_instance_ip("zoo1") - for i, node in enumerate([node1, node3]): - node.query('DROP TABLE IF EXISTS simple SYNC') - node.query(''' - CREATE TABLE simple (date Date, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192); - '''.format(replica=node.name)) - - time.sleep(5) - assert '::ffff:' + str(zoo1_ip) + '\n' == node1.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') - assert '::ffff:' + str(zoo1_ip) + '\n' == node2.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') - assert '::ffff:' + str(zoo1_ip) + '\n' == node3.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') - - -# def test_round_robin(started_cluster): -# new_config = """ -# -# -# round_robin -# -# zoo1 -# 2181 -# -# -# zoo2 -# 2181 -# -# -# zoo3 -# 2181 -# -# 3000 -# -# -# """ -# for i, node in enumerate([node1, node3]): -# node.replace_config("/etc/clickhouse-server/conf.d/zookeeper.xml", new_config) -# node.query("SYSTEM RELOAD CONFIG") -# -# started_cluster.stop_zookeeper_nodes(["zoo1"]) -# zoo2_ip = started_cluster.get_instance_ip("zoo2") -# for i, node in enumerate([node1, node3]): -# node.query('DROP TABLE IF EXISTS simple SYNC') -# node.query(''' -# CREATE TABLE simple (date Date, id UInt32) -# ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192); -# '''.format(replica=node.name)) -# assert '::ffff:' + str(zoo2_ip) + '\n' == node.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') -# ## start zoo2, zoo3, table will be readonly too, because it only connect to zoo1 -# started_cluster.start_zookeeper_nodes(["zoo1"]) -# wait_zookeeper_node_to_start(started_cluster, ["zoo1"]) -# -# -# def test_nearest_hostname(started_cluster): -# new_config = """ -# -# -# nearest_hostname -# -# zoo1 -# 2181 -# -# -# zoo2 -# 2181 -# -# -# zoo3 -# 2181 -# -# 3000 -# -# -# """ -# for i, node in enumerate([node1, node3]): -# node.replace_config("/etc/clickhouse-server/conf.d/zookeeper.xml", new_config) -# node.query("SYSTEM RELOAD CONFIG") -# -# zoo1_ip = started_cluster.get_instance_ip("zoo1") -# zoo2_ip = started_cluster.get_instance_ip("zoo2") -# zoo3_ip = started_cluster.get_instance_ip("zoo3") -# -# for i, node in enumerate([node1, node3]): -# node.query('DROP TABLE IF EXISTS simple SYNC') -# node.query(''' -# CREATE TABLE simple (date Date, id UInt32) -# ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192); -# '''.format(replica=node.name)) -# -# assert '::ffff:' + str(zoo1_ip) + '\n' == node1.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') -# assert '::ffff:' + str(zoo2_ip) + '\n' == node2.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') -# assert '::ffff:' + str(zoo3_ip) + '\n' == node3.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_first_or_random.py b/tests/integration/test_zookeeper_config_load_balancing/test_first_or_random.py new file mode 100644 index 00000000000..5d510ae3da4 --- /dev/null +++ b/tests/integration/test_zookeeper_config_load_balancing/test_first_or_random.py @@ -0,0 +1,53 @@ +import time +import pytest +import logging +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_first_or_random.xml') + +node1 = cluster.add_instance('node1', with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_first_or_random.xml", "configs/zookeeper_log.xml"]) +node2 = cluster.add_instance('node2', with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_first_or_random.xml", "configs/zookeeper_log.xml"]) +node3 = cluster.add_instance('node3', with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_first_or_random.xml", "configs/zookeeper_log.xml"]) + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + +def wait_zookeeper_node_to_start(started_cluster, zk_nodes, timeout=60): + start = time.time() + while time.time() - start < timeout: + try: + for instance in zk_nodes: + conn = started_cluster.get_kazoo_client(instance) + conn.get_children('/') + print("All instances of ZooKeeper started") + return + except Exception as ex: + print(("Can't connect to ZooKeeper " + str(ex))) + time.sleep(0.5) + +def test_first_or_random(started_cluster): + wait_zookeeper_node_to_start(started_cluster, ["zoo1", "zoo2", "zoo3"]) + time.sleep(2) + zoo1_ip = started_cluster.get_instance_ip("zoo1") + for i, node in enumerate([node1, node3]): + node.query('DROP TABLE IF EXISTS simple SYNC') + node.query(''' + CREATE TABLE simple (date Date, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192); + '''.format(replica=node.name)) + + time.sleep(5) + assert '::ffff:' + str(zoo1_ip) + '\n' == node1.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') + assert '::ffff:' + str(zoo1_ip) + '\n' == node2.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') + assert '::ffff:' + str(zoo1_ip) + '\n' == node3.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py b/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py new file mode 100644 index 00000000000..88143116170 --- /dev/null +++ b/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py @@ -0,0 +1,53 @@ +import time +import pytest +import logging +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_in_order.xml') + +node1 = cluster.add_instance('node1', with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml", "configs/zookeeper_log.xml"]) +node2 = cluster.add_instance('node2', with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml", "configs/zookeeper_log.xml"]) +node3 = cluster.add_instance('node3', with_zookeeper=True, + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml", "configs/zookeeper_log.xml"]) + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + +def wait_zookeeper_node_to_start(started_cluster, zk_nodes, timeout=60): + start = time.time() + while time.time() - start < timeout: + try: + for instance in zk_nodes: + conn = started_cluster.get_kazoo_client(instance) + conn.get_children('/') + print("All instances of ZooKeeper started") + return + except Exception as ex: + print(("Can't connect to ZooKeeper " + str(ex))) + time.sleep(0.5) + +def test_in_order(started_cluster): + wait_zookeeper_node_to_start(started_cluster, ["zoo1", "zoo2", "zoo3"]) + time.sleep(2) + zoo1_ip = started_cluster.get_instance_ip("zoo1") + for i, node in enumerate([node1, node3]): + node.query('DROP TABLE IF EXISTS simple SYNC') + node.query(''' + CREATE TABLE simple (date Date, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192); + '''.format(replica=node.name)) + + time.sleep(5) + assert '::ffff:' + str(zoo1_ip) + '\n' == node1.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') + assert '::ffff:' + str(zoo1_ip) + '\n' == node2.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') + assert '::ffff:' + str(zoo1_ip) + '\n' == node3.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py b/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py new file mode 100644 index 00000000000..79fa61c0104 --- /dev/null +++ b/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py @@ -0,0 +1,56 @@ +import time +import pytest +import logging +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_nearest_hostname.xml') + +node1 = cluster.add_instance('nod1', with_zookeeper=True, + main_configs=["configs/remote_servers_nearest_hostname.xml", "configs/zookeeper_config_nearest_hostname.xml", "configs/zookeeper_log.xml"]) +node2 = cluster.add_instance('nod2', with_zookeeper=True, + main_configs=["configs/remote_servers_nearest_hostname.xml", "configs/zookeeper_config_nearest_hostname.xml", "configs/zookeeper_log.xml"]) +node3 = cluster.add_instance('nod3', with_zookeeper=True, + main_configs=["configs/remote_servers_nearest_hostname.xml", "configs/zookeeper_config_nearest_hostname.xml", "configs/zookeeper_log.xml"]) + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + +def wait_zookeeper_node_to_start(started_cluster, zk_nodes, timeout=60): + start = time.time() + while time.time() - start < timeout: + try: + for instance in zk_nodes: + conn = started_cluster.get_kazoo_client(instance) + conn.get_children('/') + print("All instances of ZooKeeper started") + return + except Exception as ex: + print(("Can't connect to ZooKeeper " + str(ex))) + time.sleep(0.5) + +def test_nearest_hostname(started_cluster): + wait_zookeeper_node_to_start(started_cluster, ["zoo1", "zoo2", "zoo3"]) + time.sleep(2) + zoo1_ip = started_cluster.get_instance_ip("zoo1") + zoo2_ip = started_cluster.get_instance_ip("zoo2") + zoo3_ip = started_cluster.get_instance_ip("zoo3") + + for i, node in enumerate([node1, node3]): + node.query('DROP TABLE IF EXISTS simple SYNC') + node.query(''' + CREATE TABLE simple (date Date, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192); + '''.format(replica=node.name)) + + time.sleep(5) + assert '::ffff:' + str(zoo1_ip) + '\n' == node1.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') + assert '::ffff:' + str(zoo2_ip) + '\n' == node2.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') + assert '::ffff:' + str(zoo3_ip) + '\n' == node3.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py b/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py index bee32205499..f447f929d38 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py @@ -3,7 +3,7 @@ import pytest import logging from helpers.cluster import ClickHouseCluster -cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_in_order.xml') +cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_round_robin.xml') node1 = cluster.add_instance('node1', with_zookeeper=True, main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_round_robin.xml", "configs/zookeeper_log.xml"]) @@ -39,8 +39,9 @@ def wait_zookeeper_node_to_start(started_cluster, zk_nodes, timeout=60): def test_round_robin(started_cluster): - + wait_zookeeper_node_to_start(started_cluster, ["zoo1", "zoo2", "zoo3"]) started_cluster.stop_zookeeper_nodes(["zoo1"]) + time.sleep(10) zoo2_ip = started_cluster.get_instance_ip("zoo2") for i, node in enumerate([node1, node3]): node.query('DROP TABLE IF EXISTS simple SYNC') @@ -50,6 +51,7 @@ def test_round_robin(started_cluster): '''.format(replica=node.name)) time.sleep(5) + print("zoo2", zoo2_ip) assert '::ffff:' + str(zoo2_ip) + '\n' == node1.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') assert '::ffff:' + str(zoo2_ip) + '\n' == node2.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') assert '::ffff:' + str(zoo2_ip) + '\n' == node3.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') @@ -57,44 +59,3 @@ def test_round_robin(started_cluster): ## start zoo2, zoo3, table will be readonly too, because it only connect to zoo1 started_cluster.start_zookeeper_nodes(["zoo1"]) wait_zookeeper_node_to_start(started_cluster, ["zoo1"]) - - -# def test_nearest_hostname(started_cluster): -# new_config = """ -# -# -# nearest_hostname -# -# zoo1 -# 2181 -# -# -# zoo2 -# 2181 -# -# -# zoo3 -# 2181 -# -# 3000 -# -# -# """ -# for i, node in enumerate([node1, node3]): -# node.replace_config("/etc/clickhouse-server/conf.d/zookeeper.xml", new_config) -# node.query("SYSTEM RELOAD CONFIG") -# -# zoo1_ip = started_cluster.get_instance_ip("zoo1") -# zoo2_ip = started_cluster.get_instance_ip("zoo2") -# zoo3_ip = started_cluster.get_instance_ip("zoo3") -# -# for i, node in enumerate([node1, node3]): -# node.query('DROP TABLE IF EXISTS simple SYNC') -# node.query(''' -# CREATE TABLE simple (date Date, id UInt32) -# ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192); -# '''.format(replica=node.name)) -# -# assert '::ffff:' + str(zoo1_ip) + '\n' == node1.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') -# assert '::ffff:' + str(zoo2_ip) + '\n' == node2.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') -# assert '::ffff:' + str(zoo3_ip) + '\n' == node3.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') From 5c34e24f4948cab5a49ab9fb7c7a87c29435a9ef Mon Sep 17 00:00:00 2001 From: zhangxiao871 <821008736@qq.com> Date: Thu, 21 Oct 2021 15:57:21 +0800 Subject: [PATCH 007/132] Fix code style --- src/Common/GetPriorityForLoadBalancing.cpp | 3 ++- src/Common/ZooKeeper/ZooKeeper.cpp | 1 - src/Common/ZooKeeper/ZooKeeper.h | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Common/GetPriorityForLoadBalancing.cpp b/src/Common/GetPriorityForLoadBalancing.cpp index ae621d9e75c..15ba4e2534c 100644 --- a/src/Common/GetPriorityForLoadBalancing.cpp +++ b/src/Common/GetPriorityForLoadBalancing.cpp @@ -29,7 +29,8 @@ std::function GetPriorityForLoadBalancing::getPriorityFunc * last_used = 3 -> get_priority: 4 3 0 1 2 * ... * */ - get_priority = [&](size_t i) { + get_priority = [&](size_t i) + { ++i; return i < last_used ? pool_size - i : i - last_used; }; diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 30e13c982ec..2a964ceba89 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -26,7 +26,6 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; - extern const int UNKNOWN_LOAD_BALANCING; } } diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 94232aeac86..392c0427545 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -43,7 +43,6 @@ constexpr size_t MULTI_BATCH_SIZE = 100; struct ShuffleHost { String host; - /// Priority from the GetPriorityFunc. Int64 priority = 0; UInt32 random = 0; From 6cd0f18bfda86578232d0ace9cd521bb02b65481 Mon Sep 17 00:00:00 2001 From: zhangxiao871 <821008736@qq.com> Date: Thu, 21 Oct 2021 17:59:24 +0800 Subject: [PATCH 008/132] Fix PVS check --- src/Client/ConnectionPoolWithFailover.cpp | 2 +- src/Common/GetPriorityForLoadBalancing.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index ecfc6bd5c08..0e213dc6700 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -29,7 +29,7 @@ ConnectionPoolWithFailover::ConnectionPoolWithFailover( time_t decrease_error_period_, size_t max_error_cap_) : Base(std::move(nested_pools_), decrease_error_period_, max_error_cap_, &Poco::Logger::get("ConnectionPoolWithFailover")) - , get_priority_load_balancing(load_balancing) + , get_priority_load_balancing(load_balancing, nested_pools.size()) { const std::string & local_hostname = getFQDNOrHostName(); diff --git a/src/Common/GetPriorityForLoadBalancing.h b/src/Common/GetPriorityForLoadBalancing.h index b845c2e7616..940ece2b0bc 100644 --- a/src/Common/GetPriorityForLoadBalancing.h +++ b/src/Common/GetPriorityForLoadBalancing.h @@ -8,7 +8,7 @@ namespace DB class GetPriorityForLoadBalancing { public: - GetPriorityForLoadBalancing(LoadBalancing load_balancing_) : load_balancing(load_balancing_) {} + GetPriorityForLoadBalancing(LoadBalancing load_balancing_, size_t pool_size_) : load_balancing(load_balancing_), pool_size(pool_size_) {} GetPriorityForLoadBalancing(){} bool operator!=(const GetPriorityForLoadBalancing & other) @@ -19,11 +19,11 @@ public: std::function getPriorityFunc() const; + LoadBalancing load_balancing = LoadBalancing::RANDOM; + std::vector hostname_differences; /// Distances from name of this host to the names of hosts of pools. size_t offset = 0; /// for first_or_random policy. - size_t pool_size; /// for round_robin policy. - - LoadBalancing load_balancing = LoadBalancing::RANDOM; + size_t pool_size = 0; /// for round_robin policy. private: mutable size_t last_used = 0; /// Last used for round_robin policy. From d1891c2527258d6bbfd3b699ae5e836ed687b03c Mon Sep 17 00:00:00 2001 From: zhangxiao871 <821008736@qq.com> Date: Fri, 22 Oct 2021 14:27:50 +0800 Subject: [PATCH 009/132] Fix build --- src/Common/ZooKeeper/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ZooKeeper/CMakeLists.txt b/src/Common/ZooKeeper/CMakeLists.txt index d29fba53277..7510cd0755c 100644 --- a/src/Common/ZooKeeper/CMakeLists.txt +++ b/src/Common/ZooKeeper/CMakeLists.txt @@ -4,7 +4,7 @@ add_headers_and_sources(clickhouse_common_zookeeper .) add_library(clickhouse_common_zookeeper ${clickhouse_common_zookeeper_headers} ${clickhouse_common_zookeeper_sources}) -target_link_libraries (clickhouse_common_zookeeper PUBLIC clickhouse_common_io common PRIVATE string_utils) +target_link_libraries (clickhouse_common_zookeeper PUBLIC clickhouse_common_io common dbms PRIVATE string_utils) if (ENABLE_EXAMPLES) add_subdirectory(examples) From 4e49eba087c8b5fc0db7c7f7dd819e0b5a80701e Mon Sep 17 00:00:00 2001 From: zhangxiao871 <821008736@qq.com> Date: Fri, 22 Oct 2021 20:23:25 +0800 Subject: [PATCH 010/132] Fix data race --- src/Client/ConnectionPoolWithFailover.cpp | 21 ++++++++++++++------- src/Common/GetPriorityForLoadBalancing.cpp | 6 +++--- src/Common/GetPriorityForLoadBalancing.h | 13 +++++-------- src/Common/ZooKeeper/ZooKeeper.cpp | 3 +-- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index 0e213dc6700..accde6b5475 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -29,7 +29,7 @@ ConnectionPoolWithFailover::ConnectionPoolWithFailover( time_t decrease_error_period_, size_t max_error_cap_) : Base(std::move(nested_pools_), decrease_error_period_, max_error_cap_, &Poco::Logger::get("ConnectionPoolWithFailover")) - , get_priority_load_balancing(load_balancing, nested_pools.size()) + , get_priority_load_balancing(load_balancing) { const std::string & local_hostname = getFQDNOrHostName(); @@ -50,12 +50,16 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts return tryGetEntry(pool, timeouts, fail_message, settings); }; + GetPriorityForLoadBalancing get_priority_local(get_priority_load_balancing); + size_t offset = 0; + LoadBalancing load_balancing = get_priority_load_balancing.load_balancing; if (settings) { - get_priority_load_balancing.offset = settings->load_balancing_first_offset % nested_pools.size(); - get_priority_load_balancing.load_balancing = settings->load_balancing; + offset = settings->load_balancing_first_offset % nested_pools.size(); + load_balancing = LoadBalancing(settings->load_balancing); } - GetPriorityFunc get_priority = get_priority_load_balancing.getPriorityFunc(); + + GetPriorityFunc get_priority = get_priority_local.getPriorityFunc(load_balancing, offset, nested_pools.size()); UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0; bool fallback_to_stale_replicas = settings ? settings->fallback_to_stale_replicas_for_distributed_queries.value : true; @@ -148,12 +152,15 @@ std::vector ConnectionPoolWithFailover::g ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::makeGetPriorityFunc(const Settings * settings) { + size_t offset = 0; + LoadBalancing load_balancing = get_priority_load_balancing.load_balancing; if (settings) { - get_priority_load_balancing.offset = settings->load_balancing_first_offset % nested_pools.size(); - get_priority_load_balancing.load_balancing = settings->load_balancing; + offset = settings->load_balancing_first_offset % nested_pools.size(); + load_balancing = LoadBalancing(settings->load_balancing); } - return get_priority_load_balancing.getPriorityFunc(); + + return get_priority_load_balancing.getPriorityFunc(load_balancing, offset, nested_pools.size()); } std::vector ConnectionPoolWithFailover::getManyImpl( diff --git a/src/Common/GetPriorityForLoadBalancing.cpp b/src/Common/GetPriorityForLoadBalancing.cpp index 15ba4e2534c..fa0eeb14bed 100644 --- a/src/Common/GetPriorityForLoadBalancing.cpp +++ b/src/Common/GetPriorityForLoadBalancing.cpp @@ -3,10 +3,10 @@ namespace DB { -std::function GetPriorityForLoadBalancing::getPriorityFunc() const +std::function GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const { std::function get_priority; - switch (load_balancing) + switch (load_balance) { case LoadBalancing::NEAREST_HOSTNAME: get_priority = [&](size_t i) { return hostname_differences[i]; }; @@ -17,7 +17,7 @@ std::function GetPriorityForLoadBalancing::getPriorityFunc case LoadBalancing::RANDOM: break; case LoadBalancing::FIRST_OR_RANDOM: - get_priority = [&](size_t i) -> size_t { return i != offset; }; + get_priority = [offset](size_t i) -> size_t { return i != offset; }; break; case LoadBalancing::ROUND_ROBIN: if (last_used >= pool_size) diff --git a/src/Common/GetPriorityForLoadBalancing.h b/src/Common/GetPriorityForLoadBalancing.h index 940ece2b0bc..a6b8c88bb73 100644 --- a/src/Common/GetPriorityForLoadBalancing.h +++ b/src/Common/GetPriorityForLoadBalancing.h @@ -8,22 +8,19 @@ namespace DB class GetPriorityForLoadBalancing { public: - GetPriorityForLoadBalancing(LoadBalancing load_balancing_, size_t pool_size_) : load_balancing(load_balancing_), pool_size(pool_size_) {} + GetPriorityForLoadBalancing(LoadBalancing load_balancing_) : load_balancing(load_balancing_) {} GetPriorityForLoadBalancing(){} bool operator!=(const GetPriorityForLoadBalancing & other) { - return offset != other.offset || pool_size != other.pool_size || load_balancing != other.load_balancing - || hostname_differences != other.hostname_differences; + return load_balancing != other.load_balancing || hostname_differences != other.hostname_differences; } - std::function getPriorityFunc() const; - - LoadBalancing load_balancing = LoadBalancing::RANDOM; + std::function getPriorityFunc(LoadBalancing load_balancing, size_t offset, size_t pool_size) const; std::vector hostname_differences; /// Distances from name of this host to the names of hosts of pools. - size_t offset = 0; /// for first_or_random policy. - size_t pool_size = 0; /// for round_robin policy. + + LoadBalancing load_balancing = LoadBalancing::RANDOM; private: mutable size_t last_used = 0; /// Last used for round_robin policy. diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 2a964ceba89..5e43eda636c 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -155,7 +155,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ std::vector ZooKeeper::shuffleHosts() const { - std::function get_priority = get_priority_load_balancing.getPriorityFunc(); + std::function get_priority = get_priority_load_balancing.getPriorityFunc(get_priority_load_balancing.load_balancing, 0, hosts.size()); std::vector shuffle_hosts; for (size_t i = 0; i < hosts.size(); ++i) { @@ -260,7 +260,6 @@ struct ZooKeeperArgs const String & node_host = hosts[i].substr(0, hosts[i].find_last_of(':')); get_priority_load_balancing.hostname_differences[i] = DB::getHostNameDifference(local_hostname, node_host); } - get_priority_load_balancing.pool_size = hosts.size(); } Strings hosts; From 1710e5ea5989cbe9103df7551a98e8bbf20e1a7b Mon Sep 17 00:00:00 2001 From: zhangxiao871 <821008736@qq.com> Date: Mon, 25 Oct 2021 19:10:54 +0800 Subject: [PATCH 011/132] Fix build --- src/CMakeLists.txt | 2 ++ src/Common/ZooKeeper/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 09aaa85c394..95f2051399d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -183,6 +183,8 @@ endmacro() add_object_library(clickhouse_access Access) add_object_library(clickhouse_backups Backups) add_object_library(clickhouse_core Core) +add_library (clickhouse_core_settings_enums Core/SettingsEnums.cpp) +target_link_libraries(clickhouse_core_settings_enums PRIVATE common clickhouse_common_io) add_object_library(clickhouse_core_mysql Core/MySQL) add_object_library(clickhouse_compression Compression) add_object_library(clickhouse_querypipeline QueryPipeline) diff --git a/src/Common/ZooKeeper/CMakeLists.txt b/src/Common/ZooKeeper/CMakeLists.txt index 7510cd0755c..5797a0a5e21 100644 --- a/src/Common/ZooKeeper/CMakeLists.txt +++ b/src/Common/ZooKeeper/CMakeLists.txt @@ -4,7 +4,7 @@ add_headers_and_sources(clickhouse_common_zookeeper .) add_library(clickhouse_common_zookeeper ${clickhouse_common_zookeeper_headers} ${clickhouse_common_zookeeper_sources}) -target_link_libraries (clickhouse_common_zookeeper PUBLIC clickhouse_common_io common dbms PRIVATE string_utils) +target_link_libraries (clickhouse_common_zookeeper PUBLIC clickhouse_core_settings_enums clickhouse_common_io common PRIVATE string_utils) if (ENABLE_EXAMPLES) add_subdirectory(examples) From bf9aebac90be71566b8cf8650dd16c56b478bb27 Mon Sep 17 00:00:00 2001 From: zhangxiao871 <821008736@qq.com> Date: Tue, 26 Oct 2021 12:45:09 +0800 Subject: [PATCH 012/132] Fix test and build --- src/Client/ConnectionPoolWithFailover.cpp | 3 +-- src/Common/GetPriorityForLoadBalancing.h | 2 +- .../test_first_or_random.py | 2 +- .../test_zookeeper_config_load_balancing/test_in_order.py | 2 +- .../test_nearest_hostname.py | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index accde6b5475..13d39980e1c 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -50,7 +50,6 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts return tryGetEntry(pool, timeouts, fail_message, settings); }; - GetPriorityForLoadBalancing get_priority_local(get_priority_load_balancing); size_t offset = 0; LoadBalancing load_balancing = get_priority_load_balancing.load_balancing; if (settings) @@ -59,7 +58,7 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts load_balancing = LoadBalancing(settings->load_balancing); } - GetPriorityFunc get_priority = get_priority_local.getPriorityFunc(load_balancing, offset, nested_pools.size()); + GetPriorityFunc get_priority = get_priority_load_balancing.getPriorityFunc(load_balancing, offset, nested_pools.size()); UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0; bool fallback_to_stale_replicas = settings ? settings->fallback_to_stale_replicas_for_distributed_queries.value : true; diff --git a/src/Common/GetPriorityForLoadBalancing.h b/src/Common/GetPriorityForLoadBalancing.h index a6b8c88bb73..4ec686188e4 100644 --- a/src/Common/GetPriorityForLoadBalancing.h +++ b/src/Common/GetPriorityForLoadBalancing.h @@ -16,7 +16,7 @@ public: return load_balancing != other.load_balancing || hostname_differences != other.hostname_differences; } - std::function getPriorityFunc(LoadBalancing load_balancing, size_t offset, size_t pool_size) const; + std::function getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const; std::vector hostname_differences; /// Distances from name of this host to the names of hosts of pools. diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_first_or_random.py b/tests/integration/test_zookeeper_config_load_balancing/test_first_or_random.py index 5d510ae3da4..71084492b44 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test_first_or_random.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test_first_or_random.py @@ -38,7 +38,7 @@ def wait_zookeeper_node_to_start(started_cluster, zk_nodes, timeout=60): def test_first_or_random(started_cluster): wait_zookeeper_node_to_start(started_cluster, ["zoo1", "zoo2", "zoo3"]) - time.sleep(2) + time.sleep(10) zoo1_ip = started_cluster.get_instance_ip("zoo1") for i, node in enumerate([node1, node3]): node.query('DROP TABLE IF EXISTS simple SYNC') diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py b/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py index 88143116170..92ba927860c 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py @@ -38,7 +38,7 @@ def wait_zookeeper_node_to_start(started_cluster, zk_nodes, timeout=60): def test_in_order(started_cluster): wait_zookeeper_node_to_start(started_cluster, ["zoo1", "zoo2", "zoo3"]) - time.sleep(2) + time.sleep(10) zoo1_ip = started_cluster.get_instance_ip("zoo1") for i, node in enumerate([node1, node3]): node.query('DROP TABLE IF EXISTS simple SYNC') diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py b/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py index 79fa61c0104..832af32bbae 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py @@ -38,7 +38,7 @@ def wait_zookeeper_node_to_start(started_cluster, zk_nodes, timeout=60): def test_nearest_hostname(started_cluster): wait_zookeeper_node_to_start(started_cluster, ["zoo1", "zoo2", "zoo3"]) - time.sleep(2) + time.sleep(10) zoo1_ip = started_cluster.get_instance_ip("zoo1") zoo2_ip = started_cluster.get_instance_ip("zoo2") zoo3_ip = started_cluster.get_instance_ip("zoo3") From 4e53eb0e4e65f8d1da865db971f78955a39f5920 Mon Sep 17 00:00:00 2001 From: zhangxiao871 <821008736@qq.com> Date: Wed, 27 Oct 2021 11:29:31 +0800 Subject: [PATCH 013/132] empty commit From 49e03025dde6513921787d0ab9b56df27fb557b7 Mon Sep 17 00:00:00 2001 From: zhangxiao871 <821008736@qq.com> Date: Mon, 1 Nov 2021 18:26:50 +0800 Subject: [PATCH 014/132] better test --- .../configs/zookeeper_log.xml | 7 ---- .../test_first_or_random.py | 34 +++------------ .../test_in_order.py | 35 +++------------- .../test_nearest_hostname.py | 38 +++-------------- .../test_round_robin.py | 41 ++++--------------- 5 files changed, 26 insertions(+), 129 deletions(-) delete mode 100644 tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_log.xml diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_log.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_log.xml deleted file mode 100644 index a70cbc3ecc2..00000000000 --- a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_log.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - system - zookeeper_log
- 7500 -
-
diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_first_or_random.py b/tests/integration/test_zookeeper_config_load_balancing/test_first_or_random.py index 71084492b44..38361016512 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test_first_or_random.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test_first_or_random.py @@ -6,11 +6,11 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_first_or_random.xml') node1 = cluster.add_instance('node1', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_first_or_random.xml", "configs/zookeeper_log.xml"]) + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_first_or_random.xml"]) node2 = cluster.add_instance('node2', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_first_or_random.xml", "configs/zookeeper_log.xml"]) + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_first_or_random.xml"]) node3 = cluster.add_instance('node3', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_first_or_random.xml", "configs/zookeeper_log.xml"]) + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_first_or_random.xml"]) @pytest.fixture(scope="module", autouse=True) @@ -23,31 +23,9 @@ def started_cluster(): finally: cluster.shutdown() -def wait_zookeeper_node_to_start(started_cluster, zk_nodes, timeout=60): - start = time.time() - while time.time() - start < timeout: - try: - for instance in zk_nodes: - conn = started_cluster.get_kazoo_client(instance) - conn.get_children('/') - print("All instances of ZooKeeper started") - return - except Exception as ex: - print(("Can't connect to ZooKeeper " + str(ex))) - time.sleep(0.5) def test_first_or_random(started_cluster): - wait_zookeeper_node_to_start(started_cluster, ["zoo1", "zoo2", "zoo3"]) - time.sleep(10) - zoo1_ip = started_cluster.get_instance_ip("zoo1") - for i, node in enumerate([node1, node3]): - node.query('DROP TABLE IF EXISTS simple SYNC') - node.query(''' - CREATE TABLE simple (date Date, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192); - '''.format(replica=node.name)) - time.sleep(5) - assert '::ffff:' + str(zoo1_ip) + '\n' == node1.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') - assert '::ffff:' + str(zoo1_ip) + '\n' == node2.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') - assert '::ffff:' + str(zoo1_ip) + '\n' == node3.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') + assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py b/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py index 92ba927860c..c11d05f0a75 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py @@ -6,11 +6,11 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_in_order.xml') node1 = cluster.add_instance('node1', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml", "configs/zookeeper_log.xml"]) + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml"]) node2 = cluster.add_instance('node2', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml", "configs/zookeeper_log.xml"]) + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml"]) node3 = cluster.add_instance('node3', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml", "configs/zookeeper_log.xml"]) + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml"]) @pytest.fixture(scope="module", autouse=True) @@ -23,31 +23,8 @@ def started_cluster(): finally: cluster.shutdown() -def wait_zookeeper_node_to_start(started_cluster, zk_nodes, timeout=60): - start = time.time() - while time.time() - start < timeout: - try: - for instance in zk_nodes: - conn = started_cluster.get_kazoo_client(instance) - conn.get_children('/') - print("All instances of ZooKeeper started") - return - except Exception as ex: - print(("Can't connect to ZooKeeper " + str(ex))) - time.sleep(0.5) - def test_in_order(started_cluster): - wait_zookeeper_node_to_start(started_cluster, ["zoo1", "zoo2", "zoo3"]) - time.sleep(10) - zoo1_ip = started_cluster.get_instance_ip("zoo1") - for i, node in enumerate([node1, node3]): - node.query('DROP TABLE IF EXISTS simple SYNC') - node.query(''' - CREATE TABLE simple (date Date, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192); - '''.format(replica=node.name)) - time.sleep(5) - assert '::ffff:' + str(zoo1_ip) + '\n' == node1.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') - assert '::ffff:' + str(zoo1_ip) + '\n' == node2.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') - assert '::ffff:' + str(zoo1_ip) + '\n' == node3.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') + assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py b/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py index 832af32bbae..30fca5c5395 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py @@ -6,11 +6,11 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_nearest_hostname.xml') node1 = cluster.add_instance('nod1', with_zookeeper=True, - main_configs=["configs/remote_servers_nearest_hostname.xml", "configs/zookeeper_config_nearest_hostname.xml", "configs/zookeeper_log.xml"]) + main_configs=["configs/remote_servers_nearest_hostname.xml", "configs/zookeeper_config_nearest_hostname.xml"]) node2 = cluster.add_instance('nod2', with_zookeeper=True, - main_configs=["configs/remote_servers_nearest_hostname.xml", "configs/zookeeper_config_nearest_hostname.xml", "configs/zookeeper_log.xml"]) + main_configs=["configs/remote_servers_nearest_hostname.xml", "configs/zookeeper_config_nearest_hostname.xml"]) node3 = cluster.add_instance('nod3', with_zookeeper=True, - main_configs=["configs/remote_servers_nearest_hostname.xml", "configs/zookeeper_config_nearest_hostname.xml", "configs/zookeeper_log.xml"]) + main_configs=["configs/remote_servers_nearest_hostname.xml", "configs/zookeeper_config_nearest_hostname.xml"]) @pytest.fixture(scope="module", autouse=True) @@ -23,34 +23,8 @@ def started_cluster(): finally: cluster.shutdown() -def wait_zookeeper_node_to_start(started_cluster, zk_nodes, timeout=60): - start = time.time() - while time.time() - start < timeout: - try: - for instance in zk_nodes: - conn = started_cluster.get_kazoo_client(instance) - conn.get_children('/') - print("All instances of ZooKeeper started") - return - except Exception as ex: - print(("Can't connect to ZooKeeper " + str(ex))) - time.sleep(0.5) - def test_nearest_hostname(started_cluster): - wait_zookeeper_node_to_start(started_cluster, ["zoo1", "zoo2", "zoo3"]) - time.sleep(10) - zoo1_ip = started_cluster.get_instance_ip("zoo1") - zoo2_ip = started_cluster.get_instance_ip("zoo2") - zoo3_ip = started_cluster.get_instance_ip("zoo3") - for i, node in enumerate([node1, node3]): - node.query('DROP TABLE IF EXISTS simple SYNC') - node.query(''' - CREATE TABLE simple (date Date, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192); - '''.format(replica=node.name)) - - time.sleep(5) - assert '::ffff:' + str(zoo1_ip) + '\n' == node1.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') - assert '::ffff:' + str(zoo2_ip) + '\n' == node2.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') - assert '::ffff:' + str(zoo3_ip) + '\n' == node3.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') + assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo2_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo3_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py b/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py index f447f929d38..98d751f075b 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py @@ -6,11 +6,11 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_round_robin.xml') node1 = cluster.add_instance('node1', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_round_robin.xml", "configs/zookeeper_log.xml"]) + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_round_robin.xml"]) node2 = cluster.add_instance('node2', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_round_robin.xml", "configs/zookeeper_log.xml"]) + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_round_robin.xml"]) node3 = cluster.add_instance('node3', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_round_robin.xml", "configs/zookeeper_log.xml"]) + main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_round_robin.xml"]) @pytest.fixture(scope="module", autouse=True) @@ -23,39 +23,14 @@ def started_cluster(): finally: cluster.shutdown() -def wait_zookeeper_node_to_start(started_cluster, zk_nodes, timeout=60): - start = time.time() - while time.time() - start < timeout: - try: - for instance in zk_nodes: - conn = started_cluster.get_kazoo_client(instance) - conn.get_children('/') - print("All instances of ZooKeeper started") - return - except Exception as ex: - print(("Can't connect to ZooKeeper " + str(ex))) - time.sleep(0.5) - - def test_round_robin(started_cluster): - wait_zookeeper_node_to_start(started_cluster, ["zoo1", "zoo2", "zoo3"]) + started_cluster.stop_zookeeper_nodes(["zoo1"]) - time.sleep(10) - zoo2_ip = started_cluster.get_instance_ip("zoo2") - for i, node in enumerate([node1, node3]): - node.query('DROP TABLE IF EXISTS simple SYNC') - node.query(''' - CREATE TABLE simple (date Date, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', '{replica}', date, id, 8192); - '''.format(replica=node.name)) + time.sleep(1) - time.sleep(5) - print("zoo2", zoo2_ip) - assert '::ffff:' + str(zoo2_ip) + '\n' == node1.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') - assert '::ffff:' + str(zoo2_ip) + '\n' == node2.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') - assert '::ffff:' + str(zoo2_ip) + '\n' == node3.query('SELECT IPv6NumToString(address) FROM system.zookeeper_log ORDER BY event_time DESC LIMIT 1') + assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo2_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo2_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo2_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() - ## start zoo2, zoo3, table will be readonly too, because it only connect to zoo1 started_cluster.start_zookeeper_nodes(["zoo1"]) - wait_zookeeper_node_to_start(started_cluster, ["zoo1"]) From 821ad7cb2ae2c0c66afe3279cf0ebbf6c1c09bc2 Mon Sep 17 00:00:00 2001 From: zhangxiao871 <821008736@qq.com> Date: Tue, 2 Nov 2021 15:40:05 +0800 Subject: [PATCH 015/132] try fix test --- .../{test_first_or_random.py => test.py} | 8 ++++++-- .../test_zookeeper_config_load_balancing/test_in_order.py | 7 ++++++- .../test_nearest_hostname.py | 7 ++++++- .../test_round_robin.py | 7 ++++++- 4 files changed, 24 insertions(+), 5 deletions(-) rename tests/integration/test_zookeeper_config_load_balancing/{test_first_or_random.py => test.py} (78%) diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_first_or_random.py b/tests/integration/test_zookeeper_config_load_balancing/test.py similarity index 78% rename from tests/integration/test_zookeeper_config_load_balancing/test_first_or_random.py rename to tests/integration/test_zookeeper_config_load_balancing/test.py index 38361016512..144ba14ce40 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test_first_or_random.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test.py @@ -13,7 +13,7 @@ node3 = cluster.add_instance('node3', with_zookeeper=True, main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_first_or_random.xml"]) -@pytest.fixture(scope="module", autouse=True) +@pytest.fixture(scope="module") def started_cluster(): try: cluster.start() @@ -25,7 +25,11 @@ def started_cluster(): def test_first_or_random(started_cluster): - + print(str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + + print(str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + + print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py b/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py index c11d05f0a75..095aba72217 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py @@ -13,7 +13,7 @@ node3 = cluster.add_instance('node3', with_zookeeper=True, main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml"]) -@pytest.fixture(scope="module", autouse=True) +@pytest.fixture(scope="module") def started_cluster(): try: cluster.start() @@ -25,6 +25,11 @@ def started_cluster(): def test_in_order(started_cluster): + print(str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + + print(str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + + print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py b/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py index 30fca5c5395..23c0386b1d2 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py @@ -13,7 +13,7 @@ node3 = cluster.add_instance('nod3', with_zookeeper=True, main_configs=["configs/remote_servers_nearest_hostname.xml", "configs/zookeeper_config_nearest_hostname.xml"]) -@pytest.fixture(scope="module", autouse=True) +@pytest.fixture(scope="module") def started_cluster(): try: cluster.start() @@ -25,6 +25,11 @@ def started_cluster(): def test_nearest_hostname(started_cluster): + print(str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + + print(str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo2_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + + print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo3_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py b/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py index 98d751f075b..3623371c244 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py @@ -13,7 +13,7 @@ node3 = cluster.add_instance('node3', with_zookeeper=True, main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_round_robin.xml"]) -@pytest.fixture(scope="module", autouse=True) +@pytest.fixture(scope="module") def started_cluster(): try: cluster.start() @@ -29,8 +29,13 @@ def test_round_robin(started_cluster): started_cluster.stop_zookeeper_nodes(["zoo1"]) time.sleep(1) + print(str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo2_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + + print(str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo2_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + + print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo2_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() started_cluster.start_zookeeper_nodes(["zoo1"]) From be081d997d76efd5410a1ab115a73f41753a65d6 Mon Sep 17 00:00:00 2001 From: zvonand Date: Sun, 6 Feb 2022 15:14:18 +0300 Subject: [PATCH 016/132] basic added fractional intervals types --- src/Common/IntervalKind.cpp | 36 ++++++++++++++++++++++++++++++ src/Common/IntervalKind.h | 6 +++++ src/DataTypes/DataTypeInterval.cpp | 3 +++ 3 files changed, 45 insertions(+) diff --git a/src/Common/IntervalKind.cpp b/src/Common/IntervalKind.cpp index 69b56be48ac..bd5d1a512f1 100644 --- a/src/Common/IntervalKind.cpp +++ b/src/Common/IntervalKind.cpp @@ -13,6 +13,9 @@ Int32 IntervalKind::toAvgSeconds() const { switch (kind) { + case IntervalKind::Nanosecond: return 0; /// fractional parts of seconds have 0 seconds + case IntervalKind::Microsecond: return 0; + case IntervalKind::Millisecond: return 0; case IntervalKind::Second: return 1; case IntervalKind::Minute: return 60; case IntervalKind::Hour: return 3600; @@ -52,6 +55,9 @@ const char * IntervalKind::toKeyword() const { switch (kind) { + case IntervalKind::Nanosecond: return "NANOSECOND"; + case IntervalKind::Microsecond: return "MICROSECOND"; + case IntervalKind::Millisecond: return "MILLISECOND"; case IntervalKind::Second: return "SECOND"; case IntervalKind::Minute: return "MINUTE"; case IntervalKind::Hour: return "HOUR"; @@ -69,6 +75,9 @@ const char * IntervalKind::toLowercasedKeyword() const { switch (kind) { + case IntervalKind::Nanosecond: return "nanosecond"; + case IntervalKind::Microsecond: return "microsecond"; + case IntervalKind::Millisecond: return "millisecond"; case IntervalKind::Second: return "second"; case IntervalKind::Minute: return "minute"; case IntervalKind::Hour: return "hour"; @@ -111,6 +120,12 @@ const char * IntervalKind::toNameOfFunctionToIntervalDataType() const { switch (kind) { + case IntervalKind::Nanosecond: + return "toIntervalNanosecond"; + case IntervalKind::Microsecond: + return "toIntervalMicrosecond"; + case IntervalKind::Millisecond: + return "toIntervalMillisecond"; case IntervalKind::Second: return "toIntervalSecond"; case IntervalKind::Minute: @@ -136,6 +151,12 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const { switch (kind) { + case IntervalKind::Nanosecond: + return "toNanosecond"; + case IntervalKind::Microsecond: + return "toMicrosecond"; + case IntervalKind::Millisecond: + return "toMillisecond"; case IntervalKind::Second: return "toSecond"; case IntervalKind::Minute: @@ -162,6 +183,21 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const bool IntervalKind::tryParseString(const std::string & kind, IntervalKind::Kind & result) { + if ("nanosecond" == kind) + { + result = IntervalKind::Nanosecond; + return true; + } + if ("microsecond" == kind) + { + result = IntervalKind::Microsecond; + return true; + } + if ("millisecond" == kind) + { + result = IntervalKind::Millisecond; + return true; + } if ("second" == kind) { result = IntervalKind::Second; diff --git a/src/Common/IntervalKind.h b/src/Common/IntervalKind.h index f1b5dce0792..c162d74be2e 100644 --- a/src/Common/IntervalKind.h +++ b/src/Common/IntervalKind.h @@ -10,6 +10,9 @@ struct IntervalKind { enum Kind { + Nanosecond, + Microsecond, + Millisecond, Second, Minute, Hour, @@ -60,6 +63,9 @@ struct IntervalKind }; #define FOR_EACH_INTERVAL_KIND(M) \ + M(Nanosecond) \ + M(Microsecond) \ + M(Millisecond) \ M(Second) \ M(Minute) \ M(Hour) \ diff --git a/src/DataTypes/DataTypeInterval.cpp b/src/DataTypes/DataTypeInterval.cpp index 57d071a8666..9faf0cec2d8 100644 --- a/src/DataTypes/DataTypeInterval.cpp +++ b/src/DataTypes/DataTypeInterval.cpp @@ -13,6 +13,9 @@ bool DataTypeInterval::equals(const IDataType & rhs) const void registerDataTypeInterval(DataTypeFactory & factory) { + factory.registerSimpleDataType("IntervalNanosecond", [] { return DataTypePtr(std::make_shared(IntervalKind::Nanosecond)); }); + factory.registerSimpleDataType("IntervalMicrosecond", [] { return DataTypePtr(std::make_shared(IntervalKind::Microsecond)); }); + factory.registerSimpleDataType("IntervalMillisecond", [] { return DataTypePtr(std::make_shared(IntervalKind::Millisecond)); }); factory.registerSimpleDataType("IntervalSecond", [] { return DataTypePtr(std::make_shared(IntervalKind::Second)); }); factory.registerSimpleDataType("IntervalMinute", [] { return DataTypePtr(std::make_shared(IntervalKind::Minute)); }); factory.registerSimpleDataType("IntervalHour", [] { return DataTypePtr(std::make_shared(IntervalKind::Hour)); }); From 9456d89fad73d1c95b6c4591970e530bab4c0d40 Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 7 Feb 2022 21:44:14 +0300 Subject: [PATCH 017/132] Move current work from old laptop skip-checks: true --- src/Common/IntervalKind.cpp | 6 ++ src/Functions/CMakeLists.txt | 2 +- .../FunctionDateOrDateTimeAddInterval.h | 78 +++++++++++++++++++ src/Functions/FunctionsConversion.cpp | 3 + src/Functions/FunctionsConversion.h | 3 + src/Functions/FunctionsTimeWindow.h | 2 +- src/Functions/addSubSeconds.cpp | 39 ++++++++++ src/Functions/registerFunctionsDateTime.cpp | 6 ++ src/Storages/WindowView/StorageWindowView.cpp | 26 ++++++- 9 files changed, 162 insertions(+), 3 deletions(-) create mode 100644 src/Functions/addSubSeconds.cpp diff --git a/src/Common/IntervalKind.cpp b/src/Common/IntervalKind.cpp index bd5d1a512f1..1478b832282 100644 --- a/src/Common/IntervalKind.cpp +++ b/src/Common/IntervalKind.cpp @@ -95,6 +95,12 @@ const char * IntervalKind::toDateDiffUnit() const { switch (kind) { + case IntervalKind::Nanosecond: + return "nanosecond"; + case IntervalKind::Microsecond: + return "microsecond"; + case IntervalKind::Millisecond: + return "millisecond"; case IntervalKind::Second: return "second"; case IntervalKind::Minute: diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index b7020ea128e..707b3e7ceb8 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -9,7 +9,7 @@ add_headers_and_sources(clickhouse_functions .) list(REMOVE_ITEM clickhouse_functions_sources IFunction.cpp FunctionFactory.cpp FunctionHelpers.cpp) list(REMOVE_ITEM clickhouse_functions_headers IFunction.h FunctionFactory.h FunctionHelpers.h) -add_library(clickhouse_functions ${clickhouse_functions_sources}) +add_library(clickhouse_functions ${clickhouse_functions_sources} addSubSeconds.cpp) target_link_libraries(clickhouse_functions PUBLIC diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index 8f6b1370935..d135a41291c 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -37,6 +37,84 @@ namespace ErrorCodes /// - 'AddSecondsImpl::execute(UInt32, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(DateTime, ...) -> DateTime' /// - 'AddSecondsImpl::execute(UInt16, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(Date, ...) -> DateTime' +struct AddNanosecondsImpl +{ + static constexpr auto name = "addNanoseconds"; + + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &) + { + return {t.whole, t.fractional + delta}; + } + + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(UInt32 t, Int64 delta, const DateLUTImpl &) + { + return {t, delta}; + } + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + { + // use default datetime64 scale + return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; + } + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + { + return time_zone.fromDayNum(DayNum(d)) + delta; + } +}; + +struct AddMicrosecondsImpl +{ + static constexpr auto name = "addMicroseconds"; + + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &) + { + return {t.whole, t.fractional + delta}; + } + + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(UInt32 t, Int64 delta, const DateLUTImpl &) + { + return {t, delta}; + } + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + { + // use default datetime64 scale + return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; + } + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + { + return time_zone.fromDayNum(DayNum(d)) + delta; + } +}; + +struct AddMillisecondsImpl +{ + static constexpr auto name = "addMilliseconds"; + + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &) + { + return {t.whole, t.fractional + delta}; + } + + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(UInt32 t, Int64 delta, const DateLUTImpl &) + { + return {t, delta}; + } + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + { + // use default datetime64 scale + return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; + } + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + { + return time_zone.fromDayNum(DayNum(d)) + delta; + } +}; + struct AddSecondsImpl { static constexpr auto name = "addSeconds"; diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 4f5f6ae483f..7f8e9148032 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -112,6 +112,9 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 909803d7cd7..31ec597761d 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -1444,6 +1444,9 @@ struct NameToDecimal256 { static constexpr auto name = "toDecimal256"; }; static constexpr auto kind = IntervalKind::INTERVAL_KIND; \ }; +DEFINE_NAME_TO_INTERVAL(Nanosecond) +DEFINE_NAME_TO_INTERVAL(Microsecond) +DEFINE_NAME_TO_INTERVAL(Millisecond) DEFINE_NAME_TO_INTERVAL(Second) DEFINE_NAME_TO_INTERVAL(Minute) DEFINE_NAME_TO_INTERVAL(Hour) diff --git a/src/Functions/FunctionsTimeWindow.h b/src/Functions/FunctionsTimeWindow.h index 313de10702d..4394f73061f 100644 --- a/src/Functions/FunctionsTimeWindow.h +++ b/src/Functions/FunctionsTimeWindow.h @@ -80,7 +80,7 @@ struct ToStartOfTransform; TRANSFORM_TIME(Hour) TRANSFORM_TIME(Minute) TRANSFORM_TIME(Second) -#undef TRANSFORM_DATE +#undef TRANSFORM_TIME template struct AddTime; diff --git a/src/Functions/addSubSeconds.cpp b/src/Functions/addSubSeconds.cpp new file mode 100644 index 00000000000..e5393448040 --- /dev/null +++ b/src/Functions/addSubSeconds.cpp @@ -0,0 +1,39 @@ +#include +#include + + +namespace DB +{ + +//#define SUBSECOND(SUBSECOND_KIND) \ +// using FunctionAdd##SUBSECOND_KIND##seconds = FunctionDateOrDateTimeAddInterval;\ +// void registerFunctionAdd##SUBSECOND_KIND##seconds(FunctionFactory & factory) \ +// { \ +// factory.registerFunction(); \ +// }; +//SUBSECOND(Nano) +//SUBSECOND(Micro) +//SUBSECOND(Milli) +//#undef SUBSECOND + +using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionAddNanoseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +using FunctionAddMicroseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionAddMicroseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +using FunctionAddMilliseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionAddMilliseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +} + + diff --git a/src/Functions/registerFunctionsDateTime.cpp b/src/Functions/registerFunctionsDateTime.cpp index 5211a62ff1e..e13d21688f0 100644 --- a/src/Functions/registerFunctionsDateTime.cpp +++ b/src/Functions/registerFunctionsDateTime.cpp @@ -47,6 +47,9 @@ void registerFunctionTimeSlots(FunctionFactory &); void registerFunctionToYYYYMM(FunctionFactory &); void registerFunctionToYYYYMMDD(FunctionFactory &); void registerFunctionToYYYYMMDDhhmmss(FunctionFactory &); +void registerFunctionAddNanoseconds(FunctionFactory &); +void registerFunctionAddMicroseconds(FunctionFactory &); +void registerFunctionAddMilliseconds(FunctionFactory &); void registerFunctionAddSeconds(FunctionFactory &); void registerFunctionAddMinutes(FunctionFactory &); void registerFunctionAddHours(FunctionFactory &); @@ -119,6 +122,9 @@ void registerFunctionsDateTime(FunctionFactory & factory) registerFunctionToYYYYMM(factory); registerFunctionToYYYYMMDD(factory); registerFunctionToYYYYMMDDhhmmss(factory); + registerFunctionAddNanoseconds(factory); + registerFunctionAddMicroseconds(factory); + registerFunctionAddMilliseconds(factory); registerFunctionAddSeconds(factory); registerFunctionAddMinutes(factory); registerFunctionAddHours(factory); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 37c913f58a9..f1c5aa067b7 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -262,7 +262,13 @@ namespace IntervalKind strToIntervalKind(const String& interval_str) { - if (interval_str == "Second") + if (interval_str == "Nanosecond") + return IntervalKind::Nanosecond; + else if (interval_str == "Microsecond") + return IntervalKind::Microsecond; + else if (interval_str == "Millisecond") + return IntervalKind::Millisecond; + else if (interval_str == "Second") return IntervalKind::Second; else if (interval_str == "Minute") return IntervalKind::Minute; @@ -307,6 +313,12 @@ namespace { switch (kind) { + case IntervalKind::Nanosecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Microsecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Millisecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); #define CASE_WINDOW_KIND(KIND) \ case IntervalKind::KIND: { \ return AddTime::execute(time_sec, num_units, time_zone); \ @@ -724,6 +736,12 @@ UInt32 StorageWindowView::getWindowLowerBound(UInt32 time_sec) switch (window_interval_kind) { + case IntervalKind::Nanosecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Microsecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Millisecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); #define CASE_WINDOW_KIND(KIND) \ case IntervalKind::KIND: \ { \ @@ -759,6 +777,12 @@ UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) switch (window_interval_kind) { + case IntervalKind::Nanosecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Microsecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Millisecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); #define CASE_WINDOW_KIND(KIND) \ case IntervalKind::KIND: \ { \ From d8b40e7deb3d299c82bc8b14e865346a0f56ac56 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 9 Feb 2022 10:28:15 +0300 Subject: [PATCH 018/132] basic add[...]seconds added TODO: fix DT64 scale resolution when input and func's scale are not the same --- src/Functions/CMakeLists.txt | 2 +- .../FunctionDateOrDateTimeAddInterval.h | 45 +++++++++--- src/Functions/FunctionsTimeWindow.cpp | 34 +++++++++ src/Functions/SubtractSubSeconds.cpp | 39 +++++++++++ src/Functions/TransformDateTime64.h | 2 +- src/Functions/registerFunctionsDateTime.cpp | 6 ++ src/Functions/toStartOfInterval.cpp | 70 ++++++++++++++++++- 7 files changed, 183 insertions(+), 15 deletions(-) create mode 100644 src/Functions/SubtractSubSeconds.cpp diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 707b3e7ceb8..b7020ea128e 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -9,7 +9,7 @@ add_headers_and_sources(clickhouse_functions .) list(REMOVE_ITEM clickhouse_functions_sources IFunction.cpp FunctionFactory.cpp FunctionHelpers.cpp) list(REMOVE_ITEM clickhouse_functions_headers IFunction.h FunctionFactory.h FunctionHelpers.h) -add_library(clickhouse_functions ${clickhouse_functions_sources} addSubSeconds.cpp) +add_library(clickhouse_functions ${clickhouse_functions_sources}) target_link_libraries(clickhouse_functions PUBLIC diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index d135a41291c..3442005ee70 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -37,6 +37,7 @@ namespace ErrorCodes /// - 'AddSecondsImpl::execute(UInt32, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(DateTime, ...) -> DateTime' /// - 'AddSecondsImpl::execute(UInt16, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(Date, ...) -> DateTime' +//TODO: pass info about current scale struct AddNanosecondsImpl { static constexpr auto name = "addNanoseconds"; @@ -47,10 +48,9 @@ struct AddNanosecondsImpl return {t.whole, t.fractional + delta}; } - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(UInt32 t, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) { - return {t, delta}; + return t + delta; } static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) { @@ -73,10 +73,9 @@ struct AddMicrosecondsImpl return {t.whole, t.fractional + delta}; } - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(UInt32 t, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) { - return {t, delta}; + return t + delta; } static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) { @@ -99,10 +98,9 @@ struct AddMillisecondsImpl return {t.whole, t.fractional + delta}; } - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(UInt32 t, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) { - return {t, delta}; + return t + delta; } static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) { @@ -332,6 +330,9 @@ struct SubtractIntervalImpl : public Transform } }; +struct SubtractNanosecondsImpl : SubtractIntervalImpl { static constexpr auto name = "subtractNanoseconds"; }; +struct SubtractMicrosecondsImpl : SubtractIntervalImpl { static constexpr auto name = "subtractMicroseconds"; }; +struct SubtractMillisecondsImpl : SubtractIntervalImpl { static constexpr auto name = "subtractMilliseconds"; }; struct SubtractSecondsImpl : SubtractIntervalImpl { static constexpr auto name = "subtractSeconds"; }; struct SubtractMinutesImpl : SubtractIntervalImpl { static constexpr auto name = "subtractMinutes"; }; struct SubtractHoursImpl : SubtractIntervalImpl { static constexpr auto name = "subtractHours"; }; @@ -540,6 +541,7 @@ public: /// Helper templates to deduce return type based on argument type, since some overloads may promote or denote types, /// e.g. addSeconds(Date, 1) => DateTime + template using TransformExecuteReturnType = decltype(std::declval>().execute(FieldType(), 0, std::declval())); @@ -567,11 +569,32 @@ public: if (typeid_cast(arguments[0].type.get())) { const auto & datetime64_type = assert_cast(*arguments[0].type); - return std::make_shared(datetime64_type.getScale(), extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + + auto from_scale = datetime64_type.getScale(); + auto scale = from_scale; + + if (std::is_same_v) + scale = 9; + else if (std::is_same_v) + scale = 6; + else if (std::is_same_v) + scale = 3; + + scale = std::max(scale, from_scale); + + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); } else { - return std::make_shared(DataTypeDateTime64::default_scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + auto scale = DataTypeDateTime64::default_scale; + + if (std::is_same_v) + scale = 9; + else if (std::is_same_v) + scale = 6; + else if (std::is_same_v) + scale = 3; + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); } } else diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index 79ce7356ee7..76844e2e6fb 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; + extern const int SYNTAX_ERROR; } namespace @@ -167,6 +168,13 @@ struct TimeWindowImpl switch (std::get<0>(interval)) { + //TODO: add proper support for fractional seconds +// case IntervalKind::Nanosecond: +// return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); +// case IntervalKind::Microsecond: +// return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); +// case IntervalKind::Millisecond: +// return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); case IntervalKind::Second: return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); case IntervalKind::Minute: @@ -183,6 +191,8 @@ struct TimeWindowImpl return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); case IntervalKind::Year: return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + default: + throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR); } __builtin_unreachable(); } @@ -350,6 +360,16 @@ struct TimeWindowImpl switch (std::get<0>(window_interval)) { + //TODO: add proper support for fractional seconds +// case IntervalKind::Nanosecond: +// return executeHop( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); +// case IntervalKind::Microsecond: +// return executeHop( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); +// case IntervalKind::Millisecond: +// return executeHop( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); case IntervalKind::Second: return executeHop( *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); @@ -374,6 +394,8 @@ struct TimeWindowImpl case IntervalKind::Year: return executeHop( *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + default: + throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR); } __builtin_unreachable(); } @@ -487,6 +509,16 @@ struct TimeWindowImpl switch (std::get<0>(window_interval)) { + //TODO: add proper support for fractional seconds +// case IntervalKind::Nanosecond: +// return executeHopSlice( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); +// case IntervalKind::Microsecond: +// return executeHopSlice( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); +// case IntervalKind::Millisecond: +// return executeHopSlice( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); case IntervalKind::Second: return executeHopSlice( *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); @@ -511,6 +543,8 @@ struct TimeWindowImpl case IntervalKind::Year: return executeHopSlice( *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + default: + throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR); } __builtin_unreachable(); } diff --git a/src/Functions/SubtractSubSeconds.cpp b/src/Functions/SubtractSubSeconds.cpp new file mode 100644 index 00000000000..07afa38c963 --- /dev/null +++ b/src/Functions/SubtractSubSeconds.cpp @@ -0,0 +1,39 @@ +#include +#include + + +namespace DB +{ + +//#define SUBSECOND(SUBSECOND_KIND) \ +// using FunctionAdd##SUBSECOND_KIND##seconds = FunctionDateOrDateTimeAddInterval;\ +// void registerFunctionAdd##SUBSECOND_KIND##seconds(FunctionFactory & factory) \ +// { \ +// factory.registerFunction(); \ +// }; +//SUBSECOND(Nano) +//SUBSECOND(Micro) +//SUBSECOND(Milli) +//#undef SUBSECOND + +using FunctionSubtractNanoseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionSubtractNanoseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +using FunctionSubtractMicroseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionSubtractMicroseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +using FunctionSubtractMilliseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionSubtractMilliseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +} + + diff --git a/src/Functions/TransformDateTime64.h b/src/Functions/TransformDateTime64.h index 4eab2a491c7..ee6b9e98961 100644 --- a/src/Functions/TransformDateTime64.h +++ b/src/Functions/TransformDateTime64.h @@ -13,7 +13,7 @@ namespace DB * * DateTime64 value and scale factor (2) * * DateTime64 broken down to components, result of execute is then re-assembled back into DateTime64 value (3) * - * Suitable Transfotm-types are commonly used in Date/DateTime manipulation functions, + * Suitable Transform-types are commonly used in Date/DateTime manipulation functions, * and should implement static (or const) function with following signatures: * 1: * R execute(Int64 whole_value, ... ) diff --git a/src/Functions/registerFunctionsDateTime.cpp b/src/Functions/registerFunctionsDateTime.cpp index e13d21688f0..09c58064bbb 100644 --- a/src/Functions/registerFunctionsDateTime.cpp +++ b/src/Functions/registerFunctionsDateTime.cpp @@ -58,6 +58,9 @@ void registerFunctionAddWeeks(FunctionFactory &); void registerFunctionAddMonths(FunctionFactory &); void registerFunctionAddQuarters(FunctionFactory &); void registerFunctionAddYears(FunctionFactory &); +void registerFunctionSubtractNanoseconds(FunctionFactory &); +void registerFunctionSubtractMicroseconds(FunctionFactory &); +void registerFunctionSubtractMilliseconds(FunctionFactory &); void registerFunctionSubtractSeconds(FunctionFactory &); void registerFunctionSubtractMinutes(FunctionFactory &); void registerFunctionSubtractHours(FunctionFactory &); @@ -133,6 +136,9 @@ void registerFunctionsDateTime(FunctionFactory & factory) registerFunctionAddMonths(factory); registerFunctionAddQuarters(factory); registerFunctionAddYears(factory); + registerFunctionSubtractNanoseconds(factory); + registerFunctionSubtractMicroseconds(factory); + registerFunctionSubtractMilliseconds(factory); registerFunctionSubtractSeconds(factory); registerFunctionSubtractMinutes(factory); registerFunctionSubtractHours(factory); diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 09b7931de8d..a48b2cdf075 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -211,6 +211,66 @@ namespace } }; + template <> + struct Transform + { + static constexpr auto name = function_name; + + static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + + static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + + static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone) + { + return time_zone.toStartOfSecondInterval(t, seconds); + } + + static Int64 execute(Int64 t, UInt64 seconds, const DateLUTImpl & time_zone) + { + return time_zone.toStartOfSecondInterval(t, seconds); + } + }; + + template <> + struct Transform + { + static constexpr auto name = function_name; + + static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + + static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + + static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone) + { + return time_zone.toStartOfSecondInterval(t, seconds); + } + + static Int64 execute(Int64 t, UInt64 seconds, const DateLUTImpl & time_zone) + { + return time_zone.toStartOfSecondInterval(t, seconds); + } + }; + + template <> + struct Transform + { + static constexpr auto name = function_name; + + static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + + static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + + static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone) + { + return time_zone.toStartOfSecondInterval(t, seconds); + } + + static Int64 execute(Int64 t, UInt64 seconds, const DateLUTImpl & time_zone) + { + return time_zone.toStartOfSecondInterval(t, seconds); + } + }; + class FunctionToStartOfInterval : public IFunction { @@ -263,7 +323,7 @@ public: if (first_argument_is_date && result_type_is_date) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The timezone argument of function {} with interval type {} is allowed only when the 1st argument " - "has the type DateTime", + "has the type DateTime or DateTime64", getName(), interval_type->getKind().toString()); }; @@ -368,6 +428,12 @@ private: switch (interval_type->getKind()) { + case IntervalKind::Nanosecond: + return execute(from, time_column, num_units, time_zone); + case IntervalKind::Microsecond: + return execute(from, time_column, num_units, time_zone); + case IntervalKind::Millisecond: + return execute(from, time_column, num_units, time_zone); case IntervalKind::Second: return execute(from, time_column, num_units, time_zone); case IntervalKind::Minute: @@ -399,7 +465,7 @@ private: auto & result_data = result->getData(); result_data.resize(size); - if constexpr (std::is_same_v) + if constexpr (std::is_same_v || std::is_same_v) { const auto transform = TransformDateTime64>{from_datatype.getScale()}; for (size_t i = 0; i != size; ++i) From 0fe1fdb6ca07de1d50e4ef1574953ce7fb0ef510 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 9 Feb 2022 20:19:01 +0300 Subject: [PATCH 019/132] add[...]Seconds basically work --- .../FunctionDateOrDateTimeAddInterval.h | 151 ++++++++++-------- 1 file changed, 83 insertions(+), 68 deletions(-) diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index 3442005ee70..5f44215a0be 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -37,27 +37,28 @@ namespace ErrorCodes /// - 'AddSecondsImpl::execute(UInt32, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(DateTime, ...) -> DateTime' /// - 'AddSecondsImpl::execute(UInt16, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(Date, ...) -> DateTime' -//TODO: pass info about current scale +//TODO: fix wrong results when source DateTime64 precision is less then result struct AddNanosecondsImpl { static constexpr auto name = "addNanoseconds"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { - return {t.whole, t.fractional + delta}; + Int64 multiplier = std::pow(10, 9 - scale); + return {t.whole + (t.fractional * multiplier + delta) / 1000000000, (t.fractional * multiplier + delta) % 1000000000}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { // use default datetime64 scale return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta; } @@ -68,21 +69,28 @@ struct AddMicrosecondsImpl static constexpr auto name = "addMicroseconds"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { - return {t.whole, t.fractional + delta}; + if (scale <= 6) + { + Int64 multiplier = std::pow(10, 6 - scale); + return {t.whole + (t.fractional * multiplier + delta) / 1000000, (t.fractional * multiplier + delta) % 1000000}; + } else { + Int64 multiplier = std::pow(10, scale - 6); + return {t.whole + (t.fractional + delta * multiplier) / 1000000, (t.fractional + delta * multiplier) % 1000000}; + } } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { // use default datetime64 scale return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta; } @@ -93,21 +101,28 @@ struct AddMillisecondsImpl static constexpr auto name = "addMilliseconds"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { - return {t.whole, t.fractional + delta}; + if (scale <= 3) + { + Int64 multiplier = std::pow(10, 3 - scale); + return {t.whole + (t.fractional * multiplier + delta) / 1000, (t.fractional * multiplier + delta) % 1000}; + } else { + Int64 multiplier = std::pow(10, scale - 3); + return {t.whole + (t.fractional + delta * multiplier) / 1000, (t.fractional + delta * multiplier) % 1000}; + } } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { // use default datetime64 scale return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta; } @@ -118,21 +133,21 @@ struct AddSecondsImpl static constexpr auto name = "addSeconds"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return {t.whole + delta, t.fractional}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { // use default datetime64 scale return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta; } @@ -143,21 +158,21 @@ struct AddMinutesImpl static constexpr auto name = "addMinutes"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return {t.whole + delta * 60, t.fractional}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta * 60; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { // use default datetime64 scale return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta * 60; } @@ -168,20 +183,20 @@ struct AddHoursImpl static constexpr auto name = "addHours"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return {t.whole + delta * 3600, t.fractional}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta * 3600; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { // use default datetime64 scale return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta * 3600; } @@ -192,22 +207,22 @@ struct AddDaysImpl static constexpr auto name = "addDays"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return {time_zone.addDays(t.whole, delta), t.fractional}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addDays(t, delta); } - static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return d + delta; } - static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return d + delta; } @@ -218,22 +233,22 @@ struct AddWeeksImpl static constexpr auto name = "addWeeks"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone) + execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return {time_zone.addWeeks(t.whole, delta), t.fractional}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addWeeks(t, delta); } - static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &, UInt16 = 0) { return d + delta * 7; } - static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &, UInt16 = 0) { return d + delta * 7; } @@ -244,22 +259,22 @@ struct AddMonthsImpl static constexpr auto name = "addMonths"; static inline DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return {time_zone.addMonths(t.whole, delta), t.fractional}; } - static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone) + static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addMonths(t, delta); } - static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addMonths(DayNum(d), delta); } - static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addMonths(ExtendedDayNum(d), delta); } @@ -270,22 +285,22 @@ struct AddQuartersImpl static constexpr auto name = "addQuarters"; static inline DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone) + execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return {time_zone.addQuarters(t.whole, delta), t.fractional}; } - static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone) + static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addQuarters(t, delta); } - static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl & time_zone) + static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addQuarters(DayNum(d), delta); } - static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl & time_zone) + static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addQuarters(ExtendedDayNum(d), delta); } @@ -296,22 +311,22 @@ struct AddYearsImpl static constexpr auto name = "addYears"; static inline DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return {time_zone.addYears(t.whole, delta), t.fractional}; } - static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone) + static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addYears(t, delta); } - static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addYears(DayNum(d), delta); } - static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addYears(ExtendedDayNum(d), delta); } @@ -323,10 +338,10 @@ struct SubtractIntervalImpl : public Transform using Transform::Transform; template - inline NO_SANITIZE_UNDEFINED auto execute(T t, Int64 delta, const DateLUTImpl & time_zone) const + inline NO_SANITIZE_UNDEFINED auto execute(T t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const { /// Signed integer overflow is Ok. - return Transform::execute(t, -delta, time_zone); + return Transform::execute(t, -delta, time_zone, scale); } }; @@ -353,17 +368,17 @@ struct Adder {} template - void NO_INLINE vectorConstant(const FromVectorType & vec_from, ToVectorType & vec_to, Int64 delta, const DateLUTImpl & time_zone) const + void NO_INLINE vectorConstant(const FromVectorType & vec_from, ToVectorType & vec_to, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const { size_t size = vec_from.size(); vec_to.resize(size); for (size_t i = 0; i < size; ++i) - vec_to[i] = transform.execute(vec_from[i], delta, time_zone); + vec_to[i] = transform.execute(vec_from[i], delta, time_zone, scale); } template - void vectorVector(const FromVectorType & vec_from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone) const + void vectorVector(const FromVectorType & vec_from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const { size_t size = vec_from.size(); vec_to.resize(size); @@ -372,11 +387,11 @@ struct Adder ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat32, ColumnFloat64>( - &delta, [&](const auto & column){ vectorVector(vec_from, vec_to, column, time_zone, size); return true; }); + &delta, [&](const auto & column){ vectorVector(vec_from, vec_to, column, time_zone, scale, size); return true; }); } template - void constantVector(const FromType & from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone) const + void constantVector(const FromType & from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const { size_t size = delta.size(); vec_to.resize(size); @@ -385,24 +400,24 @@ struct Adder ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat32, ColumnFloat64>( - &delta, [&](const auto & column){ constantVector(from, vec_to, column, time_zone, size); return true; }); + &delta, [&](const auto & column){ constantVector(from, vec_to, column, time_zone, scale, size); return true; }); } private: template NO_INLINE NO_SANITIZE_UNDEFINED void vectorVector( - const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const + const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t size) const { for (size_t i = 0; i < size; ++i) - vec_to[i] = transform.execute(vec_from[i], delta.getData()[i], time_zone); + vec_to[i] = transform.execute(vec_from[i], delta.getData()[i], time_zone, scale); } template NO_INLINE NO_SANITIZE_UNDEFINED void constantVector( - const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const + const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t size) const { for (size_t i = 0; i < size; ++i) - vec_to[i] = transform.execute(from, delta.getData()[i], time_zone); + vec_to[i] = transform.execute(from, delta.getData()[i], time_zone, scale); } }; @@ -410,7 +425,7 @@ private: template struct DateTimeAddIntervalImpl { - static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) + static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale = 0) { using FromValueType = typename FromDataType::FieldType; using FromColumnType = typename FromDataType::ColumnType; @@ -430,16 +445,16 @@ struct DateTimeAddIntervalImpl const IColumn & delta_column = *arguments[1].column; if (const auto * delta_const_column = typeid_cast(&delta_column)) - op.vectorConstant(sources->getData(), col_to->getData(), delta_const_column->getInt(0), time_zone); + op.vectorConstant(sources->getData(), col_to->getData(), delta_const_column->getInt(0), time_zone, scale); else - op.vectorVector(sources->getData(), col_to->getData(), delta_column, time_zone); + op.vectorVector(sources->getData(), col_to->getData(), delta_column, time_zone, scale); } else if (const auto * sources_const = checkAndGetColumnConst(source_col.get())) { op.constantVector( sources_const->template getValue(), col_to->getData(), - *arguments[1].column, time_zone); + *arguments[1].column, time_zone, scale); } else { @@ -543,7 +558,7 @@ public: /// e.g. addSeconds(Date, 1) => DateTime template - using TransformExecuteReturnType = decltype(std::declval>().execute(FieldType(), 0, std::declval())); + using TransformExecuteReturnType = decltype(std::declval>().execute(FieldType(), 0, std::declval(), 0)); // Deduces RETURN DataType from INPUT DataType, based on return type of Transform{}.execute(INPUT_TYPE, UInt64, DateLUTImpl). // e.g. for Transform-type that has execute()-overload with 'UInt16' input and 'UInt32' return, @@ -592,8 +607,7 @@ public: scale = 9; else if (std::is_same_v) scale = 6; - else if (std::is_same_v) - scale = 3; + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); } } @@ -631,9 +645,10 @@ public: } else if (const auto * datetime64_type = assert_cast(from_type)) { + auto scale = datetime64_type->getScale(); using WrappedTransformType = TransformType; return DateTimeAddIntervalImpl, WrappedTransformType>::execute( - WrappedTransformType{datetime64_type->getScale()}, arguments, result_type); + WrappedTransformType{datetime64_type->getScale()}, arguments, result_type, scale); } else throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName(), From da1ba60202a60f65fd822c42246b8bfda20a7619 Mon Sep 17 00:00:00 2001 From: zvonand Date: Fri, 11 Feb 2022 13:35:50 +0300 Subject: [PATCH 020/132] upload --- .../FunctionDateOrDateTimeAddInterval.h | 38 ++++++++++--------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index 5f44215a0be..b5829c2d1ca 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -43,15 +43,17 @@ struct AddNanosecondsImpl static constexpr auto name = "addNanoseconds"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { - Int64 multiplier = std::pow(10, 9 - scale); - return {t.whole + (t.fractional * multiplier + delta) / 1000000000, (t.fractional * multiplier + delta) % 1000000000}; +// auto dt = DataTypeDateTime64(scale); +// Int64 multiplier = std::pow(10, 9 - scale); + auto div_result = std::div(t.fractional + delta, static_cast(1000000000)); + return {t.whole + div_result.quot, div_result.rem}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64, const DateLUTImpl &, UInt16 = 0) { - return t + delta; + return t; } static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { @@ -71,19 +73,20 @@ struct AddMicrosecondsImpl static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { + Int64 multiplier = std::pow(10, std::abs(6 - scale)); if (scale <= 6) { - Int64 multiplier = std::pow(10, 6 - scale); - return {t.whole + (t.fractional * multiplier + delta) / 1000000, (t.fractional * multiplier + delta) % 1000000}; + return {t.whole * multiplier + (t.fractional * multiplier + delta) / static_cast(10e6 / multiplier), + (t.fractional * multiplier + delta) % static_cast(10e6 / multiplier)}; } else { - Int64 multiplier = std::pow(10, scale - 6); - return {t.whole + (t.fractional + delta * multiplier) / 1000000, (t.fractional + delta * multiplier) % 1000000}; + return {t.whole + (t.fractional + delta * multiplier) / static_cast(10e6 / multiplier), + (t.fractional + delta * multiplier) % static_cast(10e6 / multiplier)}; } } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64, const DateLUTImpl &, UInt16 = 0) { - return t + delta; + return t; } static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { @@ -103,19 +106,20 @@ struct AddMillisecondsImpl static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { + Int64 multiplier = std::pow(10, std::abs(3 - scale)); if (scale <= 3) { - Int64 multiplier = std::pow(10, 3 - scale); - return {t.whole + (t.fractional * multiplier + delta) / 1000, (t.fractional * multiplier + delta) % 1000}; + return {t.whole * multiplier + (t.fractional * multiplier + delta) / static_cast(1000 / multiplier), + (t.fractional * multiplier + delta) % static_cast(1000 / multiplier)}; } else { - Int64 multiplier = std::pow(10, scale - 3); - return {t.whole + (t.fractional + delta * multiplier) / 1000, (t.fractional + delta * multiplier) % 1000}; + return {t.whole + (t.fractional + delta * multiplier) / static_cast(1000 / multiplier), + (t.fractional + delta * multiplier) % static_cast(1000 / multiplier)}; } } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64, const DateLUTImpl &, UInt16 = 0) { - return t + delta; + return t; } static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { From 0c5e8a0beb166ff3d4e4d93f8d66b2a009510b90 Mon Sep 17 00:00:00 2001 From: zvonand Date: Sat, 12 Feb 2022 19:37:06 +0300 Subject: [PATCH 021/132] upload --- contrib/replxx | 2 +- src/Functions/FunctionDateOrDateTimeAddInterval.h | 9 ++++----- src/Functions/SubtractSubSeconds.cpp | 11 ----------- src/Functions/addSubSeconds.cpp | 11 ----------- 4 files changed, 5 insertions(+), 28 deletions(-) diff --git a/contrib/replxx b/contrib/replxx index 9460e5e0fc1..c745b3fb012 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit 9460e5e0fc10f78f460af26a6bd928798cac864d +Subproject commit c745b3fb012ee5ae762fbc8cd7a40c4dc3fe15df diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index b5829c2d1ca..815a85beba3 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -43,12 +43,11 @@ struct AddNanosecondsImpl static constexpr auto name = "addNanoseconds"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { -// auto dt = DataTypeDateTime64(scale); -// Int64 multiplier = std::pow(10, 9 - scale); - auto div_result = std::div(t.fractional + delta, static_cast(1000000000)); - return {t.whole + div_result.quot, div_result.rem}; + Int64 multiplier = std::pow(10, 9 - scale); + auto div_result = std::div(t.fractional * multiplier + delta, static_cast(10e9)); + return {t.whole / multiplier + div_result.quot, div_result.rem}; } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64, const DateLUTImpl &, UInt16 = 0) diff --git a/src/Functions/SubtractSubSeconds.cpp b/src/Functions/SubtractSubSeconds.cpp index 07afa38c963..5eeb24c8748 100644 --- a/src/Functions/SubtractSubSeconds.cpp +++ b/src/Functions/SubtractSubSeconds.cpp @@ -5,17 +5,6 @@ namespace DB { -//#define SUBSECOND(SUBSECOND_KIND) \ -// using FunctionAdd##SUBSECOND_KIND##seconds = FunctionDateOrDateTimeAddInterval;\ -// void registerFunctionAdd##SUBSECOND_KIND##seconds(FunctionFactory & factory) \ -// { \ -// factory.registerFunction(); \ -// }; -//SUBSECOND(Nano) -//SUBSECOND(Micro) -//SUBSECOND(Milli) -//#undef SUBSECOND - using FunctionSubtractNanoseconds = FunctionDateOrDateTimeAddInterval; void registerFunctionSubtractNanoseconds(FunctionFactory & factory) { diff --git a/src/Functions/addSubSeconds.cpp b/src/Functions/addSubSeconds.cpp index e5393448040..f58f8b20b99 100644 --- a/src/Functions/addSubSeconds.cpp +++ b/src/Functions/addSubSeconds.cpp @@ -5,17 +5,6 @@ namespace DB { -//#define SUBSECOND(SUBSECOND_KIND) \ -// using FunctionAdd##SUBSECOND_KIND##seconds = FunctionDateOrDateTimeAddInterval;\ -// void registerFunctionAdd##SUBSECOND_KIND##seconds(FunctionFactory & factory) \ -// { \ -// factory.registerFunction(); \ -// }; -//SUBSECOND(Nano) -//SUBSECOND(Micro) -//SUBSECOND(Milli) -//#undef SUBSECOND - using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval; void registerFunctionAddNanoseconds(FunctionFactory & factory) { From 48c1b8b62b156441c7d7e3f80838a0bffd34d5e7 Mon Sep 17 00:00:00 2001 From: zvonand Date: Sun, 13 Feb 2022 13:01:48 +0300 Subject: [PATCH 022/132] Updated IntervalParser --- contrib/replxx | 2 +- .../FunctionDateOrDateTimeAddInterval.h | 42 +++++++++---------- src/Parsers/parseIntervalKind.cpp | 21 ++++++++++ 3 files changed, 43 insertions(+), 22 deletions(-) diff --git a/contrib/replxx b/contrib/replxx index c745b3fb012..9460e5e0fc1 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit c745b3fb012ee5ae762fbc8cd7a40c4dc3fe15df +Subproject commit 9460e5e0fc10f78f460af26a6bd928798cac864d diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index 815a85beba3..cb8b15919bb 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -37,29 +37,27 @@ namespace ErrorCodes /// - 'AddSecondsImpl::execute(UInt32, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(DateTime, ...) -> DateTime' /// - 'AddSecondsImpl::execute(UInt16, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(Date, ...) -> DateTime' -//TODO: fix wrong results when source DateTime64 precision is less then result struct AddNanosecondsImpl { static constexpr auto name = "addNanoseconds"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { - Int64 multiplier = std::pow(10, 9 - scale); - auto div_result = std::div(t.fractional * multiplier + delta, static_cast(10e9)); - return {t.whole / multiplier + div_result.quot, div_result.rem}; + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + auto division = std::div(t.fractional * multiplier + delta, static_cast(1000000000)); + return {t.whole * multiplier + division.quot, t.fractional * multiplier + delta}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64, const DateLUTImpl &, UInt16 = DataTypeDateTime64::default_scale) { return t; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = DataTypeDateTime64::default_scale) { - // use default datetime64 scale - return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; + return time_zone.fromDayNum(ExtendedDayNum(d)) * DecimalUtils::scaleMultiplier(scale) + delta; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = DataTypeDateTime64::default_scale) { return time_zone.fromDayNum(DayNum(d)) + delta; } @@ -72,14 +70,14 @@ struct AddMicrosecondsImpl static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { - Int64 multiplier = std::pow(10, std::abs(6 - scale)); + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); if (scale <= 6) { - return {t.whole * multiplier + (t.fractional * multiplier + delta) / static_cast(10e6 / multiplier), - (t.fractional * multiplier + delta) % static_cast(10e6 / multiplier)}; + auto division = std::div( (t.fractional + delta), static_cast(10e6)); + return {t.whole * multiplier + division.quot, division.rem}; } else { - return {t.whole + (t.fractional + delta * multiplier) / static_cast(10e6 / multiplier), - (t.fractional + delta * multiplier) % static_cast(10e6 / multiplier)}; + auto division = std::div( (t.fractional + delta * multiplier), static_cast(10e6 * multiplier)); + return {t.whole + division.quot, division.rem}; } } @@ -105,14 +103,14 @@ struct AddMillisecondsImpl static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { - Int64 multiplier = std::pow(10, std::abs(3 - scale)); + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); if (scale <= 3) { - return {t.whole * multiplier + (t.fractional * multiplier + delta) / static_cast(1000 / multiplier), - (t.fractional * multiplier + delta) % static_cast(1000 / multiplier)}; + auto division = std::div( (t.fractional + delta), static_cast(1000)); + return {t.whole * multiplier + division.quot, division.rem}; } else { - return {t.whole + (t.fractional + delta * multiplier) / static_cast(1000 / multiplier), - (t.fractional + delta * multiplier) % static_cast(1000 / multiplier)}; + auto division = std::div( (t.fractional + delta * multiplier), static_cast(1000 * multiplier)); + return {t.whole + division.quot,division.rem}; } } @@ -428,7 +426,7 @@ private: template struct DateTimeAddIntervalImpl { - static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale = 0) + static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale = DataTypeDateTime64::default_scale) { using FromValueType = typename FromDataType::FieldType; using FromColumnType = typename FromDataType::ColumnType; @@ -610,6 +608,8 @@ public: scale = 9; else if (std::is_same_v) scale = 6; + else if (std::is_same_v) + scale = 3; return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); } diff --git a/src/Parsers/parseIntervalKind.cpp b/src/Parsers/parseIntervalKind.cpp index 7d36133e81c..e089148f255 100644 --- a/src/Parsers/parseIntervalKind.cpp +++ b/src/Parsers/parseIntervalKind.cpp @@ -7,6 +7,27 @@ namespace DB { bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & result) { + if (ParserKeyword("NANOSECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_NANOSECOND").ignore(pos, expected) + || ParserKeyword("NS").ignore(pos, expected)) + { + result = IntervalKind::Microsecond; + return true; + } + + if (ParserKeyword("MICROSECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_MICROSECOND").ignore(pos, expected) + || ParserKeyword("MCS").ignore(pos, expected)) + { + result = IntervalKind::Microsecond; + return true; + } + + if (ParserKeyword("MILLISECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_MILLISECOND").ignore(pos, expected) + || ParserKeyword("MS").ignore(pos, expected)) + { + result = IntervalKind::Millisecond; + return true; + } + if (ParserKeyword("SECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_SECOND").ignore(pos, expected) || ParserKeyword("SS").ignore(pos, expected) || ParserKeyword("S").ignore(pos, expected)) { From dcc0f53f8efd5c0a5c1de147a63293432d8e8b2f Mon Sep 17 00:00:00 2001 From: zvonand Date: Sun, 13 Feb 2022 17:54:03 +0300 Subject: [PATCH 023/132] updated Interval definitions --- src/Functions/DateTimeTransforms.h | 115 ++++++++++++++++++ .../FunctionDateOrDateTimeToSomething.h | 14 +++ src/Functions/FunctionsTimeWindow.h | 44 +++++++ src/Functions/registerFunctionsDateTime.cpp | 6 + src/Functions/toStartOfSubsecond.cpp | 30 +++++ src/Parsers/parseIntervalKind.cpp | 2 +- src/Storages/WindowView/StorageWindowView.cpp | 8 +- 7 files changed, 215 insertions(+), 4 deletions(-) create mode 100644 src/Functions/toStartOfSubsecond.cpp diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index a7f06689820..b5ffb34df0f 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -311,6 +311,121 @@ struct ToStartOfSecondImpl using FactorTransform = ZeroTransform; }; +struct ToStartOfMillisecondImpl +{ + static constexpr auto name = "toStartOfMillisecond"; + + static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) + { + // given that scale is 6, scale_multiplier is 1000000 + // for DateTime64 value of 123.456789: + // 123456789 - 789 = 123456000 + // for DateTime64 value of -123.456789: + // -123456789 - (1000 + (-789)) = -123457000 + + if (scale_multiplier <= 1000){ + return datetime64 * (1000 / scale_multiplier); + } + else + { + auto droppable_part_with_sign = DecimalUtils::getFractionalPartWithScaleMultiplier(datetime64, scale_multiplier / 1000); + + if (droppable_part_with_sign < 0) + droppable_part_with_sign += scale_multiplier; + + return datetime64 - droppable_part_with_sign; + } + } + + static inline UInt32 execute(UInt32, const DateLUTImpl &) + { + throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + static inline UInt32 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + static inline UInt32 execute(UInt16, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + + using FactorTransform = ZeroTransform; +}; + +struct ToStartOfMicrosecondImpl +{ + static constexpr auto name = "toStartOfMicrosecond"; + + static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) + { + // @see ToStartOfMillisecondImpl + + if (scale_multiplier <= 1000000){ + return datetime64 * (1000000 / scale_multiplier); + } + else + { + auto droppable_part_with_sign = DecimalUtils::getFractionalPartWithScaleMultiplier(datetime64, scale_multiplier / 1000000); + + if (droppable_part_with_sign < 0) + droppable_part_with_sign += scale_multiplier; + + return datetime64 - droppable_part_with_sign; + } + } + + static inline UInt32 execute(UInt32, const DateLUTImpl &) + { + throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + static inline UInt32 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + static inline UInt32 execute(UInt16, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + + using FactorTransform = ZeroTransform; +}; + +struct ToStartOfNanosecondImpl +{ + static constexpr auto name = "toStartOfNanosecond"; + + static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) + { + // @see ToStartOfMillisecondImpl + if (scale_multiplier == 1000000000){ + return datetime64; + } + else if (scale_multiplier <= 1000000000){ + return datetime64 * (1000000000 / scale_multiplier); + } + else + { + throw Exception("Illegal type of argument for function " + std::string(name) + ", DateTime64 expected", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + } + + static inline UInt32 execute(UInt32, const DateLUTImpl &) + { + throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + static inline UInt32 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + static inline UInt32 execute(UInt16, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + + using FactorTransform = ZeroTransform; +}; + struct ToStartOfFiveMinuteImpl { static constexpr auto name = "toStartOfFiveMinute"; diff --git a/src/Functions/FunctionDateOrDateTimeToSomething.h b/src/Functions/FunctionDateOrDateTimeToSomething.h index 00678e65364..5269eecea37 100644 --- a/src/Functions/FunctionDateOrDateTimeToSomething.h +++ b/src/Functions/FunctionDateOrDateTimeToSomething.h @@ -88,6 +88,20 @@ public: Int64 scale = DataTypeDateTime64::default_scale; if (const auto * dt64 = checkAndGetDataType(arguments[0].type.get())) scale = dt64->getScale(); + auto source_scale = scale; + + if constexpr (std::is_same_v) + { + scale = std::max(source_scale, static_cast(3)); + } + else if constexpr (std::is_same_v) + { + scale = std::max(source_scale, static_cast(6)); + } + else if constexpr (std::is_same_v) + { + scale = std::max(source_scale, static_cast(9)); + } return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 1, 0)); } diff --git a/src/Functions/FunctionsTimeWindow.h b/src/Functions/FunctionsTimeWindow.h index 4394f73061f..3ea397e4c7d 100644 --- a/src/Functions/FunctionsTimeWindow.h +++ b/src/Functions/FunctionsTimeWindow.h @@ -82,6 +82,31 @@ struct ToStartOfTransform; TRANSFORM_TIME(Second) #undef TRANSFORM_TIME +#define TRANSFORM_SUBSECONDS(INTERVAL_KIND, DEF_SCALE) \ +template<> \ + struct ToStartOfTransform \ + { \ + static Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \ + { \ + if (scale <= DEF_SCALE) \ + { \ + auto val = t * DecimalUtils::scaleMultiplier(DEF_SCALE - scale); \ + if (delta == 1) \ + return val; \ + else \ + return val - (val % delta); \ + } \ + else \ + { \ + return t - (t % (delta * DecimalUtils::scaleMultiplier(scale - DEF_SCALE))) ; \ + } \ + } \ + }; + TRANSFORM_SUBSECONDS(Millisecond, 3) + TRANSFORM_SUBSECONDS(Microsecond, 6) + TRANSFORM_SUBSECONDS(Nanosecond, 9) +#undef TRANSFORM_SUBSECONDS + template struct AddTime; @@ -117,6 +142,25 @@ struct ToStartOfTransform; ADD_TIME(Second, 1) #undef ADD_TIME +#define ADD_SUBSECONDS(INTERVAL_KIND, DEF_SCALE) \ +template <> \ + struct AddTime \ + { \ + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \ + { \ + if (scale < DEF_SCALE) \ + { \ + return t + delta * DecimalUtils::scaleMultiplier(DEF_SCALE - scale); \ + } \ + else \ + return t + delta * DecimalUtils::scaleMultiplier(scale - DEF_SCALE); \ + } \ + }; + ADD_SUBSECONDS(Millisecond, 3) + ADD_SUBSECONDS(Microsecond, 6) + ADD_SUBSECONDS(Nanosecond, 9) +#undef ADD_SUBSECONDS + template struct TimeWindowImpl { diff --git a/src/Functions/registerFunctionsDateTime.cpp b/src/Functions/registerFunctionsDateTime.cpp index 09c58064bbb..dd7b67c47ac 100644 --- a/src/Functions/registerFunctionsDateTime.cpp +++ b/src/Functions/registerFunctionsDateTime.cpp @@ -11,6 +11,9 @@ void registerFunctionToDayOfWeek(FunctionFactory &); void registerFunctionToDayOfYear(FunctionFactory &); void registerFunctionToHour(FunctionFactory &); void registerFunctionToMinute(FunctionFactory &); +void registerFunctionToStartOfNanosecond(FunctionFactory &); +void registerFunctionToStartOfMicrosecond(FunctionFactory &); +void registerFunctionToStartOfMillisecond(FunctionFactory &); void registerFunctionToStartOfSecond(FunctionFactory &); void registerFunctionToSecond(FunctionFactory &); void registerFunctionToStartOfDay(FunctionFactory &); @@ -99,6 +102,9 @@ void registerFunctionsDateTime(FunctionFactory & factory) registerFunctionToStartOfMonth(factory); registerFunctionToStartOfQuarter(factory); registerFunctionToStartOfYear(factory); + registerFunctionToStartOfNanosecond(factory); + registerFunctionToStartOfMicrosecond(factory); + registerFunctionToStartOfMillisecond(factory); registerFunctionToStartOfSecond(factory); registerFunctionToStartOfMinute(factory); registerFunctionToStartOfFiveMinute(factory); diff --git a/src/Functions/toStartOfSubsecond.cpp b/src/Functions/toStartOfSubsecond.cpp new file mode 100644 index 00000000000..78f1aaea041 --- /dev/null +++ b/src/Functions/toStartOfSubsecond.cpp @@ -0,0 +1,30 @@ +#include +#include +#include + + +namespace DB +{ + +using FunctionToStartOfMillisecond = FunctionDateOrDateTimeToSomething; + +void registerFunctionToStartOfMillisecond(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +using FunctionToStartOfMicrosecond = FunctionDateOrDateTimeToSomething; + +void registerFunctionToStartOfMicrosecond(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +using FunctionToStartOfNanosecond = FunctionDateOrDateTimeToSomething; + +void registerFunctionToStartOfNanosecond(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/Parsers/parseIntervalKind.cpp b/src/Parsers/parseIntervalKind.cpp index e089148f255..0704aa107ca 100644 --- a/src/Parsers/parseIntervalKind.cpp +++ b/src/Parsers/parseIntervalKind.cpp @@ -10,7 +10,7 @@ bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & r if (ParserKeyword("NANOSECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_NANOSECOND").ignore(pos, expected) || ParserKeyword("NS").ignore(pos, expected)) { - result = IntervalKind::Microsecond; + result = IntervalKind::Nanosecond; return true; } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index f1c5aa067b7..39654d6beb0 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -57,6 +57,7 @@ namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; extern const int BAD_ARGUMENTS; + extern const int SYNTAX_ERROR; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int INCORRECT_QUERY; @@ -778,11 +779,12 @@ UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) switch (window_interval_kind) { case IntervalKind::Nanosecond: - throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + throw Exception("Fractional seconds are not supported by window view yet", ErrorCodes::SYNTAX_ERROR); case IntervalKind::Microsecond: - throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + throw Exception("Fractional seconds are not supported by window view yet", ErrorCodes::SYNTAX_ERROR); case IntervalKind::Millisecond: - throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + throw Exception("Fractional seconds are not supported by window view yet", ErrorCodes::SYNTAX_ERROR); + #define CASE_WINDOW_KIND(KIND) \ case IntervalKind::KIND: \ { \ From 196ffb4827733be407a36dbd9537d3bddfe58820 Mon Sep 17 00:00:00 2001 From: zvonand Date: Sun, 13 Feb 2022 18:41:45 +0300 Subject: [PATCH 024/132] toStartOf[...]second works --- src/Functions/toStartOfSubsecond.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/toStartOfSubsecond.cpp b/src/Functions/toStartOfSubsecond.cpp index 78f1aaea041..b2257c5e3cd 100644 --- a/src/Functions/toStartOfSubsecond.cpp +++ b/src/Functions/toStartOfSubsecond.cpp @@ -24,7 +24,7 @@ using FunctionToStartOfNanosecond = FunctionDateOrDateTimeToSomething(); + factory.registerFunction(); } } From 2454f1dd13c979f8aa30267995e26992ace0988d Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 14 Feb 2022 00:04:09 +0300 Subject: [PATCH 025/132] Decimal components attempt --- src/Functions/DateTimeTransforms.h | 20 +- .../FunctionDateOrDateTimeAddInterval.h | 320 +++++++++++------- .../Transforms/FillingTransform.cpp | 36 +- 3 files changed, 248 insertions(+), 128 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index b5ffb34df0f..4d1df65d0ea 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -323,7 +323,12 @@ struct ToStartOfMillisecondImpl // for DateTime64 value of -123.456789: // -123456789 - (1000 + (-789)) = -123457000 - if (scale_multiplier <= 1000){ + if (scale_multiplier == 1000) + { + return datetime64; + } + else if (scale_multiplier <= 1000) + { return datetime64 * (1000 / scale_multiplier); } else @@ -361,7 +366,12 @@ struct ToStartOfMicrosecondImpl { // @see ToStartOfMillisecondImpl - if (scale_multiplier <= 1000000){ + if (scale_multiplier == 1000000) + { + return datetime64; + } + else if (scale_multiplier <= 1000000) + { return datetime64 * (1000000 / scale_multiplier); } else @@ -398,10 +408,12 @@ struct ToStartOfNanosecondImpl static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) { // @see ToStartOfMillisecondImpl - if (scale_multiplier == 1000000000){ + if (scale_multiplier == 1000000000) + { return datetime64; } - else if (scale_multiplier <= 1000000000){ + else if (scale_multiplier <= 1000000000) + { return datetime64 * (1000000000 / scale_multiplier); } else diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index cb8b15919bb..0b8750f9d72 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -41,23 +41,32 @@ struct AddNanosecondsImpl { static constexpr auto name = "addNanoseconds"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); - auto division = std::div(t.fractional * multiplier + delta, static_cast(1000000000)); - return {t.whole * multiplier + division.quot, t.fractional * multiplier + delta}; + return t * multiplier + delta; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64, const DateLUTImpl &, UInt16 = DataTypeDateTime64::default_scale) +// static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents +// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) +// { +// Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); +// auto division = std::div(t.fractional * multiplier + delta, static_cast(1000000000)); +// return {t.whole * multiplier + division.quot, t.fractional * multiplier + delta}; +// } + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt32 t, Int64, const DateLUTImpl &, UInt16 = DataTypeDateTime64::default_scale) { return t; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = DataTypeDateTime64::default_scale) + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32 t, Int64, const DateLUTImpl &, UInt16 = DataTypeDateTime64::default_scale) { - return time_zone.fromDayNum(ExtendedDayNum(d)) * DecimalUtils::scaleMultiplier(scale) + delta; + return t; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = DataTypeDateTime64::default_scale) + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = DataTypeDateTime64::default_scale) { return time_zone.fromDayNum(DayNum(d)) + delta; } @@ -67,29 +76,37 @@ struct AddMicrosecondsImpl { static constexpr auto name = "addMicroseconds"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { - Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); - if (scale <= 6) - { - auto division = std::div( (t.fractional + delta), static_cast(10e6)); - return {t.whole * multiplier + division.quot, division.rem}; - } else { - auto division = std::div( (t.fractional + delta * multiplier), static_cast(10e6 * multiplier)); - return {t.whole + division.quot, division.rem}; - } + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + return t * multiplier + delta; } +// static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents +// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) +// { +// Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); +// if (scale <= 6) +// { +// auto division = std::div( (t.fractional + delta), static_cast(10e6)); +// return {t.whole * multiplier + division.quot, division.rem}; +// } else { +// auto division = std::div( (t.fractional + delta * multiplier), static_cast(10e6 * multiplier)); +// return {t.whole + division.quot, division.rem}; +// } +// } + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64, const DateLUTImpl &, UInt16 = 0) { return t; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32 t, Int64, const DateLUTImpl &, UInt16 = DataTypeDateTime64::default_scale) { - // use default datetime64 scale - return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; + return t; } + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta; @@ -100,29 +117,37 @@ struct AddMillisecondsImpl { static constexpr auto name = "addMilliseconds"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { - Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); - if (scale <= 3) - { - auto division = std::div( (t.fractional + delta), static_cast(1000)); - return {t.whole * multiplier + division.quot, division.rem}; - } else { - auto division = std::div( (t.fractional + delta * multiplier), static_cast(1000 * multiplier)); - return {t.whole + division.quot,division.rem}; - } + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + return t * multiplier + delta; } +// static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents +// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) +// { +// Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); +// if (scale <= 3) +// { +// auto division = std::div( (t.fractional + delta), static_cast(1000)); +// return {t.whole * multiplier + division.quot, division.rem}; +// } else { +// auto division = std::div( (t.fractional + delta * multiplier), static_cast(1000 * multiplier)); +// return {t.whole + division.quot,division.rem}; +// } +// } + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64, const DateLUTImpl &, UInt16 = 0) { return t; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32 t, Int64, const DateLUTImpl &, UInt16 = DataTypeDateTime64::default_scale) { - // use default datetime64 scale - return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; + return t; } + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta; @@ -133,21 +158,34 @@ struct AddSecondsImpl { static constexpr auto name = "addSeconds"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { - return {t.whole + delta, t.fractional}; + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + return t * multiplier + delta; } +// static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents +// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) +// { +// return {t.whole + delta, t.fractional}; +// } + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) +// static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) +// { +// // use default datetime64 scale +// return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; +// } + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32 t, Int64, const DateLUTImpl &, UInt16 = DataTypeDateTime64::default_scale) { - // use default datetime64 scale - return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; + return t; } + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta; @@ -158,21 +196,34 @@ struct AddMinutesImpl { static constexpr auto name = "addMinutes"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { - return {t.whole + delta * 60, t.fractional}; + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + return t * multiplier + delta; } +// static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents +// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) +// { +// return {t.whole + delta * 60, t.fractional}; +// } + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta * 60; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32 t, Int64, const DateLUTImpl &, UInt16 = DataTypeDateTime64::default_scale) { - // use default datetime64 scale - return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000; + return t; } + +// static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) +// { +// // use default datetime64 scale +// return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000; +// } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta * 60; @@ -183,20 +234,33 @@ struct AddHoursImpl { static constexpr auto name = "addHours"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { - return {t.whole + delta * 3600, t.fractional}; + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + return t * multiplier + delta; } + +// static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents +// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) +// { +// return {t.whole + delta * 3600, t.fractional}; +// } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta * 3600; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32 t, Int64, const DateLUTImpl &, UInt16 = DataTypeDateTime64::default_scale) { - // use default datetime64 scale - return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000; + return t; } + +// static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) +// { +// // use default datetime64 scale +// return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000; +// } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta * 3600; @@ -207,12 +271,19 @@ struct AddDaysImpl { static constexpr auto name = "addDays"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { - return {time_zone.addDays(t.whole, delta), t.fractional}; + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + return t * multiplier + delta; } +// static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents +// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) +// { +// return {time_zone.addDays(t.whole, delta), t.fractional}; +// } + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addDays(t, delta); @@ -233,23 +304,24 @@ struct AddWeeksImpl { static constexpr auto name = "addWeeks"; - static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static inline DateTime64 + execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { - return {time_zone.addWeeks(t.whole, delta), t.fractional}; + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + return t * multiplier + delta; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addWeeks(t, delta); } - static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &, UInt16 = 0) + static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &, UInt16 = 0) { return d + delta * 7; } - static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &, UInt16 = 0) + static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &, UInt16 = 0) { return d + delta * 7; } @@ -259,12 +331,19 @@ struct AddMonthsImpl { static constexpr auto name = "addMonths"; - static inline DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static inline DateTime64 + execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { - return {time_zone.addMonths(t.whole, delta), t.fractional}; + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + return t * multiplier + delta; } +// static inline DecimalUtils::DecimalComponents +// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) +// { +// return {time_zone.addMonths(t.whole, delta), t.fractional}; +// } + static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addMonths(t, delta); @@ -285,12 +364,19 @@ struct AddQuartersImpl { static constexpr auto name = "addQuarters"; - static inline DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static inline DateTime64 + execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { - return {time_zone.addQuarters(t.whole, delta), t.fractional}; + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + return t * multiplier + delta; } +// static inline DecimalUtils::DecimalComponents +// execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) +// { +// return {time_zone.addQuarters(t.whole, delta), t.fractional}; +// } + static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addQuarters(t, delta); @@ -311,12 +397,19 @@ struct AddYearsImpl { static constexpr auto name = "addYears"; - static inline DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { - return {time_zone.addYears(t.whole, delta), t.fractional}; + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + return t * multiplier + delta; } +// static inline DecimalUtils::DecimalComponents +// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) +// { +// return {time_zone.addYears(t.whole, delta), t.fractional}; +// } + static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addYears(t, delta); @@ -537,7 +630,37 @@ public: case TypeIndex::DateTime: return resolveReturnType(arguments); case TypeIndex::DateTime64: - return resolveReturnType(arguments); + if (typeid_cast(arguments[0].type.get())) + { + const auto & datetime64_type = assert_cast(*arguments[0].type); + + auto from_scale = datetime64_type.getScale(); + auto scale = from_scale; + + if (std::is_same_v || std::is_same_v) + scale = 9; + else if (std::is_same_v || std::is_same_v) + scale = 6; + else if (std::is_same_v || std::is_same_v) + scale = 3; + + scale = std::max(scale, from_scale); + + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + } + else + { + auto scale = DataTypeDateTime64::default_scale; + + if (std::is_same_v || std::is_same_v) + scale = 9; + else if (std::is_same_v || std::is_same_v) + scale = 6; + else if (std::is_same_v || std::is_same_v) + scale = 3; + + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + } default: { throw Exception("Invalid type of 1st argument of function " + getName() + ": " @@ -549,17 +672,17 @@ public: // TransformDateTime64 helps choosing correct overload of exec and does some transformations // on input and output parameters to simplify support of DateTime64 in concrete Transform. - template - using TransformType = std::conditional_t< - std::is_same_v, - TransformDateTime64, - Transform>; +// template +// using TransformType = std::conditional_t< +// std::is_same_v, +// TransformDateTime64, +// Transform>; /// Helper templates to deduce return type based on argument type, since some overloads may promote or denote types, /// e.g. addSeconds(Date, 1) => DateTime template - using TransformExecuteReturnType = decltype(std::declval>().execute(FieldType(), 0, std::declval(), 0)); + using TransformExecuteReturnType = decltype(std::declval().execute(FieldType(), 0, std::declval(), 0)); // Deduces RETURN DataType from INPUT DataType, based on return type of Transform{}.execute(INPUT_TYPE, UInt64, DateLUTImpl). // e.g. for Transform-type that has execute()-overload with 'UInt16' input and 'UInt32' return, @@ -580,40 +703,6 @@ public: { return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); } - else if constexpr (std::is_same_v) - { - if (typeid_cast(arguments[0].type.get())) - { - const auto & datetime64_type = assert_cast(*arguments[0].type); - - auto from_scale = datetime64_type.getScale(); - auto scale = from_scale; - - if (std::is_same_v) - scale = 9; - else if (std::is_same_v) - scale = 6; - else if (std::is_same_v) - scale = 3; - - scale = std::max(scale, from_scale); - - return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); - } - else - { - auto scale = DataTypeDateTime64::default_scale; - - if (std::is_same_v) - scale = 9; - else if (std::is_same_v) - scale = 6; - else if (std::is_same_v) - scale = 3; - - return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); - } - } else { static_assert("Failed to resolve return type."); @@ -649,9 +738,8 @@ public: else if (const auto * datetime64_type = assert_cast(from_type)) { auto scale = datetime64_type->getScale(); - using WrappedTransformType = TransformType; - return DateTimeAddIntervalImpl, WrappedTransformType>::execute( - WrappedTransformType{datetime64_type->getScale()}, arguments, result_type, scale); + return DateTimeAddIntervalImpl, Transform>::execute( + Transform{}, arguments, result_type, scale); } else throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName(), diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 1276157cc91..08e866620c4 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -38,12 +38,34 @@ static FillColumnDescription::StepFunction getStepFunction( { switch (kind) { - #define DECLARE_CASE(NAME) \ +#define TIME(NAME, SCALE) \ + case IntervalKind::NAME: \ + return [step, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(get(field), step, date_lut, SCALE); }; + + TIME(Nanosecond, 9) + TIME(Microsecond, 6) + TIME(Millisecond, 3) +#undef TIME + + #define TIME(NAME) \ case IntervalKind::NAME: \ return [step, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(get(field), step, date_lut); }; - FOR_EACH_INTERVAL_KIND(DECLARE_CASE) - #undef DECLARE_CASE + TIME(Second) + TIME(Minute) + TIME(Hour) + #undef TIME + +#define DAYS(NAME) \ + case IntervalKind::NAME: \ + return [step, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(get(field), step, date_lut); }; + + DAYS(Day) + DAYS(Week) + DAYS(Month) + DAYS(Quarter) + DAYS(Year) +#undef DAYS } __builtin_unreachable(); } @@ -92,7 +114,7 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & Int64 avg_seconds = get(descr.fill_step) * descr.step_kind->toAvgSeconds(); if (avg_seconds < 86400) throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, - "Value of step is to low ({} seconds). Must be >= 1 day", avg_seconds); + "Value of step is too low ({} seconds). Must be >= 1 day", avg_seconds); } if (which.isDate()) @@ -113,10 +135,8 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & descr.step_func = [step, &time_zone = date_time64->getTimeZone()](Field & field) \ { \ auto field_decimal = get>(field); \ - auto components = DecimalUtils::splitWithScaleMultiplier(field_decimal.getValue(), field_decimal.getScaleMultiplier()); \ - auto res = Add##NAME##sImpl::execute(components, step, time_zone); \ - auto res_decimal = decimalFromComponentsWithMultiplier(res, field_decimal.getScaleMultiplier()); \ - field = DecimalField(res_decimal, field_decimal.getScale()); \ + auto res = Add##NAME##sImpl::execute(field_decimal.getValue(), step, time_zone, field_decimal.getScale()); \ + field = DecimalField(res, field_decimal.getScale()); \ }; \ break; From 888542e29b254bb671055e48585f783342725fe1 Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 14 Feb 2022 02:52:56 +0300 Subject: [PATCH 026/132] add[interval] no longer oses decimal components Not only support for better subsecond logic, but also fewer conversions -> faster operation --- .../FunctionDateOrDateTimeAddInterval.h | 400 +++++++++--------- .../Transforms/FillingTransform.cpp | 62 +-- 2 files changed, 219 insertions(+), 243 deletions(-) diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index 0b8750f9d72..a138e3d867a 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -25,6 +25,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; + extern const int SYNTAX_ERROR; } /// Type of first argument of 'execute' function overload defines what INPUT DataType it is used for. @@ -41,34 +42,35 @@ struct AddNanosecondsImpl { static constexpr auto name = "addNanoseconds"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + auto division = std::div(t.fractional * multiplier + delta, static_cast(1000000000)); + return {t.whole * multiplier + division.quot, t.fractional * multiplier + delta}; + } + + static inline DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); return t * multiplier + delta; } -// static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents -// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) -// { -// Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); -// auto division = std::div(t.fractional * multiplier + delta, static_cast(1000000000)); -// return {t.whole * multiplier + division.quot, t.fractional * multiplier + delta}; -// } - - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt32 t, Int64, const DateLUTImpl &, UInt16 = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { - return t; + Int64 multiplier = DecimalUtils::scaleMultiplier(9); + return t * multiplier + delta; } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32 t, Int64, const DateLUTImpl &, UInt16 = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) { - return t; + throw Exception("addNanoSeconds() cannot be used with Date", ErrorCodes::SYNTAX_ERROR); } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) { - return time_zone.fromDayNum(DayNum(d)) + delta; + throw Exception("addNanoSeconds() cannot be used with Date32", ErrorCodes::SYNTAX_ERROR); } }; @@ -76,40 +78,47 @@ struct AddMicrosecondsImpl { static constexpr auto name = "addMicroseconds"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { - Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); + if (scale <= 6) + { + auto division = std::div( (t.fractional + delta), static_cast(10e6)); + return {t.whole * multiplier + division.quot, division.rem}; + } else { + auto division = std::div( (t.fractional + delta * multiplier), static_cast(10e6 * multiplier)); + return {t.whole + division.quot, division.rem}; + } + } + + static inline DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); + if (scale <= 6) + { + return t * multiplier + delta; + } else { + return t + delta * multiplier; + } + + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(6); return t * multiplier + delta; } -// static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents -// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) -// { -// Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); -// if (scale <= 6) -// { -// auto division = std::div( (t.fractional + delta), static_cast(10e6)); -// return {t.whole * multiplier + division.quot, division.rem}; -// } else { -// auto division = std::div( (t.fractional + delta * multiplier), static_cast(10e6 * multiplier)); -// return {t.whole + division.quot, division.rem}; -// } -// } - - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) { - return t; + throw Exception("addMicroSeconds() cannot be used with Date", ErrorCodes::SYNTAX_ERROR); } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32 t, Int64, const DateLUTImpl &, UInt16 = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) { - return t; - } - - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) - { - return time_zone.fromDayNum(DayNum(d)) + delta; + throw Exception("addMicroSeconds() cannot be used with Date32", ErrorCodes::SYNTAX_ERROR); } }; @@ -117,40 +126,47 @@ struct AddMillisecondsImpl { static constexpr auto name = "addMilliseconds"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) { - Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); + if (scale <= 3) + { + auto division = std::div( (t.fractional + delta), static_cast(1000)); + return {t.whole * multiplier + division.quot, division.rem}; + } else { + auto division = std::div( (t.fractional + delta * multiplier), static_cast(1000 * multiplier)); + return {t.whole + division.quot,division.rem}; + } + } + + static inline DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); + if (scale <= 3) + { + return t * multiplier + delta; + } else { + return t + delta * multiplier; + } + + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(3); return t * multiplier + delta; } -// static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents -// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) -// { -// Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); -// if (scale <= 3) -// { -// auto division = std::div( (t.fractional + delta), static_cast(1000)); -// return {t.whole * multiplier + division.quot, division.rem}; -// } else { -// auto division = std::div( (t.fractional + delta * multiplier), static_cast(1000 * multiplier)); -// return {t.whole + division.quot,division.rem}; -// } -// } - - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) { - return t; + throw Exception("addMilliSeconds() cannot be used with Date", ErrorCodes::SYNTAX_ERROR); } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32 t, Int64, const DateLUTImpl &, UInt16 = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) { - return t; - } - - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) - { - return time_zone.fromDayNum(DayNum(d)) + delta; + throw Exception("addMilliSeconds() cannot be used with Date32", ErrorCodes::SYNTAX_ERROR); } }; @@ -158,32 +174,27 @@ struct AddSecondsImpl { static constexpr auto name = "addSeconds"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { - Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); - return t * multiplier + delta; + return {t.whole + delta, t.fractional}; } -// static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents -// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) -// { -// return {t.whole + delta, t.fractional}; -// } + static inline DateTime64 + execute(DateTime64 t, Int64, const DateLUTImpl &, UInt16 = 0) + { + return t; + } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta; } -// static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) -// { -// // use default datetime64 scale -// return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; -// } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32 t, Int64, const DateLUTImpl &, UInt16 = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { - return t; + // use default datetime64 scale + return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) @@ -196,34 +207,29 @@ struct AddMinutesImpl { static constexpr auto name = "addMinutes"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { - Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); - return t * multiplier + delta; + return {t.whole + delta * 60, t.fractional}; } -// static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents -// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) -// { -// return {t.whole + delta * 60, t.fractional}; -// } + static inline DateTime64 + execute(DateTime64 t, Int64, const DateLUTImpl &, UInt16 = 0) + { + return t; + } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta * 60; } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32 t, Int64, const DateLUTImpl &, UInt16 = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { - return t; + // use default datetime64 scale + return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000; } -// static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) -// { -// // use default datetime64 scale -// return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000; -// } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta * 60; @@ -234,33 +240,29 @@ struct AddHoursImpl { static constexpr auto name = "addHours"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { - Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); - return t * multiplier + delta; + return {t.whole + delta * 3600, t.fractional}; + } + + static inline DateTime64 + execute(DateTime64 t, Int64, const DateLUTImpl &, UInt16 = 0) + { + return t; } -// static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents -// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) -// { -// return {t.whole + delta * 3600, t.fractional}; -// } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta * 3600; } - static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32 t, Int64, const DateLUTImpl &, UInt16 = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { - return t; + // use default datetime64 scale + return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000; } -// static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) -// { -// // use default datetime64 scale -// return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000; -// } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta * 3600; @@ -271,18 +273,17 @@ struct AddDaysImpl { static constexpr auto name = "addDays"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { - Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); - return t * multiplier + delta; + return {time_zone.addDays(t.whole, delta), t.fractional}; } -// static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents -// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) -// { -// return {time_zone.addDays(t.whole, delta), t.fractional}; -// } + static inline DateTime64 + execute(DateTime64 t, Int64, const DateLUTImpl &, UInt16 = 0) + { + return t; + } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { @@ -304,24 +305,29 @@ struct AddWeeksImpl { static constexpr auto name = "addWeeks"; - static inline DateTime64 - execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { - Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); - return t * multiplier + delta; + return {time_zone.addWeeks(t.whole, delta), t.fractional}; } - static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static inline DateTime64 + execute(DateTime64 t, Int32, const DateLUTImpl &, UInt16 = 0) + { + return t; + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addWeeks(t, delta); } - static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &, UInt16 = 0) { return d + delta * 7; } - static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &, UInt16 = 0) { return d + delta * 7; } @@ -331,18 +337,17 @@ struct AddMonthsImpl { static constexpr auto name = "addMonths"; - static inline DateTime64 - execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + static inline DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { - Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); - return t * multiplier + delta; + return {time_zone.addMonths(t.whole, delta), t.fractional}; } -// static inline DecimalUtils::DecimalComponents -// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) -// { -// return {time_zone.addMonths(t.whole, delta), t.fractional}; -// } + static inline DateTime64 + execute(DateTime64 t, Int64, const DateLUTImpl &, UInt16 = 0) + { + return t; + } static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { @@ -364,18 +369,17 @@ struct AddQuartersImpl { static constexpr auto name = "addQuarters"; - static inline DateTime64 - execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + static inline DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { - Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); - return t * multiplier + delta; + return {time_zone.addQuarters(t.whole, delta), t.fractional}; } -// static inline DecimalUtils::DecimalComponents -// execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) -// { -// return {time_zone.addQuarters(t.whole, delta), t.fractional}; -// } + static inline DateTime64 + execute(DateTime64 t, Int32, const DateLUTImpl &, UInt16 = 0) + { + return t; + } static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { @@ -397,18 +401,17 @@ struct AddYearsImpl { static constexpr auto name = "addYears"; - static inline NO_SANITIZE_UNDEFINED DateTime64 - execute(const DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + static inline DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { - Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); - return t * multiplier + delta; + return {time_zone.addYears(t.whole, delta), t.fractional}; } -// static inline DecimalUtils::DecimalComponents -// execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) -// { -// return {time_zone.addYears(t.whole, delta), t.fractional}; -// } + static inline DateTime64 + execute(DateTime64 t, Int64, const DateLUTImpl &, UInt16 = 0) + { + return t; + } static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { @@ -630,37 +633,7 @@ public: case TypeIndex::DateTime: return resolveReturnType(arguments); case TypeIndex::DateTime64: - if (typeid_cast(arguments[0].type.get())) - { - const auto & datetime64_type = assert_cast(*arguments[0].type); - - auto from_scale = datetime64_type.getScale(); - auto scale = from_scale; - - if (std::is_same_v || std::is_same_v) - scale = 9; - else if (std::is_same_v || std::is_same_v) - scale = 6; - else if (std::is_same_v || std::is_same_v) - scale = 3; - - scale = std::max(scale, from_scale); - - return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); - } - else - { - auto scale = DataTypeDateTime64::default_scale; - - if (std::is_same_v || std::is_same_v) - scale = 9; - else if (std::is_same_v || std::is_same_v) - scale = 6; - else if (std::is_same_v || std::is_same_v) - scale = 3; - - return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); - } + return resolveReturnType(arguments); default: { throw Exception("Invalid type of 1st argument of function " + getName() + ": " @@ -670,17 +643,8 @@ public: } } - // TransformDateTime64 helps choosing correct overload of exec and does some transformations - // on input and output parameters to simplify support of DateTime64 in concrete Transform. -// template -// using TransformType = std::conditional_t< -// std::is_same_v, -// TransformDateTime64, -// Transform>; - /// Helper templates to deduce return type based on argument type, since some overloads may promote or denote types, /// e.g. addSeconds(Date, 1) => DateTime - template using TransformExecuteReturnType = decltype(std::declval().execute(FieldType(), 0, std::declval(), 0)); @@ -703,6 +667,40 @@ public: { return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); } + else if constexpr (std::is_same_v) + { + if (typeid_cast(arguments[0].type.get())) + { + const auto & datetime64_type = assert_cast(*arguments[0].type); + + auto from_scale = datetime64_type.getScale(); + auto scale = from_scale; + + if (std::is_same_v) + scale = 9; + else if (std::is_same_v) + scale = 6; + else if (std::is_same_v) + scale = 3; + + scale = std::max(scale, from_scale); + + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + } + else + { + auto scale = DataTypeDateTime64::default_scale; + + if (std::is_same_v) + scale = 9; + else if (std::is_same_v) + scale = 6; + else if (std::is_same_v) + scale = 3; + + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + } + } else { static_assert("Failed to resolve return type."); @@ -737,9 +735,9 @@ public: } else if (const auto * datetime64_type = assert_cast(from_type)) { - auto scale = datetime64_type->getScale(); + auto from_scale = datetime64_type->getScale(); return DateTimeAddIntervalImpl, Transform>::execute( - Transform{}, arguments, result_type, scale); + Transform{}, arguments, result_type, from_scale); } else throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName(), diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 08e866620c4..e77bff09f28 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -34,38 +34,16 @@ Block FillingTransform::transformHeader(Block header, const SortDescription & so template static FillColumnDescription::StepFunction getStepFunction( - IntervalKind kind, Int64 step, const DateLUTImpl & date_lut) + IntervalKind kind, Int64 step, const DateLUTImpl & date_lut, UInt16 scale = DataTypeDateTime64::default_scale) { switch (kind) { -#define TIME(NAME, SCALE) \ +#define DECLARE_CASE(NAME) \ case IntervalKind::NAME: \ - return [step, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(get(field), step, date_lut, SCALE); }; + return [step, scale, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(get>(field).getValue(), step, date_lut, scale); }; - TIME(Nanosecond, 9) - TIME(Microsecond, 6) - TIME(Millisecond, 3) -#undef TIME - - #define TIME(NAME) \ - case IntervalKind::NAME: \ - return [step, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(get(field), step, date_lut); }; - - TIME(Second) - TIME(Minute) - TIME(Hour) - #undef TIME - -#define DAYS(NAME) \ - case IntervalKind::NAME: \ - return [step, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(get(field), step, date_lut); }; - - DAYS(Day) - DAYS(Week) - DAYS(Month) - DAYS(Quarter) - DAYS(Year) -#undef DAYS + FOR_EACH_INTERVAL_KIND(DECLARE_CASE) +#undef DECLARE_CASE } __builtin_unreachable(); } @@ -114,7 +92,7 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & Int64 avg_seconds = get(descr.fill_step) * descr.step_kind->toAvgSeconds(); if (avg_seconds < 86400) throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, - "Value of step is too low ({} seconds). Must be >= 1 day", avg_seconds); + "Value of step is to low ({} seconds). Must be >= 1 day", avg_seconds); } if (which.isDate()) @@ -130,23 +108,23 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & switch (*descr.step_kind) { - #define DECLARE_CASE(NAME) \ +#define DECLARE_CASE(NAME) \ case IntervalKind::NAME: \ descr.step_func = [step, &time_zone = date_time64->getTimeZone()](Field & field) \ { \ auto field_decimal = get>(field); \ - auto res = Add##NAME##sImpl::execute(field_decimal.getValue(), step, time_zone, field_decimal.getScale()); \ + auto res = Add##NAME##sImpl::execute(field_decimal.getValue(), step, time_zone, field_decimal.getScaleMultiplier()); \ field = DecimalField(res, field_decimal.getScale()); \ }; \ break; FOR_EACH_INTERVAL_KIND(DECLARE_CASE) - #undef DECLARE_CASE +#undef DECLARE_CASE } } else throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, - "STEP of Interval type can be used only with Date/DateTime types, but got {}", type->getName()); + "STEP of Interval type can be used only with Date/DateTime types, but got {}", type->getName()); } else { @@ -160,12 +138,12 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & } FillingTransform::FillingTransform( - const Block & header_, const SortDescription & sort_description_, bool on_totals_) - : ISimpleTransform(header_, transformHeader(header_, sort_description_), true) - , sort_description(sort_description_) - , on_totals(on_totals_) - , filling_row(sort_description_) - , next_row(sort_description_) + const Block & header_, const SortDescription & sort_description_, bool on_totals_) + : ISimpleTransform(header_, transformHeader(header_, sort_description_), true) + , sort_description(sort_description_) + , on_totals(on_totals_) + , filling_row(sort_description_) + , next_row(sort_description_) { if (on_totals) return; @@ -182,14 +160,14 @@ FillingTransform::FillingTransform( if (!tryConvertFields(descr, type)) throw Exception("Incompatible types of WITH FILL expression values with column type " - + type->getName(), ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + + type->getName(), ErrorCodes::INVALID_WITH_FILL_EXPRESSION); if (type->isValueRepresentedByUnsignedInteger() && ((!descr.fill_from.isNull() && less(descr.fill_from, Field{0}, 1)) || - (!descr.fill_to.isNull() && less(descr.fill_to, Field{0}, 1)))) + (!descr.fill_to.isNull() && less(descr.fill_to, Field{0}, 1)))) { throw Exception("WITH FILL bound values cannot be negative for unsigned type " - + type->getName(), ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + + type->getName(), ErrorCodes::INVALID_WITH_FILL_EXPRESSION); } } @@ -234,7 +212,7 @@ void FillingTransform::transform(Chunk & chunk) MutableColumns res_other_columns; auto init_columns_by_positions = [](const Columns & old_columns, Columns & new_columns, - MutableColumns & new_mutable_columns, const Positions & positions) + MutableColumns & new_mutable_columns, const Positions & positions) { for (size_t pos : positions) { From f60a9444836c1e2f2cd09aebbab9cbe33f1048d2 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 16 Feb 2022 02:43:08 +0300 Subject: [PATCH 027/132] fixed toStartOfInterval --- src/Functions/DateTimeTransforms.h | 5 + .../FunctionDateOrDateTimeAddInterval.h | 60 ++-- src/Functions/toStartOfInterval.cpp | 279 ++++++++++-------- 3 files changed, 194 insertions(+), 150 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 4d1df65d0ea..bc1ae807e7d 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -41,6 +41,11 @@ namespace ErrorCodes throw Exception("Illegal type Date of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } + static inline UInt32 dateTimeIsNotSupported(const char * name) + { + throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + /// This factor transformation will say that the function is monotone everywhere. struct ZeroTransform { diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index a138e3d867a..e36201abcb3 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -86,7 +86,9 @@ struct AddMicrosecondsImpl { auto division = std::div( (t.fractional + delta), static_cast(10e6)); return {t.whole * multiplier + division.quot, division.rem}; - } else { + } + else + { auto division = std::div( (t.fractional + delta * multiplier), static_cast(10e6 * multiplier)); return {t.whole + division.quot, division.rem}; } @@ -99,7 +101,9 @@ struct AddMicrosecondsImpl if (scale <= 6) { return t * multiplier + delta; - } else { + } + else + { return t + delta * multiplier; } @@ -134,7 +138,9 @@ struct AddMillisecondsImpl { auto division = std::div( (t.fractional + delta), static_cast(1000)); return {t.whole * multiplier + division.quot, division.rem}; - } else { + } + else + { auto division = std::div( (t.fractional + delta * multiplier), static_cast(1000 * multiplier)); return {t.whole + division.quot,division.rem}; } @@ -147,7 +153,9 @@ struct AddMillisecondsImpl if (scale <= 3) { return t * multiplier + delta; - } else { + } + else + { return t + delta * multiplier; } @@ -181,9 +189,9 @@ struct AddSecondsImpl } static inline DateTime64 - execute(DateTime64 t, Int64, const DateLUTImpl &, UInt16 = 0) + execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { - return t; + return t + delta * DecimalUtils::scaleMultiplier(scale); } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -214,9 +222,9 @@ struct AddMinutesImpl } static inline DateTime64 - execute(DateTime64 t, Int64, const DateLUTImpl &, UInt16 = 0) + execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { - return t; + return t + 60 * delta * DecimalUtils::scaleMultiplier(scale); } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -247,9 +255,9 @@ struct AddHoursImpl } static inline DateTime64 - execute(DateTime64 t, Int64, const DateLUTImpl &, UInt16 = 0) + execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { - return t; + return t + 3600 * delta * DecimalUtils::scaleMultiplier(scale); } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -280,9 +288,11 @@ struct AddDaysImpl } static inline DateTime64 - execute(DateTime64 t, Int64, const DateLUTImpl &, UInt16 = 0) + execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) { - return t; + auto multiplier = DecimalUtils::scaleMultiplier(scale); + auto d = std::div(t, multiplier); + return time_zone.addDays(d.quot, delta) * multiplier + d.rem; } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) @@ -312,9 +322,11 @@ struct AddWeeksImpl } static inline DateTime64 - execute(DateTime64 t, Int32, const DateLUTImpl &, UInt16 = 0) + execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) { - return t; + auto multiplier = DecimalUtils::scaleMultiplier(scale); + auto d = std::div(t, multiplier); + return time_zone.addDays(d.quot, delta * 7) * multiplier + d.rem; } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) @@ -344,9 +356,11 @@ struct AddMonthsImpl } static inline DateTime64 - execute(DateTime64 t, Int64, const DateLUTImpl &, UInt16 = 0) + execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) { - return t; + auto multiplier = DecimalUtils::scaleMultiplier(scale); + auto d = std::div(t, multiplier); + return time_zone.addMonths(d.quot, delta) * multiplier + d.rem; } static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) @@ -376,9 +390,11 @@ struct AddQuartersImpl } static inline DateTime64 - execute(DateTime64 t, Int32, const DateLUTImpl &, UInt16 = 0) + execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) { - return t; + auto multiplier = DecimalUtils::scaleMultiplier(scale); + auto d = std::div(t, multiplier); + return time_zone.addQuarters(d.quot, delta) * multiplier + d.rem; } static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) @@ -408,9 +424,11 @@ struct AddYearsImpl } static inline DateTime64 - execute(DateTime64 t, Int64, const DateLUTImpl &, UInt16 = 0) + execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) { - return t; + auto multiplier = DecimalUtils::scaleMultiplier(scale); + auto d = std::div(t, multiplier); + return time_zone.addYears(d.quot, delta) * multiplier + d.rem; } static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) @@ -522,7 +540,7 @@ private: template struct DateTimeAddIntervalImpl { - static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale = DataTypeDateTime64::default_scale) + static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale = 0) { using FromValueType = typename FromDataType::FieldType; using FromColumnType = typename FromDataType::ColumnType; diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index a48b2cdf075..8bd0cbab3e8 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -33,245 +33,248 @@ namespace template <> struct Transform { - static constexpr auto name = function_name; - - static UInt16 execute(UInt16 d, UInt64 years, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, UInt64 years, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfYearInterval(DayNum(d), years); } - static UInt16 execute(Int32 d, UInt64 years, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, UInt64 years, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years); } - static UInt16 execute(UInt32 t, UInt64 years, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, UInt64 years, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); } - static UInt16 execute(Int64 t, UInt64 years, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, UInt64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); + return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); } }; template <> struct Transform { - static constexpr auto name = function_name; - - static UInt16 execute(UInt16 d, UInt64 quarters, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, UInt64 quarters, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfQuarterInterval(DayNum(d), quarters); } - static UInt16 execute(Int32 d, UInt64 quarters, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, UInt64 quarters, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters); } - static UInt16 execute(UInt32 t, UInt64 quarters, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, UInt64 quarters, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); } - static UInt16 execute(Int64 t, UInt64 quarters, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, UInt64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); + return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); } }; template <> struct Transform { - static constexpr auto name = function_name; - - static UInt16 execute(UInt16 d, UInt64 months, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, UInt64 months, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfMonthInterval(DayNum(d), months); } - static UInt16 execute(Int32 d, UInt64 months, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, UInt64 months, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months); } - static UInt16 execute(UInt32 t, UInt64 months, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, UInt64 months, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); } - static UInt16 execute(Int64 t, UInt64 months, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, UInt64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); + return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); } }; template <> struct Transform { - static constexpr auto name = function_name; - - static UInt16 execute(UInt16 d, UInt64 weeks, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, UInt64 weeks, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfWeekInterval(DayNum(d), weeks); } - static UInt16 execute(Int32 d, UInt64 weeks, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, UInt64 weeks, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks); } - static UInt16 execute(UInt32 t, UInt64 weeks, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, UInt64 weeks, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); } - static UInt16 execute(Int64 t, UInt64 weeks, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, UInt64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); + return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); } }; template <> struct Transform { - static constexpr auto name = function_name; - - static UInt32 execute(UInt16 d, UInt64 days, const DateLUTImpl & time_zone) + static UInt32 execute(UInt16 d, UInt64 days, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days); } - static UInt32 execute(Int32 d, UInt64 days, const DateLUTImpl & time_zone) + static UInt32 execute(Int32 d, UInt64 days, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days); } - static UInt32 execute(UInt32 t, UInt64 days, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, UInt64 days, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days); } - static UInt32 execute(Int64 t, UInt64 days, const DateLUTImpl & time_zone) + static Int64 execute(Int64 t, UInt64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days); + return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days); } }; template <> struct Transform { - static constexpr auto name = function_name; + static UInt32 execute(UInt16, UInt64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } - static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); } - static UInt32 execute(Int64 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); } + static UInt32 execute(Int32, UInt64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + + static UInt32 execute(UInt32 t, UInt64 hours, const DateLUTImpl & time_zone, Int64) { + return time_zone.toStartOfHourInterval(t, hours); + } + + static UInt32 execute(Int64 t, UInt64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) { + return time_zone.toStartOfHourInterval(t / scale_multiplier, hours); + } }; template <> struct Transform { - static constexpr auto name = function_name; + static UInt32 execute(UInt16, UInt64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + static UInt32 execute(Int32, UInt64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, UInt64 minutes, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, UInt64 minutes, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfMinuteInterval(t, minutes); } - static UInt32 execute(Int64 t, UInt64 minutes, const DateLUTImpl & time_zone) + static UInt32 execute(Int64 t, UInt64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfMinuteInterval(t, minutes); + return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes); } }; template <> struct Transform { - static constexpr auto name = function_name; + static UInt32 execute(UInt16, UInt64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + static UInt32 execute(Int32, UInt64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfSecondInterval(t, seconds); } - static Int64 execute(Int64 t, UInt64 seconds, const DateLUTImpl & time_zone) + static UInt32 execute(Int64 t, UInt64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfSecondInterval(t, seconds); + return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds); } }; template <> struct Transform { - static constexpr auto name = function_name; + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone) + static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier) { - return time_zone.toStartOfSecondInterval(t, seconds); - } - - static Int64 execute(Int64 t, UInt64 seconds, const DateLUTImpl & time_zone) - { - return time_zone.toStartOfSecondInterval(t, seconds); + if (scale_multiplier < 1000) + { + Int64 t_milliseconds = t * (static_cast(1000) / scale_multiplier); + return t_milliseconds - t_milliseconds % milliseconds; + } + else if (scale_multiplier > 1000) + { + Int64 scale_diff = scale_multiplier / static_cast(1000); + return (t - t % (milliseconds * (scale_diff))) / scale_diff; + } + else + return t - t % milliseconds; } }; template <> struct Transform { - static constexpr auto name = function_name; + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone) + static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier) { - return time_zone.toStartOfSecondInterval(t, seconds); - } - - static Int64 execute(Int64 t, UInt64 seconds, const DateLUTImpl & time_zone) - { - return time_zone.toStartOfSecondInterval(t, seconds); + if (scale_multiplier < 1000000) + { + Int64 t_microseconds = t * (static_cast(1000000) / scale_multiplier); + return t_microseconds - t_microseconds % microseconds; + } + else if (scale_multiplier > 1000000) + { + Int64 scale_diff = scale_multiplier / static_cast(1000000); + return (t - t % (microseconds * (scale_diff))) / scale_diff; + } + else + return t - t % microseconds; } }; template <> struct Transform { - static constexpr auto name = function_name; + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone) + static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier) { - return time_zone.toStartOfSecondInterval(t, seconds); - } - - static Int64 execute(Int64 t, UInt64 seconds, const DateLUTImpl & time_zone) - { - return time_zone.toStartOfSecondInterval(t, seconds); + if (scale_multiplier < 1000000000) + { + Int64 t_nanoseconds = t * (static_cast(1000000000) / scale_multiplier); + return t_nanoseconds - t_nanoseconds % nanoseconds; + } + else + return t - t % nanoseconds; } }; - class FunctionToStartOfInterval : public IFunction { public: @@ -300,6 +303,7 @@ public: const DataTypeInterval * interval_type = nullptr; bool result_type_is_date = false; + bool result_type_is_datetime = false; auto check_interval_argument = [&] { interval_type = checkAndGetDataType(arguments[1].type.get()); @@ -311,6 +315,8 @@ public: result_type_is_date = (interval_type->getKind() == IntervalKind::Year) || (interval_type->getKind() == IntervalKind::Quarter) || (interval_type->getKind() == IntervalKind::Month) || (interval_type->getKind() == IntervalKind::Week); + result_type_is_datetime = (interval_type->getKind() == IntervalKind::Day) || (interval_type->getKind() == IntervalKind::Hour) + || (interval_type->getKind() == IntervalKind::Minute) || (interval_type->getKind() == IntervalKind::Second); }; auto check_timezone_argument = [&] @@ -348,19 +354,33 @@ public: if (result_type_is_date) return std::make_shared(); - else + else if (result_type_is_datetime) return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + else + { + auto scale = 0; + + if (interval_type->getKind() == IntervalKind::Nanosecond) + scale = 9; + else if (interval_type->getKind() == IntervalKind::Microsecond) + scale = 6; + else if (interval_type->getKind() == IntervalKind::Millisecond) + scale = 3; + + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + } + } bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /* input_rows_count */) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override { const auto & time_column = arguments[0]; const auto & interval_column = arguments[1]; const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0); - auto result_column = dispatchForColumns(time_column, interval_column, time_zone); + auto result_column = dispatchForColumns(time_column, interval_column, result_type, time_zone); return result_column; } @@ -376,33 +396,36 @@ public: private: ColumnPtr dispatchForColumns( - const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DateLUTImpl & time_zone) const + const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const { const auto & from_datatype = *time_column.type.get(); const auto which_type = WhichDataType(from_datatype); + + if (which_type.isDateTime64()) + { + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + auto scale = assert_cast(from_datatype).getScale(); + + if (time_column_vec) + return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone, scale); + } if (which_type.isDateTime()) { const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, time_zone); + return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } if (which_type.isDate()) { const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, time_zone); + return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } if (which_type.isDate32()) { const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, time_zone); - } - if (which_type.isDateTime64()) - { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); - if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, time_zone); + return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } throw Exception( "Illegal column for first argument of function " + getName() + ". Must contain dates or dates with time", @@ -411,7 +434,8 @@ private: template ColumnPtr dispatchForIntervalColumn( - const FromDataType & from, const ColumnType & time_column, const ColumnWithTypeAndName & interval_column, const DateLUTImpl & time_zone) const + const FromDataType & from, const ColumnType & time_column, const ColumnWithTypeAndName & interval_column, + const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale = 1) const { const auto * interval_type = checkAndGetDataType(interval_column.type.get()); if (!interval_type) @@ -429,54 +453,51 @@ private: switch (interval_type->getKind()) { case IntervalKind::Nanosecond: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Microsecond: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Millisecond: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Second: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Minute: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Hour: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Day: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Week: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Month: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Quarter: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Year: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); } __builtin_unreachable(); } - - template - ColumnPtr execute(const FromDataType & from_datatype, const ColumnType & time_column, UInt64 num_units, const DateLUTImpl & time_zone) const + template + ColumnPtr execute(const FromDataType &, const ColumnType & time_column_type, UInt64 num_units, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const { - const auto & time_data = time_column.getData(); - size_t size = time_column.size(); - auto result = ColumnVector::create(); - auto & result_data = result->getData(); + using ToColumnType = typename ToDataType::ColumnType; + + const auto & time_data = time_column_type.getData(); + size_t size = time_data.size(); + + auto result_col = result_type->createColumn(); + auto col_to = assert_cast(result_col.get()); + auto & result_data = col_to->getData(); result_data.resize(size); - if constexpr (std::is_same_v || std::is_same_v) - { - const auto transform = TransformDateTime64>{from_datatype.getScale()}; - for (size_t i = 0; i != size; ++i) - result_data[i] = transform.execute(time_data[i], num_units, time_zone); - } - else - { - for (size_t i = 0; i != size; ++i) - result_data[i] = Transform::execute(time_data[i], num_units, time_zone); - } - return result; + Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); + + for (size_t i = 0; i != size; ++i) + result_data[i] = Transform::execute(time_data[i], num_units, time_zone, scale_multiplier); + + return result_col; } }; From cf244689a21c5454911b0010e0b7fc6556e835c0 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 16 Feb 2022 15:14:25 +0300 Subject: [PATCH 028/132] fixed filling transform --- contrib/replxx | 2 +- src/Functions/FunctionDateOrDateTimeAddInterval.h | 8 ++++---- src/Functions/toStartOfInterval.cpp | 6 ++++-- src/Processors/Transforms/FillingTransform.cpp | 4 ++-- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/contrib/replxx b/contrib/replxx index 9460e5e0fc1..c745b3fb012 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit 9460e5e0fc10f78f460af26a6bd928798cac864d +Subproject commit c745b3fb012ee5ae762fbc8cd7a40c4dc3fe15df diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index e36201abcb3..5274214baf4 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -84,12 +84,12 @@ struct AddMicrosecondsImpl Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); if (scale <= 6) { - auto division = std::div( (t.fractional + delta), static_cast(10e6)); + auto division = std::div((t.fractional + delta), static_cast(10e6)); return {t.whole * multiplier + division.quot, division.rem}; } else { - auto division = std::div( (t.fractional + delta * multiplier), static_cast(10e6 * multiplier)); + auto division = std::div((t.fractional + delta * multiplier), static_cast(10e6 * multiplier)); return {t.whole + division.quot, division.rem}; } } @@ -136,12 +136,12 @@ struct AddMillisecondsImpl Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); if (scale <= 3) { - auto division = std::div( (t.fractional + delta), static_cast(1000)); + auto division = std::div((t.fractional + delta), static_cast(1000)); return {t.whole * multiplier + division.quot, division.rem}; } else { - auto division = std::div( (t.fractional + delta * multiplier), static_cast(1000 * multiplier)); + auto division = std::div((t.fractional + delta * multiplier), static_cast(1000 * multiplier)); return {t.whole + division.quot,division.rem}; } } diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 8bd0cbab3e8..7695a013a24 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -157,11 +157,13 @@ namespace static UInt32 execute(Int32, UInt64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, UInt64 hours, const DateLUTImpl & time_zone, Int64) { + static UInt32 execute(UInt32 t, UInt64 hours, const DateLUTImpl & time_zone, Int64) + { return time_zone.toStartOfHourInterval(t, hours); } - static UInt32 execute(Int64 t, UInt64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) { + static UInt32 execute(Int64 t, UInt64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { return time_zone.toStartOfHourInterval(t / scale_multiplier, hours); } }; diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index e77bff09f28..ae97a769b23 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -40,7 +40,7 @@ static FillColumnDescription::StepFunction getStepFunction( { #define DECLARE_CASE(NAME) \ case IntervalKind::NAME: \ - return [step, scale, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(get>(field).getValue(), step, date_lut, scale); }; + return [step, scale, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(get(field), step, date_lut, scale); }; FOR_EACH_INTERVAL_KIND(DECLARE_CASE) #undef DECLARE_CASE @@ -113,7 +113,7 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & descr.step_func = [step, &time_zone = date_time64->getTimeZone()](Field & field) \ { \ auto field_decimal = get>(field); \ - auto res = Add##NAME##sImpl::execute(field_decimal.getValue(), step, time_zone, field_decimal.getScaleMultiplier()); \ + auto res = Add##NAME##sImpl::execute(field_decimal.getValue(), step, time_zone, field_decimal.getScale()); \ field = DecimalField(res, field_decimal.getScale()); \ }; \ break; From fca314aa11d179877e8929a84115ce0b2537b9a3 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 16 Feb 2022 19:23:10 +0300 Subject: [PATCH 029/132] fixed clang-tidy --- src/Functions/toStartOfInterval.cpp | 2 +- tests/queries/0_stateless/02207_subseconds_intervals.reference | 0 tests/queries/0_stateless/02207_subseconds_intervals.sql | 0 3 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02207_subseconds_intervals.reference create mode 100644 tests/queries/0_stateless/02207_subseconds_intervals.sql diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 7695a013a24..c3700b84d6d 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -490,7 +490,7 @@ private: size_t size = time_data.size(); auto result_col = result_type->createColumn(); - auto col_to = assert_cast(result_col.get()); + auto *col_to = assert_cast(result_col.get()); auto & result_data = col_to->getData(); result_data.resize(size); diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.sql b/tests/queries/0_stateless/02207_subseconds_intervals.sql new file mode 100644 index 00000000000..e69de29bb2d From b50235f55d15db486b480205f5cee4cd23af4d19 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 16 Feb 2022 21:05:04 +0300 Subject: [PATCH 030/132] added stateless tests --- .../02207_subseconds_intervals.reference | 56 +++++++++++++ .../02207_subseconds_intervals.sql | 83 +++++++++++++++++++ 2 files changed, 139 insertions(+) diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index e69de29bb2d..d83cdbf71ab 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -0,0 +1,56 @@ +test intervals +- test nanoseconds +1980-12-12 12:12:12.123456788 +1980-12-12 12:12:12.123456697 +1930-12-12 12:12:12.123456788 +1930-12-12 12:12:12.123456697 +2220-12-12 12:12:12.123456784 +2220-12-12 12:12:12.123456700 +- test microseconds +1980-12-12 12:12:12.123452 +1980-12-12 12:12:12.123396 +1980-12-12 12:12:12.123452 +1930-12-12 12:12:12.123452 +1930-12-12 12:12:12.123396 +1930-12-12 12:12:12.123452 +2220-12-12 12:12:12.123456 +2220-12-12 12:12:12.123400 +2220-12-12 12:12:12.123456 +- test milliseconds +1980-12-12 12:12:12.119 +1980-12-12 12:12:12.119 +1980-12-12 12:12:12.119 +1930-12-12 12:12:12.119 +1930-12-12 12:12:12.119 +1930-12-12 12:12:12.119 +2220-12-12 12:12:12.122 +2220-12-12 12:12:12.115 +2220-12-12 12:12:12.122 +test add[...]seconds() +- test nanoseconds +1980-12-12 12:12:12.123456790 +1980-12-12 12:12:12.123456701 +1930-12-12 12:12:12.123456788 +1930-12-12 12:12:12.123456699 +2220-12-12 12:12:12.123456790 +2220-12-12 12:12:12.123456701 +- test microseconds +1980-12-12 12:12:12.123456001 +1980-12-12 12:12:12.123400001 +1980-12-12 12:12:12.123456781 +1930-12-12 12:12:12.123455999 +1930-12-12 12:12:12.123399999 +1930-12-12 12:12:12.123456779 +2220-12-12 12:12:12.123456001 +2220-12-12 12:12:12.123400001 +2220-12-12 12:12:12.123456781 +- test milliseconds +1980-12-12 12:12:12.123000001 +1980-12-12 12:12:12.120000001 +1980-12-12 12:12:12.123456001 +1930-12-12 12:12:12.122999999 +1930-12-12 12:12:12.119999999 +1930-12-12 12:12:12.123455999 +2220-12-12 12:12:12.123000001 +2220-12-12 12:12:12.120000001 +2220-12-12 12:12:12.123456001 diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.sql b/tests/queries/0_stateless/02207_subseconds_intervals.sql index e69de29bb2d..acaf7b136b2 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.sql +++ b/tests/queries/0_stateless/02207_subseconds_intervals.sql @@ -0,0 +1,83 @@ +SELECT 'test intervals'; + +SELECT '- test nanoseconds'; +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456789', 9), INTERVAL 7 NANOSECOND); -- In normal range, source scale matches result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.1234567', 7), INTERVAL 7 NANOSECOND); -- In normal range, source scale less than result + +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123456789', 9), INTERVAL 7 NANOSECOND); -- Below normal range, source scale matches result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.1234567', 7), INTERVAL 7 NANOSECOND); -- Below normal range, source scale less than result + +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123456789', 9), INTERVAL 7 NANOSECOND); -- Above normal range, source scale matches result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.1234567', 7), INTERVAL 7 NANOSECOND); -- Above normal range, source scale less than result + + +SELECT '- test microseconds'; +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456', 6), INTERVAL 7 MICROSECOND); -- In normal range, source scale matches result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.1234', 4), INTERVAL 7 MICROSECOND); -- In normal range, source scale less than result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.12345678', 8), INTERVAL 7 MICROSECOND); -- In normal range, source scale greater than result + + +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123456', 6), INTERVAL 7 MICROSECOND); -- Below normal range, source scale matches result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.1234', 4), INTERVAL 7 MICROSECOND); -- Below normal range, source scale less than result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.12345678', 8), INTERVAL 7 MICROSECOND); -- Below normal range, source scale greater than result + + +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123456', 6), INTERVAL 7 MICROSECOND); -- Above normal range, source scale matches result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.1234', 4), INTERVAL 7 MICROSECOND); -- Above normal range, source scale less than result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.12345678', 8), INTERVAL 7 MICROSECOND); -- Above normal range, source scale greater than result + + +SELECT '- test milliseconds'; +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123', 3), INTERVAL 7 MILLISECOND); -- In normal range, source scale matches result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.12', 2), INTERVAL 7 MILLISECOND); -- In normal range, source scale less than result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456', 6), INTERVAL 7 MILLISECOND); -- In normal range, source scale greater than result + +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123', 3), INTERVAL 7 MILLISECOND); -- Below normal range, source scale matches result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.12', 2), INTERVAL 7 MILLISECOND); -- Below normal range, source scale less than result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123456', 6), INTERVAL 7 MILLISECOND); -- Below normal range, source scale greater than result + +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123', 3), INTERVAL 7 MILLISECOND); -- Above normal range, source scale matches result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.12', 2), INTERVAL 7 MILLISECOND); -- Above normal range, source scale less than result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123456', 6), INTERVAL 7 MILLISECOND); -- Above normal range, source scale greater than result + + +SELECT 'test add[...]seconds()'; + + +SELECT '- test nanoseconds'; +select addNanoseconds(toDateTime64('1980-12-12 12:12:12.123456789', 9), 1); -- In normal range, source scale matches result +select addNanoseconds(toDateTime64('1980-12-12 12:12:12.1234567', 7), 1); -- In normal range, source scale less than result + +select addNanoseconds(toDateTime64('1930-12-12 12:12:12.123456789', 9), 1); -- Below normal range, source scale matches result +select addNanoseconds(toDateTime64('1930-12-12 12:12:12.1234567', 7), 1); -- Below normal range, source scale less than result + +select addNanoseconds(toDateTime64('2220-12-12 12:12:12.123456789', 9), 1); -- Above normal range, source scale matches result +select addNanoseconds(toDateTime64('2220-12-12 12:12:12.1234567', 7), 1); -- Above normal range, source scale less than result + + +SELECT '- test microseconds'; +select addNanoseconds(toDateTime64('1980-12-12 12:12:12.123456', 6), 1); -- In normal range, source scale matches result +select addNanoseconds(toDateTime64('1980-12-12 12:12:12.1234', 4), 1); -- In normal range, source scale less than result +select addNanoseconds(toDateTime64('1980-12-12 12:12:12.12345678', 8), 1); -- In normal range, source scale greater than result + +select addNanoseconds(toDateTime64('1930-12-12 12:12:12.123456', 6), 1); -- Below normal range, source scale matches result +select addNanoseconds(toDateTime64('1930-12-12 12:12:12.1234', 4), 1); -- Below normal range, source scale less than result +select addNanoseconds(toDateTime64('1930-12-12 12:12:12.12345678', 8), 1); -- Below normal range, source scale greater than result + +select addNanoseconds(toDateTime64('2220-12-12 12:12:12.123456', 6), 1); -- Above normal range, source scale matches result +select addNanoseconds(toDateTime64('2220-12-12 12:12:12.1234', 4), 1); -- Above normal range, source scale less than result +select addNanoseconds(toDateTime64('2220-12-12 12:12:12.12345678', 8), 1); -- Above normal range, source scale greater than result + + +SELECT '- test milliseconds'; +select addNanoseconds(toDateTime64('1980-12-12 12:12:12.123', 3), 1); -- In normal range, source scale matches result +select addNanoseconds(toDateTime64('1980-12-12 12:12:12.12', 2), 1); -- In normal range, source scale less than result +select addNanoseconds(toDateTime64('1980-12-12 12:12:12.123456', 6), 1); -- In normal range, source scale greater than result + +select addNanoseconds(toDateTime64('1930-12-12 12:12:12.123', 3), 1); -- Below normal range, source scale matches result +select addNanoseconds(toDateTime64('1930-12-12 12:12:12.12', 2), 1); -- Below normal range, source scale less than result +select addNanoseconds(toDateTime64('1930-12-12 12:12:12.123456', 6), 1); -- Below normal range, source scale greater than result + +select addNanoseconds(toDateTime64('2220-12-12 12:12:12.123', 3), 1); -- Above normal range, source scale matches result +select addNanoseconds(toDateTime64('2220-12-12 12:12:12.12', 2), 1); -- Above normal range, source scale less than result +select addNanoseconds(toDateTime64('2220-12-12 12:12:12.123456', 6), 1); -- Above normal range, source scale greater than result From a5a742e22b359887751606742a62695d74f07063 Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 17 Feb 2022 01:39:46 +0300 Subject: [PATCH 031/132] updated negative fracs --- contrib/replxx | 2 +- src/Functions/toStartOfInterval.cpp | 16 +++---- .../02207_subseconds_intervals.reference | 40 ++++++++-------- .../02207_subseconds_intervals.sql | 48 +++++++++---------- 4 files changed, 53 insertions(+), 53 deletions(-) diff --git a/contrib/replxx b/contrib/replxx index c745b3fb012..9460e5e0fc1 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit c745b3fb012ee5ae762fbc8cd7a40c4dc3fe15df +Subproject commit 9460e5e0fc10f78f460af26a6bd928798cac864d diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index c3700b84d6d..b6d2ba688ed 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -218,15 +218,15 @@ namespace if (scale_multiplier < 1000) { Int64 t_milliseconds = t * (static_cast(1000) / scale_multiplier); - return t_milliseconds - t_milliseconds % milliseconds; + return t_milliseconds - std::abs(t_milliseconds % milliseconds); } else if (scale_multiplier > 1000) { Int64 scale_diff = scale_multiplier / static_cast(1000); - return (t - t % (milliseconds * (scale_diff))) / scale_diff; + return (t - t % std::abs(milliseconds * scale_diff)) / scale_diff; } else - return t - t % milliseconds; + return t - std::abs(t % milliseconds); } }; @@ -244,15 +244,15 @@ namespace if (scale_multiplier < 1000000) { Int64 t_microseconds = t * (static_cast(1000000) / scale_multiplier); - return t_microseconds - t_microseconds % microseconds; + return t_microseconds - std::abs(t_microseconds % microseconds); } else if (scale_multiplier > 1000000) { Int64 scale_diff = scale_multiplier / static_cast(1000000); - return (t - t % (microseconds * (scale_diff))) / scale_diff; + return (t - t % std::abs(microseconds * scale_diff)) / scale_diff; } else - return t - t % microseconds; + return t - std::abs(t % microseconds); } }; @@ -270,10 +270,10 @@ namespace if (scale_multiplier < 1000000000) { Int64 t_nanoseconds = t * (static_cast(1000000000) / scale_multiplier); - return t_nanoseconds - t_nanoseconds % nanoseconds; + return t_nanoseconds - std::abs(t_nanoseconds % nanoseconds); } else - return t - t % nanoseconds; + return t - std::abs(t % nanoseconds); } }; diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index d83cdbf71ab..37ad2dca280 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -1,31 +1,31 @@ test intervals - test nanoseconds -1980-12-12 12:12:12.123456788 -1980-12-12 12:12:12.123456697 -1930-12-12 12:12:12.123456788 -1930-12-12 12:12:12.123456697 -2220-12-12 12:12:12.123456784 +1980-12-12 12:12:12.123456789 +1980-12-12 12:12:12.123456700 +1930-12-12 12:12:12.123456789 +1930-12-12 12:12:12.123456700 +2220-12-12 12:12:12.123456789 2220-12-12 12:12:12.123456700 - test microseconds -1980-12-12 12:12:12.123452 -1980-12-12 12:12:12.123396 -1980-12-12 12:12:12.123452 -1930-12-12 12:12:12.123452 -1930-12-12 12:12:12.123396 -1930-12-12 12:12:12.123452 +1980-12-12 12:12:12.123456 +1980-12-12 12:12:12.123400 +1980-12-12 12:12:12.123456 +1930-12-12 12:12:12.123456 +1930-12-12 12:12:12.123400 +1930-12-12 12:12:12.123456 2220-12-12 12:12:12.123456 2220-12-12 12:12:12.123400 2220-12-12 12:12:12.123456 - test milliseconds -1980-12-12 12:12:12.119 -1980-12-12 12:12:12.119 -1980-12-12 12:12:12.119 -1930-12-12 12:12:12.119 -1930-12-12 12:12:12.119 -1930-12-12 12:12:12.119 -2220-12-12 12:12:12.122 -2220-12-12 12:12:12.115 -2220-12-12 12:12:12.122 +1980-12-12 12:12:12.123 +1980-12-12 12:12:12.120 +1980-12-12 12:12:12.123 +1930-12-12 12:12:12.123 +1930-12-12 12:12:12.120 +1930-12-12 12:12:12.123 +2220-12-12 12:12:12.123 +2220-12-12 12:12:12.120 +2220-12-12 12:12:12.123 test add[...]seconds() - test nanoseconds 1980-12-12 12:12:12.123456790 diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.sql b/tests/queries/0_stateless/02207_subseconds_intervals.sql index acaf7b136b2..57b2445d9d1 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.sql +++ b/tests/queries/0_stateless/02207_subseconds_intervals.sql @@ -1,44 +1,44 @@ SELECT 'test intervals'; SELECT '- test nanoseconds'; -select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456789', 9), INTERVAL 7 NANOSECOND); -- In normal range, source scale matches result -select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.1234567', 7), INTERVAL 7 NANOSECOND); -- In normal range, source scale less than result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456789', 9), INTERVAL 1 NANOSECOND); -- In normal range, source scale matches result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.1234567', 7), INTERVAL 1 NANOSECOND); -- In normal range, source scale less than result -select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123456789', 9), INTERVAL 7 NANOSECOND); -- Below normal range, source scale matches result -select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.1234567', 7), INTERVAL 7 NANOSECOND); -- Below normal range, source scale less than result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123456789', 9), INTERVAL 1 NANOSECOND); -- Below normal range, source scale matches result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.1234567', 7), INTERVAL 1 NANOSECOND); -- Below normal range, source scale less than result -select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123456789', 9), INTERVAL 7 NANOSECOND); -- Above normal range, source scale matches result -select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.1234567', 7), INTERVAL 7 NANOSECOND); -- Above normal range, source scale less than result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123456789', 9), INTERVAL 1 NANOSECOND); -- Above normal range, source scale matches result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.1234567', 7), INTERVAL 1 NANOSECOND); -- Above normal range, source scale less than result SELECT '- test microseconds'; -select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456', 6), INTERVAL 7 MICROSECOND); -- In normal range, source scale matches result -select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.1234', 4), INTERVAL 7 MICROSECOND); -- In normal range, source scale less than result -select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.12345678', 8), INTERVAL 7 MICROSECOND); -- In normal range, source scale greater than result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456', 6), INTERVAL 1 MICROSECOND); -- In normal range, source scale matches result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.1234', 4), INTERVAL 1 MICROSECOND); -- In normal range, source scale less than result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.12345678', 8), INTERVAL 1 MICROSECOND); -- In normal range, source scale greater than result -select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123456', 6), INTERVAL 7 MICROSECOND); -- Below normal range, source scale matches result -select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.1234', 4), INTERVAL 7 MICROSECOND); -- Below normal range, source scale less than result -select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.12345678', 8), INTERVAL 7 MICROSECOND); -- Below normal range, source scale greater than result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123456', 6), INTERVAL 1 MICROSECOND); -- Below normal range, source scale matches result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.1234', 4), INTERVAL 1 MICROSECOND); -- Below normal range, source scale less than result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.12345678', 8), INTERVAL 1 MICROSECOND); -- Below normal range, source scale greater than result -select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123456', 6), INTERVAL 7 MICROSECOND); -- Above normal range, source scale matches result -select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.1234', 4), INTERVAL 7 MICROSECOND); -- Above normal range, source scale less than result -select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.12345678', 8), INTERVAL 7 MICROSECOND); -- Above normal range, source scale greater than result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123456', 6), INTERVAL 1 MICROSECOND); -- Above normal range, source scale matches result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.1234', 4), INTERVAL 1 MICROSECOND); -- Above normal range, source scale less than result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.12345678', 8), INTERVAL 1 MICROSECOND); -- Above normal range, source scale greater than result SELECT '- test milliseconds'; -select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123', 3), INTERVAL 7 MILLISECOND); -- In normal range, source scale matches result -select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.12', 2), INTERVAL 7 MILLISECOND); -- In normal range, source scale less than result -select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456', 6), INTERVAL 7 MILLISECOND); -- In normal range, source scale greater than result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123', 3), INTERVAL 1 MILLISECOND); -- In normal range, source scale matches result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.12', 2), INTERVAL 1 MILLISECOND); -- In normal range, source scale less than result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456', 6), INTERVAL 1 MILLISECOND); -- In normal range, source scale greater than result -select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123', 3), INTERVAL 7 MILLISECOND); -- Below normal range, source scale matches result -select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.12', 2), INTERVAL 7 MILLISECOND); -- Below normal range, source scale less than result -select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123456', 6), INTERVAL 7 MILLISECOND); -- Below normal range, source scale greater than result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123', 3), INTERVAL 1 MILLISECOND); -- Below normal range, source scale matches result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.12', 2), INTERVAL 1 MILLISECOND); -- Below normal range, source scale less than result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123456', 6), INTERVAL 1 MILLISECOND); -- Below normal range, source scale greater than result -select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123', 3), INTERVAL 7 MILLISECOND); -- Above normal range, source scale matches result -select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.12', 2), INTERVAL 7 MILLISECOND); -- Above normal range, source scale less than result -select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123456', 6), INTERVAL 7 MILLISECOND); -- Above normal range, source scale greater than result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123', 3), INTERVAL 1 MILLISECOND); -- Above normal range, source scale matches result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.12', 2), INTERVAL 1 MILLISECOND); -- Above normal range, source scale less than result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123456', 6), INTERVAL 1 MILLISECOND); -- Above normal range, source scale greater than result SELECT 'test add[...]seconds()'; From d37003e4b6c21c4fc649c48bd31859de161905d5 Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 17 Feb 2022 10:28:09 +0300 Subject: [PATCH 032/132] mustfix fuzzer --- src/Functions/toStartOfInterval.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index b6d2ba688ed..ad1f84f9b63 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -223,7 +223,7 @@ namespace else if (scale_multiplier > 1000) { Int64 scale_diff = scale_multiplier / static_cast(1000); - return (t - t % std::abs(milliseconds * scale_diff)) / scale_diff; + return (t - std::abs(t % milliseconds * scale_diff)) / scale_diff; } else return t - std::abs(t % milliseconds); @@ -249,7 +249,7 @@ namespace else if (scale_multiplier > 1000000) { Int64 scale_diff = scale_multiplier / static_cast(1000000); - return (t - t % std::abs(microseconds * scale_diff)) / scale_diff; + return (t - std::abs(t % microseconds * scale_diff)) / scale_diff; } else return t - std::abs(t % microseconds); @@ -482,7 +482,7 @@ private: } template - ColumnPtr execute(const FromDataType &, const ColumnType & time_column_type, UInt64 num_units, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const + ColumnPtr execute(const FromDataType &, const ColumnType & time_column_type, Int64 num_units, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const { using ToColumnType = typename ToDataType::ColumnType; From 1afe815bdaa8f150e785d2109ed6214f73ad8bf9 Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 17 Feb 2022 15:33:26 +0300 Subject: [PATCH 033/132] fix maths --- src/Functions/toStartOfInterval.cpp | 104 +++++++++++------- .../02207_subseconds_intervals.reference | 4 +- 2 files changed, 66 insertions(+), 42 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index ad1f84f9b63..324dd0c992f 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -33,22 +33,22 @@ namespace template <> struct Transform { - static UInt16 execute(UInt16 d, UInt64 years, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfYearInterval(DayNum(d), years); } - static UInt16 execute(Int32 d, UInt64 years, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years); } - static UInt16 execute(UInt32 t, UInt64 years, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); } - static UInt16 execute(Int64 t, UInt64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier) { return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); } @@ -57,22 +57,22 @@ namespace template <> struct Transform { - static UInt16 execute(UInt16 d, UInt64 quarters, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfQuarterInterval(DayNum(d), quarters); } - static UInt16 execute(Int32 d, UInt64 quarters, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters); } - static UInt16 execute(UInt32 t, UInt64 quarters, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); } - static UInt16 execute(Int64 t, UInt64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier) { return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); } @@ -81,22 +81,22 @@ namespace template <> struct Transform { - static UInt16 execute(UInt16 d, UInt64 months, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfMonthInterval(DayNum(d), months); } - static UInt16 execute(Int32 d, UInt64 months, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months); } - static UInt16 execute(UInt32 t, UInt64 months, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); } - static UInt16 execute(Int64 t, UInt64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier) { return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); } @@ -105,22 +105,22 @@ namespace template <> struct Transform { - static UInt16 execute(UInt16 d, UInt64 weeks, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfWeekInterval(DayNum(d), weeks); } - static UInt16 execute(Int32 d, UInt64 weeks, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks); } - static UInt16 execute(UInt32 t, UInt64 weeks, const DateLUTImpl & time_zone, Int64) + static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); } - static UInt16 execute(Int64 t, UInt64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier) { return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); } @@ -129,22 +129,22 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16 d, UInt64 days, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days); } - static UInt32 execute(Int32 d, UInt64 days, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days); } - static UInt32 execute(UInt32 t, UInt64 days, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days); } - static Int64 execute(Int64 t, UInt64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier) { return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days); } @@ -153,16 +153,16 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16, UInt64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(Int32, UInt64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, UInt64 hours, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfHourInterval(t, hours); } - static UInt32 execute(Int64 t, UInt64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt32 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) { return time_zone.toStartOfHourInterval(t / scale_multiplier, hours); } @@ -171,16 +171,16 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16, UInt64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(Int32, UInt64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, UInt64 minutes, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfMinuteInterval(t, minutes); } - static UInt32 execute(Int64 t, UInt64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt32 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier) { return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes); } @@ -189,16 +189,16 @@ namespace template <> struct Transform { - static UInt32 execute(UInt16, UInt64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(Int32, UInt64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone, Int64) + static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfSecondInterval(t, seconds); } - static UInt32 execute(Int64 t, UInt64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier) + static UInt32 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier) { return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds); } @@ -218,15 +218,24 @@ namespace if (scale_multiplier < 1000) { Int64 t_milliseconds = t * (static_cast(1000) / scale_multiplier); - return t_milliseconds - std::abs(t_milliseconds % milliseconds); + if (likely(t >= 0)) + return t_milliseconds / milliseconds * milliseconds; + else + return (t_milliseconds + 1 - milliseconds) / milliseconds * milliseconds; } else if (scale_multiplier > 1000) { Int64 scale_diff = scale_multiplier / static_cast(1000); - return (t - std::abs(t % milliseconds * scale_diff)) / scale_diff; + if (likely(t >= 0)) + return t / milliseconds / scale_diff * milliseconds; + else + return ((t + 1) / milliseconds / scale_diff - 1) * milliseconds; } else - return t - std::abs(t % milliseconds); + if (likely(t >= 0)) + return t / milliseconds * milliseconds; + else + return (t + 1 - milliseconds) / milliseconds * milliseconds; } }; @@ -244,15 +253,24 @@ namespace if (scale_multiplier < 1000000) { Int64 t_microseconds = t * (static_cast(1000000) / scale_multiplier); - return t_microseconds - std::abs(t_microseconds % microseconds); + if (likely(t >= 0)) + return t_microseconds / microseconds * microseconds; + else + return (t_microseconds + 1 - microseconds) / microseconds * microseconds; } else if (scale_multiplier > 1000000) { Int64 scale_diff = scale_multiplier / static_cast(1000000); - return (t - std::abs(t % microseconds * scale_diff)) / scale_diff; + if (likely(t >= 0)) + return t / microseconds / scale_diff * microseconds; + else + return ((t + 1) / microseconds / scale_diff - 1) * microseconds; } else - return t - std::abs(t % microseconds); + if (likely(t >= 0)) + return t / microseconds * microseconds; + else + return (t + 1 - microseconds) / microseconds * microseconds; } }; @@ -270,10 +288,16 @@ namespace if (scale_multiplier < 1000000000) { Int64 t_nanoseconds = t * (static_cast(1000000000) / scale_multiplier); - return t_nanoseconds - std::abs(t_nanoseconds % nanoseconds); + if (likely(t >= 0)) + return t_nanoseconds / nanoseconds * nanoseconds; + else + return (t_nanoseconds + 1 - nanoseconds) / nanoseconds * nanoseconds; } else - return t - std::abs(t % nanoseconds); + if (likely(t >= 0)) + return t / nanoseconds * nanoseconds; + else + return (t + 1 - nanoseconds) / nanoseconds * nanoseconds; } }; diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index 37ad2dca280..32b7d2a9446 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -12,7 +12,7 @@ test intervals 1980-12-12 12:12:12.123456 1930-12-12 12:12:12.123456 1930-12-12 12:12:12.123400 -1930-12-12 12:12:12.123456 +1930-12-12 12:12:12.123457 2220-12-12 12:12:12.123456 2220-12-12 12:12:12.123400 2220-12-12 12:12:12.123456 @@ -22,7 +22,7 @@ test intervals 1980-12-12 12:12:12.123 1930-12-12 12:12:12.123 1930-12-12 12:12:12.120 -1930-12-12 12:12:12.123 +1930-12-12 12:12:12.124 2220-12-12 12:12:12.123 2220-12-12 12:12:12.120 2220-12-12 12:12:12.123 From 1f24f3a5e21ee3c9daba1798252d2115d0c0aa15 Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 17 Feb 2022 23:17:06 +0300 Subject: [PATCH 034/132] fixed overflow --- src/Functions/toStartOfInterval.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 324dd0c992f..2a743c5f7a4 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -297,7 +297,7 @@ namespace if (likely(t >= 0)) return t / nanoseconds * nanoseconds; else - return (t + 1 - nanoseconds) / nanoseconds * nanoseconds; + return ((t + 1) / nanoseconds - 1) * nanoseconds; } }; From 71b7b6605e4136aec27a2a0e62316876b8b8db43 Mon Sep 17 00:00:00 2001 From: zvonand Date: Fri, 18 Feb 2022 00:17:29 +0300 Subject: [PATCH 035/132] fixed maths 2 --- src/Functions/toStartOfInterval.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 2a743c5f7a4..bff33f9b061 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -221,7 +221,7 @@ namespace if (likely(t >= 0)) return t_milliseconds / milliseconds * milliseconds; else - return (t_milliseconds + 1 - milliseconds) / milliseconds * milliseconds; + return ((t_milliseconds + 1) / milliseconds - 1) * milliseconds; } else if (scale_multiplier > 1000) { @@ -235,7 +235,7 @@ namespace if (likely(t >= 0)) return t / milliseconds * milliseconds; else - return (t + 1 - milliseconds) / milliseconds * milliseconds; + return ((t + 1) / milliseconds - 1) * milliseconds; } }; @@ -256,7 +256,7 @@ namespace if (likely(t >= 0)) return t_microseconds / microseconds * microseconds; else - return (t_microseconds + 1 - microseconds) / microseconds * microseconds; + return ((t_microseconds + 1) / microseconds - 1) * microseconds; } else if (scale_multiplier > 1000000) { @@ -270,7 +270,7 @@ namespace if (likely(t >= 0)) return t / microseconds * microseconds; else - return (t + 1 - microseconds) / microseconds * microseconds; + return ((t + 1) / microseconds - 1) * microseconds; } }; @@ -291,7 +291,7 @@ namespace if (likely(t >= 0)) return t_nanoseconds / nanoseconds * nanoseconds; else - return (t_nanoseconds + 1 - nanoseconds) / nanoseconds * nanoseconds; + return ((t_nanoseconds + 1) / nanoseconds - 1) * nanoseconds; } else if (likely(t >= 0)) From 6d47a715dc47ddd10c7b312206ec8b0d2f0ecb17 Mon Sep 17 00:00:00 2001 From: zvonand Date: Fri, 18 Feb 2022 10:18:07 +0300 Subject: [PATCH 036/132] fix decimal overflow --- .../FunctionDateOrDateTimeAddInterval.h | 47 +++++++++++++------ 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index 6ad00e42800..899ef7a2580 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -55,8 +55,15 @@ struct AddNanosecondsImpl static inline DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { - Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); - return t * multiplier + delta; + try + { + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + return t * multiplier + delta; + } + catch (...) + { + throw Exception("Numeric overflow", ErrorCodes::DECIMAL_OVERFLOW); + } } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -99,16 +106,22 @@ struct AddMicrosecondsImpl static inline DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { - Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); - if (scale <= 6) + try { - return t * multiplier + delta; + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); + if (scale <= 6) + { + return t * multiplier + delta; + } + else + { + return t + delta * multiplier; + } } - else + catch (...) { - return t + delta * multiplier; + throw Exception("Numeric overflow", ErrorCodes::DECIMAL_OVERFLOW); } - } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -151,16 +164,22 @@ struct AddMillisecondsImpl static inline DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { - Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); - if (scale <= 3) + try { - return t * multiplier + delta; + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); + if (scale <= 3) + { + return t * multiplier + delta; + } + else + { + return t + delta * multiplier; + } } - else + catch (...) { - return t + delta * multiplier; + throw Exception("Numeric overflow", ErrorCodes::DECIMAL_OVERFLOW); } - } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) From 8d3479721242cca8a12555578af761efd44ec101 Mon Sep 17 00:00:00 2001 From: zvonand Date: Fri, 18 Feb 2022 13:45:47 +0300 Subject: [PATCH 037/132] AST fix try 123 --- .../FunctionDateOrDateTimeAddInterval.h | 63 +++++-------------- 1 file changed, 14 insertions(+), 49 deletions(-) diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index 899ef7a2580..1a58fc845be 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -55,15 +55,8 @@ struct AddNanosecondsImpl static inline DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { - try - { - Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); - return t * multiplier + delta; - } - catch (...) - { - throw Exception("Numeric overflow", ErrorCodes::DECIMAL_OVERFLOW); - } + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + return static_cast(t * multiplier + delta); } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -106,22 +99,8 @@ struct AddMicrosecondsImpl static inline DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { - try - { - Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); - if (scale <= 6) - { - return t * multiplier + delta; - } - else - { - return t + delta * multiplier; - } - } - catch (...) - { - throw Exception("Numeric overflow", ErrorCodes::DECIMAL_OVERFLOW); - } + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); + return static_cast(scale <= 6 ? t * multiplier + delta : t + delta * multiplier); } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -164,22 +143,8 @@ struct AddMillisecondsImpl static inline DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { - try - { - Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); - if (scale <= 3) - { - return t * multiplier + delta; - } - else - { - return t + delta * multiplier; - } - } - catch (...) - { - throw Exception("Numeric overflow", ErrorCodes::DECIMAL_OVERFLOW); - } + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); + return static_cast(scale <= 3 ? t * multiplier + delta : t + delta * multiplier); } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -212,7 +177,7 @@ struct AddSecondsImpl static inline DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { - return t + delta * DecimalUtils::scaleMultiplier(scale); + return static_cast(t + delta * DecimalUtils::scaleMultiplier(scale)); } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -245,7 +210,7 @@ struct AddMinutesImpl static inline DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { - return t + 60 * delta * DecimalUtils::scaleMultiplier(scale); + return static_cast(t + 60 * delta * DecimalUtils::scaleMultiplier(scale)); } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -278,7 +243,7 @@ struct AddHoursImpl static inline DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { - return t + 3600 * delta * DecimalUtils::scaleMultiplier(scale); + return static_cast(t + 3600 * delta * DecimalUtils::scaleMultiplier(scale)); } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -313,7 +278,7 @@ struct AddDaysImpl { auto multiplier = DecimalUtils::scaleMultiplier(scale); auto d = std::div(t, multiplier); - return time_zone.addDays(d.quot, delta) * multiplier + d.rem; + return static_cast(time_zone.addDays(d.quot, delta) * multiplier + d.rem); } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) @@ -347,7 +312,7 @@ struct AddWeeksImpl { auto multiplier = DecimalUtils::scaleMultiplier(scale); auto d = std::div(t, multiplier); - return time_zone.addDays(d.quot, delta * 7) * multiplier + d.rem; + return static_cast(time_zone.addDays(d.quot, delta * 7) * multiplier + d.rem); } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) @@ -381,7 +346,7 @@ struct AddMonthsImpl { auto multiplier = DecimalUtils::scaleMultiplier(scale); auto d = std::div(t, multiplier); - return time_zone.addMonths(d.quot, delta) * multiplier + d.rem; + return static_cast(time_zone.addMonths(d.quot, delta) * multiplier + d.rem); } static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) @@ -415,7 +380,7 @@ struct AddQuartersImpl { auto multiplier = DecimalUtils::scaleMultiplier(scale); auto d = std::div(t, multiplier); - return time_zone.addQuarters(d.quot, delta) * multiplier + d.rem; + return static_cast(time_zone.addQuarters(d.quot, delta) * multiplier + d.rem); } static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) @@ -449,7 +414,7 @@ struct AddYearsImpl { auto multiplier = DecimalUtils::scaleMultiplier(scale); auto d = std::div(t, multiplier); - return time_zone.addYears(d.quot, delta) * multiplier + d.rem; + return static_cast(time_zone.addYears(d.quot, delta) * multiplier + d.rem); } static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) From 9367a8df682a6182f0342c6826e51aca7ee0887c Mon Sep 17 00:00:00 2001 From: zvonand Date: Fri, 18 Feb 2022 14:53:22 +0300 Subject: [PATCH 038/132] no_sanitize_undefined --- .../FunctionDateOrDateTimeAddInterval.h | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index 1a58fc845be..fbfc9e9bc1f 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -52,11 +52,11 @@ struct AddNanosecondsImpl return {t.whole * multiplier + division.quot, t.fractional * multiplier + delta}; } - static inline DateTime64 + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); - return static_cast(t * multiplier + delta); + return t * multiplier + delta; } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -96,11 +96,11 @@ struct AddMicrosecondsImpl } } - static inline DateTime64 + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); - return static_cast(scale <= 6 ? t * multiplier + delta : t + delta * multiplier); + return scale <= 6 ? t * multiplier + delta : t + delta * multiplier; } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -140,11 +140,11 @@ struct AddMillisecondsImpl } } - static inline DateTime64 + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); - return static_cast(scale <= 3 ? t * multiplier + delta : t + delta * multiplier); + return scale <= 3 ? t * multiplier + delta : t + delta * multiplier; } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -174,10 +174,10 @@ struct AddSecondsImpl return {t.whole + delta, t.fractional}; } - static inline DateTime64 + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { - return static_cast(t + delta * DecimalUtils::scaleMultiplier(scale)); + return t + delta * DecimalUtils::scaleMultiplier(scale); } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -207,10 +207,10 @@ struct AddMinutesImpl return {t.whole + delta * 60, t.fractional}; } - static inline DateTime64 + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { - return static_cast(t + 60 * delta * DecimalUtils::scaleMultiplier(scale)); + return t + 60 * delta * DecimalUtils::scaleMultiplier(scale); } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -240,10 +240,10 @@ struct AddHoursImpl return {t.whole + delta * 3600, t.fractional}; } - static inline DateTime64 + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) { - return static_cast(t + 3600 * delta * DecimalUtils::scaleMultiplier(scale)); + return t + 3600 * delta * DecimalUtils::scaleMultiplier(scale); } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) @@ -273,12 +273,12 @@ struct AddDaysImpl return {time_zone.addDays(t.whole, delta), t.fractional}; } - static inline DateTime64 + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) { auto multiplier = DecimalUtils::scaleMultiplier(scale); auto d = std::div(t, multiplier); - return static_cast(time_zone.addDays(d.quot, delta) * multiplier + d.rem); + return time_zone.addDays(d.quot, delta) * multiplier + d.rem; } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) @@ -307,12 +307,12 @@ struct AddWeeksImpl return {time_zone.addWeeks(t.whole, delta), t.fractional}; } - static inline DateTime64 + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) { auto multiplier = DecimalUtils::scaleMultiplier(scale); auto d = std::div(t, multiplier); - return static_cast(time_zone.addDays(d.quot, delta * 7) * multiplier + d.rem); + return time_zone.addDays(d.quot, delta * 7) * multiplier + d.rem; } static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) @@ -335,31 +335,31 @@ struct AddMonthsImpl { static constexpr auto name = "addMonths"; - static inline DecimalUtils::DecimalComponents + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return {time_zone.addMonths(t.whole, delta), t.fractional}; } - static inline DateTime64 + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) { auto multiplier = DecimalUtils::scaleMultiplier(scale); auto d = std::div(t, multiplier); - return static_cast(time_zone.addMonths(d.quot, delta) * multiplier + d.rem); + return time_zone.addMonths(d.quot, delta) * multiplier + d.rem; } - static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addMonths(t, delta); } - static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addMonths(DayNum(d), delta); } - static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addMonths(ExtendedDayNum(d), delta); } @@ -375,12 +375,12 @@ struct AddQuartersImpl return {time_zone.addQuarters(t.whole, delta), t.fractional}; } - static inline DateTime64 + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) { auto multiplier = DecimalUtils::scaleMultiplier(scale); auto d = std::div(t, multiplier); - return static_cast(time_zone.addQuarters(d.quot, delta) * multiplier + d.rem); + return time_zone.addQuarters(d.quot, delta) * multiplier + d.rem; } static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) @@ -403,31 +403,31 @@ struct AddYearsImpl { static constexpr auto name = "addYears"; - static inline DecimalUtils::DecimalComponents + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return {time_zone.addYears(t.whole, delta), t.fractional}; } - static inline DateTime64 + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) { auto multiplier = DecimalUtils::scaleMultiplier(scale); auto d = std::div(t, multiplier); - return static_cast(time_zone.addYears(d.quot, delta) * multiplier + d.rem); + return time_zone.addYears(d.quot, delta) * multiplier + d.rem; } - static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addYears(t, delta); } - static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addYears(DayNum(d), delta); } - static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) + static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addYears(ExtendedDayNum(d), delta); } From 61639bb91bd337c8039d391963abd44f3a6b6c9b Mon Sep 17 00:00:00 2001 From: Rajkumar Date: Thu, 3 Mar 2022 17:54:14 -0800 Subject: [PATCH 039/132] libxml2 package is upgraded to 2.9.13 --- contrib/libxml2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libxml2 b/contrib/libxml2 index 18890f471c4..a075d256fd9 160000 --- a/contrib/libxml2 +++ b/contrib/libxml2 @@ -1 +1 @@ -Subproject commit 18890f471c420411aa3c989e104d090966ec9dbf +Subproject commit a075d256fd9ff15590b86d981b75a50ead124fca From 05b4bbeceb538934212a872566372ee1403d811c Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 4 Mar 2022 11:03:13 +0000 Subject: [PATCH 040/132] Add next batch of random settings in functional tests --- tests/clickhouse-test | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 4c94e4d6493..95d7bfb3ed1 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -347,6 +347,11 @@ class SettingsRandomizer: "priority": lambda: int(abs(random.gauss(0, 2))), "output_format_parallel_formatting": lambda: random.randint(0, 1), "input_format_parallel_parsing": lambda: random.randint(0, 1), + "min_chunk_bytes_for_parallel_parsing": lambda: max(1024, int(random.gauss(10 * 1024 * 1024, 5 * 1000 * 1000))), + "max_read_buffer_size": lambda: random.randint(1, 20) if random.random() < 0.1 else random.randint(500000, 1048576), + "prefer_localhost_replica": lambda: random.randint(0, 1), + "max_block_size": lambda: random.randint(8000, 100000), + "max_threads": lambda: random.randint(1, 64), } @staticmethod From 8205646081c8409ad7176f0b6f9326b5d6d04ce4 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 4 Mar 2022 15:43:14 +0000 Subject: [PATCH 041/132] Fix test --- .../01524_do_not_merge_across_partitions_select_final.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql index 25c47c008bd..13cd33f493b 100644 --- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql +++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql @@ -1,6 +1,7 @@ DROP TABLE IF EXISTS select_final; SET do_not_merge_across_partitions_select_final = 1; +SET max_threads = 0; CREATE TABLE select_final (t DateTime, x Int32, string String) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY (x, t); From c9de223266becc6df756e1d392fb621de96ee5e6 Mon Sep 17 00:00:00 2001 From: avogar Date: Sat, 5 Mar 2022 09:02:13 +0000 Subject: [PATCH 042/132] Fix tests --- .../0_stateless/00135_duplicate_group_by_keys_segfault.sql | 2 ++ .../0_stateless/01099_parallel_distributed_insert_select.sql | 1 + tests/queries/0_stateless/01926_order_by_desc_limit.sql | 2 ++ 3 files changed, 5 insertions(+) diff --git a/tests/queries/0_stateless/00135_duplicate_group_by_keys_segfault.sql b/tests/queries/0_stateless/00135_duplicate_group_by_keys_segfault.sql index 16356046a36..c54593056cf 100644 --- a/tests/queries/0_stateless/00135_duplicate_group_by_keys_segfault.sql +++ b/tests/queries/0_stateless/00135_duplicate_group_by_keys_segfault.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + SET max_rows_to_read = 1000000; SET read_overflow_mode = 'break'; SELECT concat(toString(number % 256 AS n), '') AS s, n, max(s) FROM system.numbers_mt GROUP BY s, n, n, n, n, n, n, n, n, n ORDER BY s, n; diff --git a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql index de93166d891..aa924218360 100644 --- a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql +++ b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql @@ -3,6 +3,7 @@ -- set insert_distributed_sync = 1; -- see https://github.com/ClickHouse/ClickHouse/issues/18971 SET allow_experimental_parallel_reading_from_replicas = 0; -- see https://github.com/ClickHouse/ClickHouse/issues/34525 +SET prefer_localhost_replica = 1; DROP TABLE IF EXISTS local_01099_a; DROP TABLE IF EXISTS local_01099_b; diff --git a/tests/queries/0_stateless/01926_order_by_desc_limit.sql b/tests/queries/0_stateless/01926_order_by_desc_limit.sql index 7ea102e11e9..9ee7f4a6aff 100644 --- a/tests/queries/0_stateless/01926_order_by_desc_limit.sql +++ b/tests/queries/0_stateless/01926_order_by_desc_limit.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + DROP TABLE IF EXISTS order_by_desc; CREATE TABLE order_by_desc (u UInt32, s String) From b5a3f9974f76511bbe38270e551db48544cd317e Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 9 Mar 2022 17:09:59 +0000 Subject: [PATCH 043/132] Fix test --- ...td_decompression_with_escape_sequence_at_the_end_of_buffer.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh index b6721d4a798..47cf6e06b48 100755 --- a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh +++ b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh @@ -12,6 +12,7 @@ mkdir -p ${user_files_path}/ cp $CUR_DIR/data_zstd/test_01946.zstd ${user_files_path}/ ${CLICKHOUSE_CLIENT} --multiline --multiquery --query " +set min_chunk_bytes_for_parallel_parsing=10485760; set max_read_buffer_size = 65536; set input_format_parallel_parsing = 0; select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') limit 30 format Null; From 2800e6052a5426567c5b7786819f29e5ccdbdeae Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 14 Mar 2022 18:55:54 +0000 Subject: [PATCH 044/132] Fix tests --- .../0_stateless/01059_storage_file_compression.sh | 9 +-------- .../01083_expressions_in_engine_arguments.sql | 2 ++ ...01236_distributed_over_live_view_over_distributed.sql | 2 +- .../0_stateless/01281_group_by_limit_memory_tracking.sh | 2 +- .../01506_buffer_table_alter_block_structure_2.sql | 2 ++ .../01701_parallel_parsing_infinite_segmentation.sh | 2 +- .../01722_long_brotli_http_compression_json_format.sh | 2 +- ...01730_distributed_group_by_no_merge_order_by_long.sql | 2 +- .../01746_long_zlib_http_compression_json_format.sh | 2 +- .../0_stateless/01889_clickhouse_client_config_format.sh | 2 +- .../02222_create_table_without_columns_metadata.sh | 2 +- .../00066_sorting_distributed_many_replicas.sql | 2 +- .../1_stateful/00161_parallel_parsing_with_names.sh | 8 ++++---- 13 files changed, 18 insertions(+), 21 deletions(-) diff --git a/tests/queries/0_stateless/01059_storage_file_compression.sh b/tests/queries/0_stateless/01059_storage_file_compression.sh index fbee5070d8d..e0f0c6d57ec 100755 --- a/tests/queries/0_stateless/01059_storage_file_compression.sh +++ b/tests/queries/0_stateless/01059_storage_file_compression.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -for m in gz br xz zst lz4 bz2 +for m in br do ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS file" ${CLICKHOUSE_CLIENT} --query "CREATE TABLE file (x UInt64) ENGINE = File(TSV, '${CLICKHOUSE_DATABASE}/${m}.tsv.${m}')" @@ -15,10 +15,3 @@ do ${CLICKHOUSE_CLIENT} --query "SELECT count(), max(x) FROM file" ${CLICKHOUSE_CLIENT} --query "DROP TABLE file" done - -${CLICKHOUSE_CLIENT} --query "SELECT count(), max(x) FROM file('${CLICKHOUSE_DATABASE}/{gz,br,xz,zst,lz4,bz2}.tsv.{gz,br,xz,zst,lz4,bz2}', TSV, 'x UInt64')" - -for m in gz br xz zst lz4 bz2 -do - ${CLICKHOUSE_CLIENT} --query "SELECT count() < 4000000, max(x) FROM file('${CLICKHOUSE_DATABASE}/${m}.tsv.${m}', RowBinary, 'x UInt8', 'none')" -done diff --git a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql index dd39277ee31..b162fdb21fd 100644 --- a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql +++ b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql @@ -1,5 +1,7 @@ -- Tags: no-parallel, no-fasttest +SET prefer_localhost_replica=1; + DROP TABLE IF EXISTS file; DROP TABLE IF EXISTS url; DROP TABLE IF EXISTS view; diff --git a/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.sql b/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.sql index 4aacecc9734..3822f22d353 100644 --- a/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.sql +++ b/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.sql @@ -1,4 +1,4 @@ --- Tags: distributed, no-replicated-database, no-parallel, no-fasttest +-- Tags: distributed, no-replicated-database, no-parallel, no-fasttest, no-random-settings SET allow_experimental_live_view = 1; diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh index c9c01455e31..1807da6948a 100755 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan +# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-random-settings # Tag no-fasttest: max_memory_usage_for_user can interfere another queries running concurrently # Regression for MemoryTracker that had been incorrectly accounted diff --git a/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql b/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql index f9c227942ac..0595e67f2b0 100644 --- a/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql +++ b/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + DROP TABLE IF EXISTS buf_dest; DROP TABLE IF EXISTS buf; diff --git a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh index edc4f6916ff..16225d4e10d 100755 --- a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh +++ b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh @@ -6,6 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} -q "create table insert_big_json(a String, b String) engine=MergeTree() order by tuple()"; -python3 -c "[print('{{\"a\":\"{}\", \"b\":\"{}\"'.format('clickhouse'* 1000000, 'dbms' * 1000000)) for i in range(10)]; [print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 100000, 'dbms' * 100000)) for i in range(10)]" 2>/dev/null | ${CLICKHOUSE_CLIENT} --input_format_parallel_parsing=1 --max_memory_usage=0 -q "insert into insert_big_json FORMAT JSONEachRow" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||: +python3 -c "[print('{{\"a\":\"{}\", \"b\":\"{}\"'.format('clickhouse'* 1000000, 'dbms' * 1000000)) for i in range(10)]; [print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 100000, 'dbms' * 100000)) for i in range(10)]" 2>/dev/null | ${CLICKHOUSE_CLIENT} --max_therads=0 --input_format_parallel_parsing=1 --max_memory_usage=0 -q "insert into insert_big_json FORMAT JSONEachRow" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||: ${CLICKHOUSE_CLIENT} -q "drop table insert_big_json" diff --git a/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh b/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh index 7295537a2d2..0f7775b2e4b 100755 --- a/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh +++ b/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: br' "${CLICKHOUSE_URL}&enable_http_compression=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON" | brotli -d | tail -n30 | head -n23 +${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: br' "${CLICKHOUSE_URL}&enable_http_compression=1&max_block_size=65505" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON" | brotli -d | tail -n30 | head -n23 diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql index 7f36bcb6c8a..62b578c21d6 100644 --- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -1,4 +1,4 @@ --- Tags: long, distributed +-- Tags: long, distributed, no-random-settings drop table if exists data_01730; diff --git a/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh b/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh index feaf4bcf623..424b052dbfa 100755 --- a/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh +++ b/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: gzip' "${CLICKHOUSE_URL}&enable_http_compression=1&http_zlib_compression_level=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(100000) FORMAT JSON" | gzip -d | tail -n30 | head -n23 +${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: gzip' "${CLICKHOUSE_URL}&enable_http_compression=1&http_zlib_compression_level=1&max_block_size=65505" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(100000) FORMAT JSON" | gzip -d | tail -n30 | head -n23 diff --git a/tests/queries/0_stateless/01889_clickhouse_client_config_format.sh b/tests/queries/0_stateless/01889_clickhouse_client_config_format.sh index aa162911399..4e970f17d3a 100755 --- a/tests/queries/0_stateless/01889_clickhouse_client_config_format.sh +++ b/tests/queries/0_stateless/01889_clickhouse_client_config_format.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-random-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh index 1ba67fa77ea..9cb4fb939e7 100755 --- a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh +++ b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh @@ -29,5 +29,5 @@ $CLICKHOUSE_CLIENT -q "create table test_dist engine=Distributed('test_shard_loc $CLICKHOUSE_CLIENT -q "detach table test_dist" $CLICKHOUSE_CLIENT -q "drop table test" $CLICKHOUSE_CLIENT -q "attach table test_dist" -$CLICKHOUSE_CLIENT -q "select * from test_dist" 2>&1 | grep -q "UNKNOWN_TABLE" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT --prefer_localhost_replica=1 -q "select * from test_dist" 2>&1 | grep -q "UNKNOWN_TABLE" && echo "OK" || echo "FAIL" diff --git a/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql b/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql index 63a833af114..c7a34c493c9 100644 --- a/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql +++ b/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql @@ -1,4 +1,4 @@ --- Tags: replica, distributed +-- Tags: replica, distributed, no-random-settings SET max_parallel_replicas = 2; SELECT EventTime::DateTime('Asia/Dubai') FROM remote('127.0.0.{1|2}', test, hits) ORDER BY EventTime DESC LIMIT 10 diff --git a/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh b/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh index a1136a47319..d14a174d3a0 100755 --- a/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh +++ b/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh @@ -13,9 +13,9 @@ do $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()" echo "$format, false"; - $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ + $CLICKHOUSE_CLIENT --max_block_size=65505 --output_format_parallel_formatting=false -q \ "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 50000 Format $format" | \ - $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=false -q "INSERT INTO parsing_with_names FORMAT $format" + $CLICKHOUSE_CLIENT --max_block_size=65505 --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=false -q "INSERT INTO parsing_with_names FORMAT $format" $CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" @@ -23,9 +23,9 @@ do $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()" echo "$format, true"; - $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ + $CLICKHOUSE_CLIENT --max_block_size=65505 --output_format_parallel_formatting=false -q \ "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 50000 Format $format" | \ - $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=true -q "INSERT INTO parsing_with_names FORMAT $format" + $CLICKHOUSE_CLIENT --max_block_size=65505 --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=true -q "INSERT INTO parsing_with_names FORMAT $format" $CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" From f7c5fe14e44fd1a1c8f43e8939524576b87993fd Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 16 Mar 2022 13:53:50 +0000 Subject: [PATCH 045/132] Fix schema inference for TSKV format while using small max_read_buffer_size --- .../Formats/Impl/TSKVRowInputFormat.cpp | 8 +++---- .../02240_tskv_schema_inference_bug.reference | 8 +++++++ .../02240_tskv_schema_inference_bug.sh | 22 +++++++++++++++++++ 3 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference create mode 100755 tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index f63d6fa9c46..dc8b281c1b2 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -4,7 +4,6 @@ #include #include #include -#include namespace DB @@ -242,15 +241,16 @@ std::unordered_map TSKVSchemaReader::readRowAndGetNamesAndD std::unordered_map names_and_types; StringRef name_ref; - String name_tmp; + String name; String value; do { - bool has_value = readName(in, name_ref, name_tmp); + bool has_value = readName(in, name_ref, name); + name = String(name_ref); if (has_value) { readEscapedString(value, in); - names_and_types[String(name_ref)] = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped); + names_and_types[name] = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped); } else { diff --git a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference new file mode 100644 index 00000000000..a8abc33648e --- /dev/null +++ b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference @@ -0,0 +1,8 @@ +b Nullable(String) +c Nullable(String) +a Nullable(String) +s1 \N 1 +} [2] 2 +\N \N \N +\N \N \N +\N [3] \N diff --git a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh new file mode 100755 index 00000000000..8655ffd1e1f --- /dev/null +++ b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +FILE_NAME=test_02240.data +DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME + +touch $DATA_FILE + +echo -e 'a=1\tb=s1\tc=\N +c=[2]\ta=2\tb=\N} + +a=\N +c=[3]\ta=\N' > $DATA_FILE +$CLICKHOUSE_CLIENT --max_read_buffer_size=4 -q "desc file('$FILE_NAME', 'TSKV')" +$CLICKHOUSE_CLIENT --max_read_buffer_size=4 -q "select * from file('$FILE_NAME', 'TSKV')" + From 07711d484342f12e286c0c6193b7484a472cc38a Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 16 Mar 2022 14:37:39 +0000 Subject: [PATCH 046/132] Fix tests --- .../0_stateless/01701_parallel_parsing_infinite_segmentation.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh index 16225d4e10d..0fe04fb95fd 100755 --- a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh +++ b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh @@ -6,6 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} -q "create table insert_big_json(a String, b String) engine=MergeTree() order by tuple()"; -python3 -c "[print('{{\"a\":\"{}\", \"b\":\"{}\"'.format('clickhouse'* 1000000, 'dbms' * 1000000)) for i in range(10)]; [print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 100000, 'dbms' * 100000)) for i in range(10)]" 2>/dev/null | ${CLICKHOUSE_CLIENT} --max_therads=0 --input_format_parallel_parsing=1 --max_memory_usage=0 -q "insert into insert_big_json FORMAT JSONEachRow" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||: +python3 -c "[print('{{\"a\":\"{}\", \"b\":\"{}\"'.format('clickhouse'* 1000000, 'dbms' * 1000000)) for i in range(10)]; [print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 100000, 'dbms' * 100000)) for i in range(10)]" 2>/dev/null | ${CLICKHOUSE_CLIENT} --min_chunk_bytes_for_parallel_parsing=10485760 --max_threads=0 --input_format_parallel_parsing=1 --max_memory_usage=0 -q "insert into insert_big_json FORMAT JSONEachRow" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||: ${CLICKHOUSE_CLIENT} -q "drop table insert_big_json" From bbe657ba243bbc63722643b11dbdc8b1d1fb22fc Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Wed, 16 Mar 2022 19:41:41 +0100 Subject: [PATCH 047/132] allow parallelization of multipart S3 storage upload fixes #34350 --- src/Common/threadPoolCallbackRunner.cpp | 39 +++++++++++++++++++++++++ src/Common/threadPoolCallbackRunner.h | 13 +++++++++ src/Disks/S3/DiskS3.cpp | 29 ++---------------- src/Storages/StorageS3.cpp | 13 ++++++++- 4 files changed, 66 insertions(+), 28 deletions(-) create mode 100644 src/Common/threadPoolCallbackRunner.cpp create mode 100644 src/Common/threadPoolCallbackRunner.h diff --git a/src/Common/threadPoolCallbackRunner.cpp b/src/Common/threadPoolCallbackRunner.cpp new file mode 100644 index 00000000000..fadad235039 --- /dev/null +++ b/src/Common/threadPoolCallbackRunner.cpp @@ -0,0 +1,39 @@ +#include "threadPoolCallbackRunner.h" + +#include + +#include + +namespace DB +{ + +CallbackRunner threadPoolCallbackRunner(ThreadPool & pool) +{ + return [pool = &pool, thread_group = CurrentThread::getGroup()](auto callback) + { + pool->scheduleOrThrow([callback = std::move(callback), thread_group]() + { + if (thread_group) + CurrentThread::attachTo(thread_group); + + SCOPE_EXIT_SAFE({ + if (thread_group) + CurrentThread::detachQueryIfNotDetached(); + + /// After we detached from the thread_group, parent for memory_tracker inside ThreadStatus will be reset to it's parent. + /// Typically, it may be changes from Process to User. + /// Usually it could be ok, because thread pool task is executed before user-level memory tracker is destroyed. + /// However, thread could stay alive inside the thread pool, and it's ThreadStatus as well. + /// When, finally, we destroy the thread (and the ThreadStatus), + /// it can use memory tracker in the ~ThreadStatus in order to alloc/free untracked_memory,\ + /// and by this time user-level memory tracker may be already destroyed. + /// + /// As a work-around, reset memory tracker to total, which is always alive. + CurrentThread::get().memory_tracker.setParent(&total_memory_tracker); + }); + callback(); + }); + }; +} + +} diff --git a/src/Common/threadPoolCallbackRunner.h b/src/Common/threadPoolCallbackRunner.h new file mode 100644 index 00000000000..69f480e6e7d --- /dev/null +++ b/src/Common/threadPoolCallbackRunner.h @@ -0,0 +1,13 @@ +#pragma once + +#include + + +namespace DB +{ + +using CallbackRunner = std::function)>; + +CallbackRunner threadPoolCallbackRunner(ThreadPool & pool); + +} diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index e46620d9d1f..c36191bf3d4 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -264,32 +265,6 @@ std::unique_ptr DiskS3::writeFile(const String & path, LOG_TRACE(log, "{} to file by path: {}. S3 path: {}", mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name); - ScheduleFunc schedule = [pool = &getThreadPoolWriter(), thread_group = CurrentThread::getGroup()](auto callback) - { - pool->scheduleOrThrow([callback = std::move(callback), thread_group]() - { - if (thread_group) - CurrentThread::attachTo(thread_group); - - SCOPE_EXIT_SAFE( - if (thread_group) - CurrentThread::detachQueryIfNotDetached(); - - /// After we detached from the thread_group, parent for memory_tracker inside ThreadStatus will be reset to it's parent. - /// Typically, it may be changes from Process to User. - /// Usually it could be ok, because thread pool task is executed before user-level memory tracker is destroyed. - /// However, thread could stay alive inside the thread pool, and it's ThreadStatus as well. - /// When, finally, we destroy the thread (and the ThreadStatus), - /// it can use memory tracker in the ~ThreadStatus in order to alloc/free untracked_memory,\ - /// and by this time user-level memory tracker may be already destroyed. - /// - /// As a work-around, reset memory tracker to total, which is always alive. - CurrentThread::get().memory_tracker.setParent(&total_memory_tracker); - ); - callback(); - }); - }; - auto s3_buffer = std::make_unique( settings->client, bucket, @@ -299,7 +274,7 @@ std::unique_ptr DiskS3::writeFile(const String & path, settings->s3_upload_part_size_multiply_parts_count_threshold, settings->s3_max_single_part_upload_size, std::move(object_metadata), - buf_size, std::move(schedule)); + buf_size, threadPoolCallbackRunner(getThreadPoolWriter())); auto create_metadata_callback = [this, path, blob_name, mode] (size_t count) { diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index ec506ad0cd0..0a03382ea97 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -4,6 +4,7 @@ #if USE_AWS_S3 #include +#include #include @@ -374,6 +375,16 @@ static bool checkIfObjectExists(const std::shared_ptr & clien return false; } +// TODO: common thread pool for IO must be used instead after PR #35150 +static ThreadPool & getThreadPoolStorageS3() +{ + constexpr size_t pool_size = 100; + constexpr size_t queue_size = 1000000; + static ThreadPool pool(pool_size, pool_size, queue_size); + return pool; +} + + class StorageS3Sink : public SinkToStorage { public: @@ -398,7 +409,7 @@ public: std::make_unique( client, bucket, key, min_upload_part_size, upload_part_size_multiply_factor, upload_part_size_multiply_parts_count_threshold, - max_single_part_upload_size), compression_method, 3); + max_single_part_upload_size, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, threadPoolCallbackRunner(getThreadPoolStorageS3())), compression_method, 3); writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, {}, format_settings); } From 5655045854ff7c895efe862492eacd6a12835fcf Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Thu, 17 Mar 2022 13:21:32 +0100 Subject: [PATCH 048/132] add comments in threadPoolCallbackRunner.h --- src/Common/threadPoolCallbackRunner.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Common/threadPoolCallbackRunner.h b/src/Common/threadPoolCallbackRunner.h index 69f480e6e7d..59d06f2f1bc 100644 --- a/src/Common/threadPoolCallbackRunner.h +++ b/src/Common/threadPoolCallbackRunner.h @@ -6,8 +6,10 @@ namespace DB { +/// High-order function to run callbacks (functions with 'void()' signature) somewhere asynchronously using CallbackRunner = std::function)>; +/// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()' CallbackRunner threadPoolCallbackRunner(ThreadPool & pool); } From 082ce9fb4d971d17cdc98098b4e6262fc2086680 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 17 Mar 2022 12:28:43 +0000 Subject: [PATCH 049/132] Fix tests --- tests/queries/0_stateless/00505_secure.sh | 2 +- .../0_stateless/01059_storage_file_compression.sh | 10 +++++++++- .../01722_long_brotli_http_compression_json_format.sh | 2 +- .../01746_long_zlib_http_compression_json_format.sh | 2 +- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/00505_secure.sh b/tests/queries/0_stateless/00505_secure.sh index e69515253ed..69d8a039bca 100755 --- a/tests/queries/0_stateless/00505_secure.sh +++ b/tests/queries/0_stateless/00505_secure.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-random=settings # set -x diff --git a/tests/queries/0_stateless/01059_storage_file_compression.sh b/tests/queries/0_stateless/01059_storage_file_compression.sh index e0f0c6d57ec..ab56caee3fe 100755 --- a/tests/queries/0_stateless/01059_storage_file_compression.sh +++ b/tests/queries/0_stateless/01059_storage_file_compression.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -for m in br +for m in gz br xz zst lz4 bz2 do ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS file" ${CLICKHOUSE_CLIENT} --query "CREATE TABLE file (x UInt64) ENGINE = File(TSV, '${CLICKHOUSE_DATABASE}/${m}.tsv.${m}')" @@ -15,3 +15,11 @@ do ${CLICKHOUSE_CLIENT} --query "SELECT count(), max(x) FROM file" ${CLICKHOUSE_CLIENT} --query "DROP TABLE file" done + +${CLICKHOUSE_CLIENT} --query "SELECT count(), max(x) FROM file('${CLICKHOUSE_DATABASE}/{gz,br,xz,zst,lz4,bz2}.tsv.{gz,br,xz,zst,lz4,bz2}', TSV, 'x UInt64')" + +for m in gz br xz zst lz4 bz2 +do + ${CLICKHOUSE_CLIENT} --query "SELECT count() < 4000000, max(x) FROM file('${CLICKHOUSE_DATABASE}/${m}.tsv.${m}', RowBinary, 'x UInt8', 'none')" +done + diff --git a/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh b/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh index 0f7775b2e4b..03f7893eb04 100755 --- a/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh +++ b/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: br' "${CLICKHOUSE_URL}&enable_http_compression=1&max_block_size=65505" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON" | brotli -d | tail -n30 | head -n23 +${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: br' "${CLICKHOUSE_URL}&enable_http_compression=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON SETTINGS max_block_size=65505" | brotli -d | tail -n30 | head -n23 diff --git a/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh b/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh index 424b052dbfa..71b003d2533 100755 --- a/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh +++ b/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: gzip' "${CLICKHOUSE_URL}&enable_http_compression=1&http_zlib_compression_level=1&max_block_size=65505" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(100000) FORMAT JSON" | gzip -d | tail -n30 | head -n23 +${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: gzip' "${CLICKHOUSE_URL}&enable_http_compression=1&http_zlib_compression_level=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(100000) FORMAT JSON SETTINGS max_block_size=65505" | gzip -d | tail -n30 | head -n23 From 607f785e48d6a08ca975ebbd7070c8366903fbe9 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 17 Mar 2022 12:31:43 +0000 Subject: [PATCH 050/132] Revert "Merge pull request #35145 from bigo-sg/lower-column-name" This reverts commit ebf72bf61d867e7540b5c98078f5f9ae3a612fba, reversing changes made to f1b812bdc13766ffd79ab3e3a8e090a0844c83ac. --- src/Core/Settings.h | 1 - src/Formats/FormatFactory.cpp | 1 - src/Formats/FormatSettings.h | 1 - .../Formats/Impl/ArrowColumnToCHColumn.cpp | 6 ++--- .../Formats/Impl/ORCBlockInputFormat.cpp | 23 +--------------- .../Formats/Impl/ORCBlockInputFormat.h | 3 ++- .../Formats/Impl/ParquetBlockInputFormat.cpp | 26 +++---------------- .../Formats/Impl/ParquetBlockInputFormat.h | 1 - ...format_use_lowercase_column_name.reference | 6 ----- 9 files changed, 8 insertions(+), 60 deletions(-) delete mode 100644 tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8d28696094b..05946a1b385 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -614,7 +614,6 @@ class IColumn; M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \ M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices \\N", 0) \ M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \ - M(Bool, input_format_use_lowercase_column_name, false, "Use lowercase column name while reading input formats", 0) \ M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \ M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \ M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 08554cf7e07..3fea8d3eb7b 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -89,7 +89,6 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers; format_settings.json.quote_denormals = settings.output_format_json_quote_denormals; format_settings.null_as_default = settings.input_format_null_as_default; - format_settings.use_lowercase_column_name = settings.input_format_use_lowercase_column_name; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size; format_settings.parquet.import_nested = settings.input_format_parquet_import_nested; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 4881c1a43c8..751b3c51fa8 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -32,7 +32,6 @@ struct FormatSettings bool null_as_default = true; bool decimal_trailing_zeros = false; bool defaults_for_omitted_fields = true; - bool use_lowercase_column_name = false; bool seekable_read = true; UInt64 max_rows_to_read_for_schema_inference = 100; diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 5c367bb69f0..ecaa485c3d6 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -32,6 +32,7 @@ #include #include + /// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn. #define FOR_ARROW_NUMERIC_TYPES(M) \ M(arrow::Type::UINT8, DB::UInt8) \ @@ -65,9 +66,9 @@ namespace ErrorCodes extern const int DUPLICATE_COLUMN; extern const int THERE_IS_NO_COLUMN; extern const int UNKNOWN_EXCEPTION; - extern const int INCORRECT_NUMBER_OF_COLUMNS; } + /// Inserts numeric data right into internal column data to reduce an overhead template > static ColumnWithTypeAndName readColumnWithNumericData(std::shared_ptr & arrow_column, const String & column_name) @@ -531,9 +532,6 @@ void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptrsecond->length(); columns_list.reserve(header.rows()); diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index aa9f7874ae8..4950e1fb952 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -1,5 +1,4 @@ #include "ORCBlockInputFormat.h" -#include #if USE_ORC #include @@ -53,9 +52,6 @@ Chunk ORCBlockInputFormat::generate() if (!table || !table->num_rows()) return res; - if (format_settings.use_lowercase_column_name) - table = *table->RenameColumns(include_column_names); - arrow_column_to_ch_column->arrowTableToCHChunk(res, table); /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. @@ -73,7 +69,6 @@ void ORCBlockInputFormat::resetParser() file_reader.reset(); include_indices.clear(); - include_column_names.clear(); block_missing_values.clear(); } @@ -125,20 +120,6 @@ static void getFileReaderAndSchema( if (!read_schema_result.ok()) throw Exception(read_schema_result.status().ToString(), ErrorCodes::BAD_ARGUMENTS); schema = std::move(read_schema_result).ValueOrDie(); - - if (format_settings.use_lowercase_column_name) - { - std::vector> fields; - fields.reserve(schema->num_fields()); - for (int i = 0; i < schema->num_fields(); ++i) - { - const auto& field = schema->field(i); - auto name = field->name(); - boost::to_lower(name); - fields.push_back(field->WithName(name)); - } - schema = arrow::schema(fields, schema->metadata()); - } } void ORCBlockInputFormat::prepareReader() @@ -167,11 +148,9 @@ void ORCBlockInputFormat::prepareReader() const auto & name = schema->field(i)->name(); if (getPort().getHeader().has(name) || nested_table_names.contains(name)) { + column_names.push_back(name); for (int j = 0; j != indexes_count; ++j) - { include_indices.push_back(index + j); - include_column_names.push_back(name); - } } index += indexes_count; } diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/src/Processors/Formats/Impl/ORCBlockInputFormat.h index bd2151d78ff..bb136d02d6e 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h @@ -45,9 +45,10 @@ private: std::unique_ptr arrow_column_to_ch_column; + std::vector column_names; + // indices of columns to read from ORC file std::vector include_indices; - std::vector include_column_names; std::vector missing_columns; BlockMissingValues block_missing_values; diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 548bf0138f5..3f0d9980573 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -1,6 +1,4 @@ #include "ParquetBlockInputFormat.h" -#include - #if USE_PARQUET #include @@ -15,6 +13,9 @@ #include "ArrowColumnToCHColumn.h" #include +#include + + namespace DB { @@ -56,9 +57,6 @@ Chunk ParquetBlockInputFormat::generate() throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA}; - if (format_settings.use_lowercase_column_name) - table = *table->RenameColumns(column_names); - ++row_group_current; arrow_column_to_ch_column->arrowTableToCHChunk(res, table); @@ -78,7 +76,6 @@ void ParquetBlockInputFormat::resetParser() file_reader.reset(); column_indices.clear(); - column_names.clear(); row_group_current = 0; block_missing_values.clear(); } @@ -123,20 +120,6 @@ static void getFileReaderAndSchema( return; THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(std::move(arrow_file), arrow::default_memory_pool(), &file_reader)); THROW_ARROW_NOT_OK(file_reader->GetSchema(&schema)); - - if (format_settings.use_lowercase_column_name) - { - std::vector> fields; - fields.reserve(schema->num_fields()); - for (int i = 0; i < schema->num_fields(); ++i) - { - const auto& field = schema->field(i); - auto name = field->name(); - boost::to_lower(name); - fields.push_back(field->WithName(name)); - } - schema = arrow::schema(fields, schema->metadata()); - } } void ParquetBlockInputFormat::prepareReader() @@ -167,10 +150,7 @@ void ParquetBlockInputFormat::prepareReader() if (getPort().getHeader().has(name) || nested_table_names.contains(name)) { for (int j = 0; j != indexes_count; ++j) - { column_indices.push_back(index + j); - column_names.push_back(name); - } } index += indexes_count; } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index eba9aac29f2..1faadaa3d21 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -40,7 +40,6 @@ private: int row_group_total = 0; // indices of columns to read from Parquet file std::vector column_indices; - std::vector column_names; std::unique_ptr arrow_column_to_ch_column; int row_group_current = 0; std::vector missing_columns; diff --git a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference b/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference deleted file mode 100644 index 5c383cb3035..00000000000 --- a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference +++ /dev/null @@ -1,6 +0,0 @@ -Parquet -123 1 -456 2 -ORC -123 1 -456 2 From 243de091bb7e2df86fe0f66f11bf9664c17ad7f0 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 18 Mar 2022 15:32:42 +0800 Subject: [PATCH 051/132] Validate some thoughts over making sets --- src/Interpreters/ActionsVisitor.cpp | 27 +++-- src/Interpreters/ActionsVisitor.h | 6 +- src/Interpreters/ExpressionAnalyzer.cpp | 103 +++++++++++------- src/Interpreters/ExpressionAnalyzer.h | 6 +- src/Interpreters/GlobalSubqueriesVisitor.h | 61 ++++++----- src/Interpreters/InterpreterSelectQuery.cpp | 19 ++-- .../MergeTree/MergeTreeWhereOptimizer.cpp | 1 + 7 files changed, 127 insertions(+), 96 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index fb9752ae391..181ac9aed7e 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -372,8 +372,8 @@ SetPtr makeExplicitSet( element_type = low_cardinality_type->getDictionaryType(); auto set_key = PreparedSetKey::forLiteral(*right_arg, set_element_types); - if (prepared_sets.count(set_key)) - return prepared_sets.at(set_key); /// Already prepared. + if (auto it = prepared_sets.find(set_key); it != prepared_sets.end()) + return it->second; /// Already prepared. Block block; const auto & right_arg_func = std::dynamic_pointer_cast(right_arg); @@ -388,7 +388,7 @@ SetPtr makeExplicitSet( set->insertFromBlock(block.getColumnsWithTypeAndName()); set->finishInsert(); - prepared_sets[set_key] = set; + prepared_sets.emplace(set_key, set); return set; } @@ -707,7 +707,7 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat if (tid != 0) tuple_ast = tuple_ast->clone(); - auto literal = std::make_shared(UInt64(++tid)); + auto literal = std::make_shared(UInt64{++tid}); visit(*literal, literal, data); auto func = makeASTFunction("tupleElement", tuple_ast, literal); @@ -814,14 +814,13 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (!data.only_consts) { /// We are in the part of the tree that we are not going to compute. You just need to define types. - /// Do not subquery and create sets. We replace "in*" function to "in*IgnoreSet". + /// Do not evaluate subquery and create sets. We replace "in*" function to "in*IgnoreSet". auto argument_name = node.arguments->children.at(0)->getColumnName(); - data.addFunction( - FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()), - { argument_name, argument_name }, - column_name); + FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()), + {argument_name, argument_name}, + column_name); } return; } @@ -1145,8 +1144,8 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su if (no_subqueries) return {}; auto set_key = PreparedSetKey::forSubquery(*right_in_operand); - if (data.prepared_sets.count(set_key)) - return data.prepared_sets.at(set_key); + if (auto it = data.prepared_sets.find(set_key); it != data.prepared_sets.end()) + return it->second; /// A special case is if the name of the table is specified on the right side of the IN statement, /// and the table has the type Set (a previously prepared set). @@ -1160,7 +1159,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su StorageSet * storage_set = dynamic_cast(table.get()); if (storage_set) { - data.prepared_sets[set_key] = storage_set->getSet(); + data.prepared_sets.emplace(set_key, storage_set->getSet()); return storage_set->getSet(); } } @@ -1174,7 +1173,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su /// If you already created a Set with the same subquery / table. if (subquery_for_set.set) { - data.prepared_sets[set_key] = subquery_for_set.set; + data.prepared_sets.emplace(set_key, subquery_for_set.set); return subquery_for_set.set; } @@ -1196,7 +1195,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su } subquery_for_set.set = set; - data.prepared_sets[set_key] = set; + data.prepared_sets.emplace(set_key, set); return set; } else diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index b6b67bac81c..342cc9eef9d 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -10,6 +10,7 @@ namespace DB { +class ASTExpressionList; class ASTFunction; class ExpressionActions; @@ -89,10 +90,7 @@ struct ScopeStack : WithContext void addColumn(ColumnWithTypeAndName column); void addAlias(const std::string & name, std::string alias); void addArrayJoin(const std::string & source_name, std::string result_name); - void addFunction( - const FunctionOverloadResolverPtr & function, - const Names & argument_names, - std::string result_name); + void addFunction(const FunctionOverloadResolverPtr & function, const Names & argument_names, std::string result_name); ActionsDAGPtr popLevel(); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 7e150f59694..841d7bc567f 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -259,7 +259,7 @@ NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAGPtr & a if (!array_join_expression_list) return src_columns; - getRootActionsNoMakeSet(array_join_expression_list, true, actions, false); + getRootActionsNoMakeSet(array_join_expression_list, actions, false); auto array_join = addMultipleArrayJoinAction(actions, is_array_join_left); auto sample_columns = actions->getResultColumns(); @@ -294,7 +294,7 @@ NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAGPtr & actions, const const ASTTablesInSelectQueryElement * join = select_query->join(); if (join) { - getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), true, actions, false); + getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), actions, false); auto sample_columns = actions->getNamesAndTypesList(); syntax->analyzed_join->addJoinedColumnsAndCorrectTypes(sample_columns, true); actions = std::make_shared(sample_columns); @@ -332,14 +332,14 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) { NameSet unique_keys; ASTs & group_asts = group_by_ast->children; - for (ssize_t i = 0; i < ssize_t(group_asts.size()); ++i) + for (ssize_t i = 0; i < static_cast(group_asts.size()); ++i) { ssize_t size = group_asts.size(); if (getContext()->getSettingsRef().enable_positional_arguments) replaceForPositionalArguments(group_asts[i], select_query, ASTSelectQuery::Expression::GROUP_BY); - getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false); + getRootActionsNoMakeSet(group_asts[i], temp_actions, false); const auto & column_name = group_asts[i]->getColumnName(); @@ -405,8 +405,8 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global) { if (do_global) { - GlobalSubqueriesVisitor::Data subqueries_data(getContext(), subquery_depth, isRemoteStorage(), - external_tables, subqueries_for_sets, has_global_subqueries); + GlobalSubqueriesVisitor::Data subqueries_data( + getContext(), subquery_depth, isRemoteStorage(), external_tables, subqueries_for_sets, has_global_subqueries); GlobalSubqueriesVisitor(subqueries_data).visit(query); } } @@ -416,7 +416,7 @@ void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_ { auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name); - if (prepared_sets.count(set_key)) + if (prepared_sets.contains(set_key)) return; /// Already prepared. if (auto set_ptr_from_storage_set = isPlainStorageSetInSubquery(subquery_or_table_name)) @@ -509,33 +509,62 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) } -void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts) { LogAST log; - ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth, - sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets, - no_subqueries, false, only_consts, !isRemoteStorage()); + ActionsVisitor::Data visitor_data( + getContext(), + settings.size_limits_for_set, + subquery_depth, + sourceColumns(), + std::move(actions), + prepared_sets, + subqueries_for_sets, + no_makeset_for_subqueries, + false /* no_makeset */, + only_consts, + !isRemoteStorage() /* create_source_for_in */); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } -void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts) { LogAST log; - ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth, - sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets, - no_subqueries, true, only_consts, !isRemoteStorage()); + ActionsVisitor::Data visitor_data( + getContext(), + settings.size_limits_for_set, + subquery_depth, + sourceColumns(), + std::move(actions), + prepared_sets, + subqueries_for_sets, + true /* no_makeset_for_subqueries, no_makeset implies no_makeset_for_subqueries */, + true /* no_makeset */, + only_consts, + !isRemoteStorage() /* create_source_for_in */); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } -void ExpressionAnalyzer::getRootActionsForHaving(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts) + +void ExpressionAnalyzer::getRootActionsForHaving( + const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts) { LogAST log; - ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth, - sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets, - no_subqueries, false, only_consts, true); + ActionsVisitor::Data visitor_data( + getContext(), + settings.size_limits_for_set, + subquery_depth, + sourceColumns(), + std::move(actions), + prepared_sets, + subqueries_for_sets, + no_makeset_for_subqueries, + false /* no_makeset */, + only_consts, + true /* create_source_for_in */); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -547,7 +576,7 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr { AggregateDescription aggregate; if (node->arguments) - getRootActionsNoMakeSet(node->arguments, true, actions); + getRootActionsNoMakeSet(node->arguments, actions); aggregate.column_name = node->getColumnName(); @@ -746,8 +775,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) // Requiring a constant reference to a shared pointer to non-const AST // doesn't really look sane, but the visitor does indeed require it. // Hence we clone the node (not very sane either, I know). - getRootActionsNoMakeSet(window_function.function_node->clone(), - true, actions); + getRootActionsNoMakeSet(window_function.function_node->clone(), actions); const ASTs & arguments = window_function.function_node->arguments->children; @@ -867,8 +895,7 @@ ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActi auto array_join = addMultipleArrayJoinAction(step.actions(), is_array_join_left); before_array_join = chain.getLastActions(); - chain.steps.push_back(std::make_unique( - array_join, step.getResultColumns())); + chain.steps.push_back(std::make_unique(array_join, step.getResultColumns())); chain.addStep(); @@ -1099,8 +1126,8 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( } } - chain.steps.emplace_back(std::make_unique( - std::make_shared(std::move(columns)))); + chain.steps.emplace_back( + std::make_unique(std::make_shared(std::move(columns)))); chain.steps.back()->additional_input = std::move(unused_source_columns); chain.getLastActions(); chain.addStep(); @@ -1210,8 +1237,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments( // recursively together with (1b) as ASTFunction::window_definition. if (getSelectQuery()->window()) { - getRootActionsNoMakeSet(getSelectQuery()->window(), - true /* no_subqueries */, step.actions()); + getRootActionsNoMakeSet(getSelectQuery()->window(), step.actions()); } for (const auto & [_, w] : window_descriptions) @@ -1222,8 +1248,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments( // definitions (1a). // Requiring a constant reference to a shared pointer to non-const AST // doesn't really look sane, but the visitor does indeed require it. - getRootActionsNoMakeSet(f.function_node->clone(), - true /* no_subqueries */, step.actions()); + getRootActionsNoMakeSet(f.function_node->clone(), step.actions()); // (2b) Required function argument columns. for (const auto & a : f.function_node->arguments->children) @@ -1456,7 +1481,7 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r alias = name; result_columns.emplace_back(name, alias); result_names.push_back(alias); - getRootActions(ast, false, actions_dag); + getRootActions(ast, false /* no_makeset_for_subqueries */, actions_dag); } if (add_aliases) @@ -1496,7 +1521,7 @@ ExpressionActionsPtr ExpressionAnalyzer::getConstActions(const ColumnsWithTypeAn { auto actions = std::make_shared(constant_inputs); - getRootActions(query, true, actions, true); + getRootActions(query, true /* no_makeset_for_subqueries */, actions, true /* only_consts */); return std::make_shared(actions, ExpressionActionsSettings::fromContext(getContext())); } @@ -1513,13 +1538,13 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::simpleSelectActions() } ExpressionAnalysisResult::ExpressionAnalysisResult( - SelectQueryExpressionAnalyzer & query_analyzer, - const StorageMetadataPtr & metadata_snapshot, - bool first_stage_, - bool second_stage_, - bool only_types, - const FilterDAGInfoPtr & filter_info_, - const Block & source_header) + SelectQueryExpressionAnalyzer & query_analyzer, + const StorageMetadataPtr & metadata_snapshot, + bool first_stage_, + bool second_stage_, + bool only_types, + const FilterDAGInfoPtr & filter_info_, + const Block & source_header) : first_stage(first_stage_) , second_stage(second_stage_) , need_aggregate(query_analyzer.hasAggregation()) diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index b6bb3c5fad5..5dcbdc2486b 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -172,15 +172,15 @@ protected: ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const; - void getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false); /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the * prepared sets would not be applicable for MergeTree index optimization. */ - void getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts = false); - void getRootActionsForHaving(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActionsForHaving(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false); /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions, * Create a set of columns aggregated_columns resulting after the aggregation, if any, diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 5d2df583b9e..50ce7977534 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -17,7 +18,11 @@ #include #include #include +#include #include +#include +#include +#include namespace DB { @@ -34,7 +39,6 @@ public: { size_t subquery_depth; bool is_remote; - size_t external_table_id; TemporaryTablesMapping & external_tables; SubqueriesForSets & subqueries_for_sets; bool & has_global_subqueries; @@ -49,7 +53,6 @@ public: : WithContext(context_) , subquery_depth(subquery_depth_) , is_remote(is_remote_) - , external_table_id(1) , external_tables(tables) , subqueries_for_sets(subqueries_for_sets_) , has_global_subqueries(has_global_subqueries_) @@ -92,48 +95,33 @@ public: { /// If this is already an external table, you do not need to add anything. Just remember its presence. auto temporary_table_name = getIdentifierName(subquery_or_table_name); - bool exists_in_local_map = external_tables.end() != external_tables.find(temporary_table_name); + bool exists_in_local_map = external_tables.contains(temporary_table_name); bool exists_in_context = static_cast(getContext()->tryResolveStorageID( StorageID("", temporary_table_name), Context::ResolveExternal)); if (exists_in_local_map || exists_in_context) return; } - String external_table_name = subquery_or_table_name->tryGetAlias(); - if (external_table_name.empty()) + String alias = subquery_or_table_name->tryGetAlias(); + String external_table_name; + if (alias.empty()) { - /// Generate the name for the external table. - external_table_name = "_data" + toString(external_table_id); - while (external_tables.count(external_table_name)) - { - ++external_table_id; - external_table_name = "_data" + toString(external_table_id); - } + auto hash = subquery_or_table_name->getTreeHash(); + external_table_name = fmt::format("_data_{}_{}", hash.first, hash.second); } - - auto interpreter = interpretSubquery(subquery_or_table_name, getContext(), subquery_depth, {}); - - Block sample = interpreter->getSampleBlock(); - NamesAndTypesList columns = sample.getNamesAndTypesList(); - - auto external_storage_holder = std::make_shared( - getContext(), - ColumnsDescription{columns}, - ConstraintsDescription{}, - nullptr, - /*create_for_global_subquery*/ true); - StoragePtr external_storage = external_storage_holder->getTable(); + else + external_table_name = alias; /** We replace the subquery with the name of the temporary table. * It is in this form, the request will go to the remote server. * This temporary table will go to the remote server, and on its side, * instead of doing a subquery, you just need to read it. + * TODO We can do better than using alias to name external tables */ auto database_and_table_name = std::make_shared(external_table_name); if (set_alias) { - String alias = subquery_or_table_name->tryGetAlias(); if (auto * table_name = subquery_or_table_name->as()) if (alias.empty()) alias = table_name->shortName(); @@ -151,8 +139,27 @@ public: else ast = database_and_table_name; - external_tables[external_table_name] = external_storage_holder; + if (external_tables.contains(external_table_name)) + return; + auto interpreter = interpretSubquery(subquery_or_table_name, getContext(), subquery_depth, {}); + + Block sample = interpreter->getSampleBlock(); + NamesAndTypesList columns = sample.getNamesAndTypesList(); + + auto external_storage_holder = std::make_shared( + getContext(), + ColumnsDescription{columns}, + ConstraintsDescription{}, + nullptr, + /*create_for_global_subquery*/ true); + StoragePtr external_storage = external_storage_holder->getTable(); + + external_tables.emplace(external_table_name, external_storage_holder); + + /// We need to materialize external tables immediately because reading from distributed + /// tables might generate local plans which can refer to external tables during index + /// analysis. It's too late to populate the external table via CreatingSetsTransform. if (getContext()->getSettingsRef().use_index_for_in_with_subqueries) { auto external_table = external_storage_holder->getTable(); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index f2fc17fbf9a..74a7b92bda4 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1239,8 +1239,8 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

hasGlobalSubqueries() && !subqueries_for_sets.empty()) - executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets); + // if (!query_analyzer->hasGlobalSubqueries() && !subqueries_for_sets.empty()) + // executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets); } if (expressions.second_stage || from_aggregation_stage) @@ -1423,7 +1423,8 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

hasGlobalSubqueries())) + // if (!subqueries_for_sets.empty() && (expressions.hasHaving() || query_analyzer->hasGlobalSubqueries())) + if (!subqueries_for_sets.empty()) executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets); } @@ -1887,7 +1888,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc && limit_length <= std::numeric_limits::max() - limit_offset && limit_length + limit_offset < max_block_size) { - max_block_size = std::max(UInt64(1), limit_length + limit_offset); + max_block_size = std::max(UInt64{1}, limit_length + limit_offset); max_threads_execute_query = max_streams = 1; } @@ -2577,11 +2578,11 @@ void InterpreterSelectQuery::executeExtremes(QueryPlan & query_plan) void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPlan & query_plan, SubqueriesForSets & subqueries_for_sets) { - const auto & input_order_info = query_info.input_order_info - ? query_info.input_order_info - : (query_info.projection ? query_info.projection->input_order_info : nullptr); - if (input_order_info) - executeMergeSorted(query_plan, input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins"); + // const auto & input_order_info = query_info.input_order_info + // ? query_info.input_order_info + // : (query_info.projection ? query_info.projection->input_order_info : nullptr); + // if (input_order_info) + // executeMergeSorted(query_plan, input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins"); const Settings & settings = context->getSettingsRef(); diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 7a85791d172..737e0c9d4b7 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -399,6 +399,7 @@ bool MergeTreeWhereOptimizer::cannotBeMoved(const ASTPtr & ptr, bool is_final) c return true; /// disallow GLOBAL IN, GLOBAL NOT IN + /// TODO why? if ("globalIn" == function_ptr->name || "globalNotIn" == function_ptr->name) return true; From f75b0542554af61faad7d8fee972b8aae0bd410a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 21 Mar 2022 07:47:37 +0000 Subject: [PATCH 052/132] Allow case insensitive column matching --- src/Common/StringUtils/StringUtils.h | 9 +++ src/Core/Block.cpp | 30 +++++--- src/Core/Block.h | 14 ++-- src/Core/Settings.h | 2 + src/Formats/FormatFactory.cpp | 2 + src/Formats/FormatSettings.h | 2 + .../Formats/Impl/ArrowColumnToCHColumn.cpp | 72 ++++++++++++++----- .../Formats/Impl/ArrowColumnToCHColumn.h | 6 +- .../Formats/Impl/ORCBlockInputFormat.cpp | 27 ++++++- .../Formats/Impl/ParquetBlockInputFormat.cpp | 28 +++++++- 10 files changed, 152 insertions(+), 40 deletions(-) diff --git a/src/Common/StringUtils/StringUtils.h b/src/Common/StringUtils/StringUtils.h index 21df0f5ae8b..e1a753e816d 100644 --- a/src/Common/StringUtils/StringUtils.h +++ b/src/Common/StringUtils/StringUtils.h @@ -240,6 +240,15 @@ inline bool equalsCaseInsensitive(char a, char b) return a == b || (isAlphaASCII(a) && alternateCaseIfAlphaASCII(a) == b); } +inline bool equalsCaseInsensitive(const std::string_view a, const std::string_view b) +{ + if (a.length() != b.length()) + return false; + + return std::equal( + a.begin(), a.end(), b.begin(), [](const auto first, const auto second) { return equalsCaseInsensitive(first, second); }); +} + template std::string trim(const std::string & str, F && predicate) diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 5c93d6719fa..306f99d7c24 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -269,8 +269,18 @@ const ColumnWithTypeAndName & Block::safeGetByPosition(size_t position) const } -const ColumnWithTypeAndName * Block::findByName(const std::string & name) const +const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool case_insensitive) const { + if (case_insensitive) + { + auto found = std::find_if(data.begin(), data.end(), [&](const auto & column) { return equalsCaseInsensitive(column.name, name); }); + if (found == data.end()) + { + return nullptr; + } + return &*found; + } + auto it = index_by_name.find(name); if (index_by_name.end() == it) { @@ -280,19 +290,23 @@ const ColumnWithTypeAndName * Block::findByName(const std::string & name) const } -const ColumnWithTypeAndName & Block::getByName(const std::string & name) const +const ColumnWithTypeAndName & Block::getByName(const std::string & name, bool case_insensitive) const { - const auto * result = findByName(name); + const auto * result = findByName(name, case_insensitive); if (!result) - throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames() - , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + throw Exception( + "Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); return *result; } -bool Block::has(const std::string & name) const +bool Block::has(const std::string & name, bool case_insensitive) const { + if (case_insensitive) + return std::find_if(data.begin(), data.end(), [&](const auto & column) { return equalsCaseInsensitive(column.name, name); }) + != data.end(); + return index_by_name.end() != index_by_name.find(name); } @@ -301,8 +315,8 @@ size_t Block::getPositionByName(const std::string & name) const { auto it = index_by_name.find(name); if (index_by_name.end() == it) - throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames() - , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + throw Exception( + "Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); return it->second; } diff --git a/src/Core/Block.h b/src/Core/Block.h index 66e16b70f47..c5d3e1ae35a 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -60,21 +60,21 @@ public: ColumnWithTypeAndName & safeGetByPosition(size_t position); const ColumnWithTypeAndName & safeGetByPosition(size_t position) const; - ColumnWithTypeAndName* findByName(const std::string & name) + ColumnWithTypeAndName* findByName(const std::string & name, bool case_insensitive = false) { return const_cast( - const_cast(this)->findByName(name)); + const_cast(this)->findByName(name, case_insensitive)); } - const ColumnWithTypeAndName * findByName(const std::string & name) const; + const ColumnWithTypeAndName * findByName(const std::string & name, bool case_insensitive = false) const; - ColumnWithTypeAndName & getByName(const std::string & name) + ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false) { return const_cast( - const_cast(this)->getByName(name)); + const_cast(this)->getByName(name, case_insensitive)); } - const ColumnWithTypeAndName & getByName(const std::string & name) const; + const ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false) const; Container::iterator begin() { return data.begin(); } Container::iterator end() { return data.end(); } @@ -83,7 +83,7 @@ public: Container::const_iterator cbegin() const { return data.cbegin(); } Container::const_iterator cend() const { return data.cend(); } - bool has(const std::string & name) const; + bool has(const std::string & name, bool case_insensitive = false) const; size_t getPositionByName(const std::string & name) const; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 05946a1b385..6d78b5d71ac 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -617,7 +617,9 @@ class IColumn; M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \ M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \ M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \ + M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \ M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \ + M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \ M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \ M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \ M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 3fea8d3eb7b..8c25fef53cb 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -92,6 +92,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size; format_settings.parquet.import_nested = settings.input_format_parquet_import_nested; + format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching; format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; format_settings.pretty.color = settings.output_format_pretty_color; @@ -125,6 +126,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.orc.import_nested = settings.input_format_orc_import_nested; format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; + format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.seekable_read = settings.input_format_allow_seeks; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 751b3c51fa8..de05dda9138 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -136,6 +136,7 @@ struct FormatSettings UInt64 row_group_size = 1000000; bool import_nested = false; bool allow_missing_columns = false; + bool case_insensitive_column_matching = false; } parquet; struct Pretty @@ -216,6 +217,7 @@ struct FormatSettings bool import_nested = false; bool allow_missing_columns = false; int64_t row_batch_size = 100'000; + bool case_insensitive_column_matching = false; } orc; /// For capnProto format we should determine how to diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index ecaa485c3d6..eaf4e6be6cc 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -1,5 +1,8 @@ #include "ArrowColumnToCHColumn.h" +#include +#include + #if USE_ARROW || USE_ORC || USE_PARQUET #include @@ -66,6 +69,7 @@ namespace ErrorCodes extern const int DUPLICATE_COLUMN; extern const int THERE_IS_NO_COLUMN; extern const int UNKNOWN_EXCEPTION; + extern const int INCORRECT_NUMBER_OF_COLUMNS; } @@ -485,7 +489,7 @@ static void checkStatus(const arrow::Status & status, const String & column_name throw Exception{ErrorCodes::UNKNOWN_EXCEPTION, "Error with a {} column '{}': {}.", format_name, column_name, status.ToString()}; } -Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name) +Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name, bool lowercase_names) { ColumnsWithTypeAndName sample_columns; for (const auto & field : schema.fields()) @@ -505,26 +509,36 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, std::unordered_map> dict_values; ColumnWithTypeAndName sample_column = readColumnFromArrowColumn(arrow_column, field->name(), format_name, false, dict_values, false); + if (lowercase_names) + { + boost::to_lower(sample_column.name); + } + sample_columns.emplace_back(std::move(sample_column)); } return Block(std::move(sample_columns)); } ArrowColumnToCHColumn::ArrowColumnToCHColumn( - const Block & header_, const std::string & format_name_, bool import_nested_, bool allow_missing_columns_) - : header(header_), format_name(format_name_), import_nested(import_nested_), allow_missing_columns(allow_missing_columns_) + const Block & header_, const std::string & format_name_, bool import_nested_, bool allow_missing_columns_, bool case_insensitive_matching_) + : header(header_), format_name(format_name_), import_nested(import_nested_), allow_missing_columns(allow_missing_columns_), case_insensitive_matching(case_insensitive_matching_) { } void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr & table) { NameToColumnPtr name_to_column_ptr; - for (const auto & column_name : table->ColumnNames()) + for (auto column_name : table->ColumnNames()) { std::shared_ptr arrow_column = table->GetColumnByName(column_name); if (!arrow_column) throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column '{}' is duplicated", column_name); - name_to_column_ptr[column_name] = arrow_column; + + if (case_insensitive_matching) + { + boost::to_lower(column_name); + } + name_to_column_ptr[std::move(column_name)] = arrow_column; } arrowColumnsToCHChunk(res, name_to_column_ptr); @@ -532,6 +546,8 @@ void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptrsecond->length(); columns_list.reserve(header.rows()); @@ -540,22 +556,33 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & { const ColumnWithTypeAndName & header_column = header.getByPosition(column_i); + auto search_column_name = header_column.name; + if (case_insensitive_matching) + { + boost::to_lower(search_column_name); + } + bool read_from_nested = false; String nested_table_name = Nested::extractTableName(header_column.name); - if (!name_to_column_ptr.contains(header_column.name)) + String search_nested_table_name = nested_table_name; + if (case_insensitive_matching) + { + boost::to_lower(search_nested_table_name); + } + if (!name_to_column_ptr.contains(search_column_name)) { /// Check if it's a column from nested table. - if (import_nested && name_to_column_ptr.contains(nested_table_name)) + if (import_nested && name_to_column_ptr.contains(search_nested_table_name)) { - if (!nested_tables.contains(nested_table_name)) + if (!nested_tables.contains(search_nested_table_name)) { - std::shared_ptr arrow_column = name_to_column_ptr[nested_table_name]; + std::shared_ptr arrow_column = name_to_column_ptr[search_nested_table_name]; ColumnsWithTypeAndName cols = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true)}; Block block(cols); - nested_tables[nested_table_name] = std::make_shared(Nested::flatten(block)); + nested_tables[search_nested_table_name] = std::make_shared(Nested::flatten(block)); } - read_from_nested = nested_tables[nested_table_name]->has(header_column.name); + read_from_nested = nested_tables[search_nested_table_name]->has(header_column.name, case_insensitive_matching); } if (!read_from_nested) @@ -572,11 +599,17 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & } } - std::shared_ptr arrow_column = name_to_column_ptr[header_column.name]; + std::shared_ptr arrow_column = name_to_column_ptr[search_column_name]; ColumnWithTypeAndName column; if (read_from_nested) - column = nested_tables[nested_table_name]->getByName(header_column.name); + { + column = nested_tables[search_nested_table_name]->getByName(header_column.name, case_insensitive_matching); + if (case_insensitive_matching) + { + column.name = header_column.name; + } + } else column = readColumnFromArrowColumn(arrow_column, header_column.name, format_name, false, dictionary_values, true); @@ -605,18 +638,19 @@ std::vector ArrowColumnToCHColumn::getMissingColumns(const arrow::Schema auto flatten_block_from_arrow = Nested::flatten(block_from_arrow); for (size_t i = 0, columns = header.columns(); i < columns; ++i) { - const auto & column = header.getByPosition(i); + const auto & header_column = header.getByPosition(i); + auto column_name = header_column.name; bool read_from_nested = false; - String nested_table_name = Nested::extractTableName(column.name); - if (!block_from_arrow.has(column.name)) + String nested_table_name = Nested::extractTableName(column_name); + if (!block_from_arrow.has(column_name, case_insensitive_matching)) { - if (import_nested && block_from_arrow.has(nested_table_name)) - read_from_nested = flatten_block_from_arrow.has(column.name); + if (import_nested && block_from_arrow.has(nested_table_name, case_insensitive_matching)) + read_from_nested = flatten_block_from_arrow.has(column_name, case_insensitive_matching); if (!read_from_nested) { if (!allow_missing_columns) - throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", column.name}; + throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", header_column.name}; missing_columns.push_back(i); } diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 07e7fb36404..64918d03904 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -25,7 +25,8 @@ public: const Block & header_, const std::string & format_name_, bool import_nested_, - bool allow_missing_columns_); + bool allow_missing_columns_, + bool case_insensitive_matching_ = false); void arrowTableToCHChunk(Chunk & res, std::shared_ptr & table); @@ -34,7 +35,7 @@ public: /// Get missing columns that exists in header but not in arrow::Schema std::vector getMissingColumns(const arrow::Schema & schema) const; - static Block arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name); + static Block arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name, bool lowercase_names = false); private: const Block & header; @@ -42,6 +43,7 @@ private: bool import_nested; /// If false, throw exception if some columns in header not exists in arrow table. bool allow_missing_columns; + bool case_insensitive_matching; /// Map {column name : dictionary column}. /// To avoid converting dictionary from Arrow Dictionary diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 4950e1fb952..cb6d4a19d20 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -1,4 +1,5 @@ #include "ORCBlockInputFormat.h" +#include "Common/StringUtils/StringUtils.h" #if USE_ORC #include @@ -130,7 +131,7 @@ void ORCBlockInputFormat::prepareReader() return; arrow_column_to_ch_column = std::make_unique( - getPort().getHeader(), "ORC", format_settings.orc.import_nested, format_settings.orc.allow_missing_columns); + getPort().getHeader(), "ORC", format_settings.orc.import_nested, format_settings.orc.allow_missing_columns, format_settings.orc.case_insensitive_column_matching); missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema); std::unordered_set nested_table_names; @@ -146,12 +147,34 @@ void ORCBlockInputFormat::prepareReader() /// so we should recursively count the number of indices we need for this type. int indexes_count = countIndicesForType(schema->field(i)->type()); const auto & name = schema->field(i)->name(); - if (getPort().getHeader().has(name) || nested_table_names.contains(name)) + const bool contains_column = std::invoke([&] + { + if (getPort().getHeader().has(name, format_settings.parquet.case_insensitive_column_matching)) + { + return true; + } + + if (!format_settings.parquet.case_insensitive_column_matching) + { + return nested_table_names.contains(name); + } + + return std::find_if( + nested_table_names.begin(), + nested_table_names.end(), + [&](const auto & nested_table_name) + { + return equalsCaseInsensitive(nested_table_name, name); + }) != nested_table_names.end(); + }); + + if (contains_column) { column_names.push_back(name); for (int j = 0; j != indexes_count; ++j) include_indices.push_back(index + j); } + index += indexes_count; } } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 3f0d9980573..0c39ca0498b 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -1,4 +1,5 @@ #include "ParquetBlockInputFormat.h" +#include "Common/StringUtils/StringUtils.h" #if USE_PARQUET #include @@ -132,7 +133,7 @@ void ParquetBlockInputFormat::prepareReader() row_group_total = file_reader->num_row_groups(); row_group_current = 0; - arrow_column_to_ch_column = std::make_unique(getPort().getHeader(), "Parquet", format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns); + arrow_column_to_ch_column = std::make_unique(getPort().getHeader(), "Parquet", format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns, format_settings.parquet.case_insensitive_column_matching); missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema); std::unordered_set nested_table_names; @@ -147,11 +148,34 @@ void ParquetBlockInputFormat::prepareReader() /// count the number of indices we need for this type. int indexes_count = countIndicesForType(schema->field(i)->type()); const auto & name = schema->field(i)->name(); - if (getPort().getHeader().has(name) || nested_table_names.contains(name)) + + const bool contains_column = std::invoke([&] + { + if (getPort().getHeader().has(name, format_settings.parquet.case_insensitive_column_matching)) + { + return true; + } + + if (!format_settings.parquet.case_insensitive_column_matching) + { + return nested_table_names.contains(name); + } + + return std::find_if( + nested_table_names.begin(), + nested_table_names.end(), + [&](const auto & nested_table_name) + { + return equalsCaseInsensitive(nested_table_name, name); + }) != nested_table_names.end(); + }); + + if (contains_column) { for (int j = 0; j != indexes_count; ++j) column_indices.push_back(index + j); } + index += indexes_count; } } From d73c906e68efe1214e7e84c9cbc10f346cc96b74 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 21 Mar 2022 07:50:17 +0000 Subject: [PATCH 053/132] Format code --- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 157 ++++++++++-------- .../Formats/Impl/ArrowColumnToCHColumn.h | 8 +- .../Formats/Impl/ORCBlockInputFormat.cpp | 89 +++++----- .../Formats/Impl/ParquetBlockInputFormat.cpp | 105 ++++++------ 4 files changed, 186 insertions(+), 173 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index eaf4e6be6cc..02eaa3ce952 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -1,43 +1,43 @@ #include "ArrowColumnToCHColumn.h" -#include #include +#include #if USE_ARROW || USE_ORC || USE_PARQUET -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include /// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn. -#define FOR_ARROW_NUMERIC_TYPES(M) \ +# define FOR_ARROW_NUMERIC_TYPES(M) \ M(arrow::Type::UINT8, DB::UInt8) \ M(arrow::Type::INT8, DB::Int8) \ M(arrow::Type::INT16, DB::Int16) \ @@ -48,7 +48,7 @@ M(arrow::Type::FLOAT, DB::Float32) \ M(arrow::Type::DOUBLE, DB::Float64) -#define FOR_ARROW_INDEXES_TYPES(M) \ +# define FOR_ARROW_INDEXES_TYPES(M) \ M(arrow::Type::UINT8, DB::UInt8) \ M(arrow::Type::INT8, DB::UInt8) \ M(arrow::Type::UINT16, DB::UInt16) \ @@ -180,8 +180,12 @@ static ColumnWithTypeAndName readColumnWithDate32Data(std::shared_ptr(chunk.Value(value_i)); if (days_num > DATE_LUT_MAX_EXTEND_DAY_NUM) - throw Exception{ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, - "Input value {} of a column \"{}\" is greater than max allowed Date value, which is {}", days_num, column_name, DATE_LUT_MAX_DAY_NUM}; + throw Exception{ + ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, + "Input value {} of a column \"{}\" is greater than max allowed Date value, which is {}", + days_num, + column_name, + DATE_LUT_MAX_DAY_NUM}; column_data.emplace_back(days_num); } @@ -230,7 +234,8 @@ static ColumnWithTypeAndName readColumnWithTimestampData(std::shared_ptr -static ColumnWithTypeAndName readColumnWithDecimalDataImpl(std::shared_ptr & arrow_column, const String & column_name, DataTypePtr internal_type) +static ColumnWithTypeAndName +readColumnWithDecimalDataImpl(std::shared_ptr & arrow_column, const String & column_name, DataTypePtr internal_type) { auto internal_column = internal_type->createColumn(); auto & column = assert_cast &>(*internal_column); @@ -242,7 +247,8 @@ static ColumnWithTypeAndName readColumnWithDecimalDataImpl(std::shared_ptr(*(arrow_column->chunk(chunk_i))); for (size_t value_i = 0, length = static_cast(chunk.length()); value_i < length; ++value_i) { - column_data.emplace_back(chunk.IsNull(value_i) ? DecimalType(0) : *reinterpret_cast(chunk.Value(value_i))); // TODO: copy column + column_data.emplace_back( + chunk.IsNull(value_i) ? DecimalType(0) : *reinterpret_cast(chunk.Value(value_i))); // TODO: copy column } } return {std::move(internal_column), internal_type, column_name}; @@ -303,10 +309,9 @@ static ColumnPtr readColumnWithIndexesData(std::shared_ptr switch (arrow_column->type()->id()) { # define DISPATCH(ARROW_NUMERIC_TYPE, CPP_NUMERIC_TYPE) \ - case ARROW_NUMERIC_TYPE: \ - { \ - return readColumnWithNumericData(arrow_column, "").column; \ - } + case ARROW_NUMERIC_TYPE: { \ + return readColumnWithNumericData(arrow_column, "").column; \ + } FOR_ARROW_INDEXES_TYPES(DISPATCH) # undef DISPATCH default: @@ -360,15 +365,13 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( // ClickHouse writes Date as arrow UINT16 and DateTime as arrow UINT32, // so, read UINT16 as Date and UINT32 as DateTime to perform correct conversion // between Date and DateTime further. - case arrow::Type::UINT16: - { + case arrow::Type::UINT16: { auto column = readColumnWithNumericData(arrow_column, column_name); if (read_ints_as_dates) column.type = std::make_shared(); return column; } - case arrow::Type::UINT32: - { + case arrow::Type::UINT32: { auto column = readColumnWithNumericData(arrow_column, column_name); if (read_ints_as_dates) column.type = std::make_shared(); @@ -380,10 +383,10 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( return readColumnWithDecimalData(arrow_column, column_name); case arrow::Type::DECIMAL256: return readColumnWithDecimalData(arrow_column, column_name); - case arrow::Type::MAP: - { + case arrow::Type::MAP: { auto arrow_nested_column = getNestedArrowColumn(arrow_column); - auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); + auto nested_column + = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); auto offsets_column = readOffsetsFromArrowListColumn(arrow_column); const auto * tuple_column = assert_cast(nested_column.column.get()); @@ -392,17 +395,16 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( auto map_type = std::make_shared(tuple_type->getElements()[0], tuple_type->getElements()[1]); return {std::move(map_column), std::move(map_type), column_name}; } - case arrow::Type::LIST: - { + case arrow::Type::LIST: { auto arrow_nested_column = getNestedArrowColumn(arrow_column); - auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); + auto nested_column + = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); auto offsets_column = readOffsetsFromArrowListColumn(arrow_column); auto array_column = ColumnArray::create(nested_column.column, offsets_column); auto array_type = std::make_shared(nested_column.type); return {std::move(array_column), std::move(array_type), column_name}; } - case arrow::Type::STRUCT: - { + case arrow::Type::STRUCT: { auto arrow_type = arrow_column->type(); auto * arrow_struct_type = assert_cast(arrow_type.get()); std::vector nested_arrow_columns(arrow_struct_type->num_fields()); @@ -420,7 +422,8 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( for (int i = 0; i != arrow_struct_type->num_fields(); ++i) { auto nested_arrow_column = std::make_shared(nested_arrow_columns[i]); - auto element = readColumnFromArrowColumn(nested_arrow_column, arrow_struct_type->field(i)->name(), format_name, false, dictionary_values, read_ints_as_dates); + auto element = readColumnFromArrowColumn( + nested_arrow_column, arrow_struct_type->field(i)->name(), format_name, false, dictionary_values, read_ints_as_dates); tuple_elements.emplace_back(std::move(element.column)); tuple_types.emplace_back(std::move(element.type)); tuple_names.emplace_back(std::move(element.name)); @@ -430,8 +433,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( auto tuple_type = std::make_shared(std::move(tuple_types), std::move(tuple_names)); return {std::move(tuple_column), std::move(tuple_type), column_name}; } - case arrow::Type::DICTIONARY: - { + case arrow::Type::DICTIONARY: { auto & dict_values = dictionary_values[column_name]; /// Load dictionary values only once and reuse it. if (!dict_values) @@ -443,12 +445,14 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( dict_array.emplace_back(dict_chunk.dictionary()); } auto arrow_dict_column = std::make_shared(dict_array); - auto dict_column = readColumnFromArrowColumn(arrow_dict_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); + auto dict_column + = readColumnFromArrowColumn(arrow_dict_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); /// We should convert read column to ColumnUnique. auto tmp_lc_column = DataTypeLowCardinality(dict_column.type).createColumn(); auto tmp_dict_column = IColumn::mutate(assert_cast(tmp_lc_column.get())->getDictionaryPtr()); - static_cast(tmp_dict_column.get())->uniqueInsertRangeFrom(*dict_column.column, 0, dict_column.column->size()); + static_cast(tmp_dict_column.get()) + ->uniqueInsertRangeFrom(*dict_column.column, 0, dict_column.column->size()); dict_column.column = std::move(tmp_dict_column); dict_values = std::make_shared(std::move(dict_column)); } @@ -469,13 +473,17 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( # define DISPATCH(ARROW_NUMERIC_TYPE, CPP_NUMERIC_TYPE) \ case ARROW_NUMERIC_TYPE: \ return readColumnWithNumericData(arrow_column, column_name); - FOR_ARROW_NUMERIC_TYPES(DISPATCH) + FOR_ARROW_NUMERIC_TYPES(DISPATCH) # undef DISPATCH // TODO: read JSON as a string? // TODO: read UUID as a string? default: - throw Exception(ErrorCodes::UNKNOWN_TYPE, - "Unsupported {} type '{}' of an input column '{}'.", format_name, arrow_column->type()->name(), column_name); + throw Exception( + ErrorCodes::UNKNOWN_TYPE, + "Unsupported {} type '{}' of an input column '{}'.", + format_name, + arrow_column->type()->name(), + column_name); } } @@ -495,7 +503,7 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, for (const auto & field : schema.fields()) { /// Create empty arrow column by it's type and convert it to ClickHouse column. - arrow::MemoryPool* pool = arrow::default_memory_pool(); + arrow::MemoryPool * pool = arrow::default_memory_pool(); std::unique_ptr array_builder; arrow::Status status = MakeBuilder(pool, field->type(), &array_builder); checkStatus(status, field->name(), format_name); @@ -507,7 +515,8 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, arrow::ArrayVector array_vector = {arrow_array}; auto arrow_column = std::make_shared(array_vector); std::unordered_map> dict_values; - ColumnWithTypeAndName sample_column = readColumnFromArrowColumn(arrow_column, field->name(), format_name, false, dict_values, false); + ColumnWithTypeAndName sample_column + = readColumnFromArrowColumn(arrow_column, field->name(), format_name, false, dict_values, false); if (lowercase_names) { @@ -520,8 +529,16 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, } ArrowColumnToCHColumn::ArrowColumnToCHColumn( - const Block & header_, const std::string & format_name_, bool import_nested_, bool allow_missing_columns_, bool case_insensitive_matching_) - : header(header_), format_name(format_name_), import_nested(import_nested_), allow_missing_columns(allow_missing_columns_), case_insensitive_matching(case_insensitive_matching_) + const Block & header_, + const std::string & format_name_, + bool import_nested_, + bool allow_missing_columns_, + bool case_insensitive_matching_) + : header(header_) + , format_name(format_name_) + , import_nested(import_nested_) + , allow_missing_columns(allow_missing_columns_) + , case_insensitive_matching(case_insensitive_matching_) { } @@ -567,7 +584,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & String search_nested_table_name = nested_table_name; if (case_insensitive_matching) { - boost::to_lower(search_nested_table_name); + boost::to_lower(search_nested_table_name); } if (!name_to_column_ptr.contains(search_column_name)) { @@ -577,7 +594,8 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & if (!nested_tables.contains(search_nested_table_name)) { std::shared_ptr arrow_column = name_to_column_ptr[search_nested_table_name]; - ColumnsWithTypeAndName cols = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true)}; + ColumnsWithTypeAndName cols + = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true)}; Block block(cols); nested_tables[search_nested_table_name] = std::make_shared(Nested::flatten(block)); } @@ -619,8 +637,11 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & } catch (Exception & e) { - e.addMessage(fmt::format("while converting column {} from type {} to type {}", - backQuote(header_column.name), column.type->getName(), header_column.type->getName())); + e.addMessage(fmt::format( + "while converting column {} from type {} to type {}", + backQuote(header_column.name), + column.type->getName(), + header_column.type->getName())); throw; } diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 64918d03904..38887f06303 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -4,10 +4,10 @@ #if USE_ARROW || USE_ORC || USE_PARQUET -#include -#include -#include -#include +# include +# include +# include +# include namespace DB diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index cb6d4a19d20..1eab922c397 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -2,13 +2,13 @@ #include "Common/StringUtils/StringUtils.h" #if USE_ORC -#include -#include -#include -#include -#include "ArrowBufferedStreams.h" -#include "ArrowColumnToCHColumn.h" -#include +# include +# include +# include +# include +# include +# include "ArrowBufferedStreams.h" +# include "ArrowColumnToCHColumn.h" namespace DB { @@ -131,7 +131,11 @@ void ORCBlockInputFormat::prepareReader() return; arrow_column_to_ch_column = std::make_unique( - getPort().getHeader(), "ORC", format_settings.orc.import_nested, format_settings.orc.allow_missing_columns, format_settings.orc.case_insensitive_column_matching); + getPort().getHeader(), + "ORC", + format_settings.orc.import_nested, + format_settings.orc.allow_missing_columns, + format_settings.orc.case_insensitive_column_matching); missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema); std::unordered_set nested_table_names; @@ -147,26 +151,25 @@ void ORCBlockInputFormat::prepareReader() /// so we should recursively count the number of indices we need for this type. int indexes_count = countIndicesForType(schema->field(i)->type()); const auto & name = schema->field(i)->name(); - const bool contains_column = std::invoke([&] - { - if (getPort().getHeader().has(name, format_settings.parquet.case_insensitive_column_matching)) + const bool contains_column = std::invoke( + [&] { - return true; - } - - if (!format_settings.parquet.case_insensitive_column_matching) - { - return nested_table_names.contains(name); - } - - return std::find_if( - nested_table_names.begin(), - nested_table_names.end(), - [&](const auto & nested_table_name) + if (getPort().getHeader().has(name, format_settings.parquet.case_insensitive_column_matching)) { - return equalsCaseInsensitive(nested_table_name, name); - }) != nested_table_names.end(); - }); + return true; + } + + if (!format_settings.parquet.case_insensitive_column_matching) + { + return nested_table_names.contains(name); + } + + return std::find_if( + nested_table_names.begin(), + nested_table_names.end(), + [&](const auto & nested_table_name) { return equalsCaseInsensitive(nested_table_name, name); }) + != nested_table_names.end(); + }); if (contains_column) { @@ -179,7 +182,8 @@ void ORCBlockInputFormat::prepareReader() } } -ORCSchemaReader::ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : ISchemaReader(in_), format_settings(format_settings_) +ORCSchemaReader::ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) + : ISchemaReader(in_), format_settings(format_settings_) { } @@ -196,14 +200,9 @@ NamesAndTypesList ORCSchemaReader::readSchema() void registerInputFormatORC(FormatFactory & factory) { factory.registerInputFormat( - "ORC", - [](ReadBuffer &buf, - const Block &sample, - const RowInputFormatParams &, - const FormatSettings & settings) - { - return std::make_shared(buf, sample, settings); - }); + "ORC", + [](ReadBuffer & buf, const Block & sample, const RowInputFormatParams &, const FormatSettings & settings) + { return std::make_shared(buf, sample, settings); }); factory.markFormatAsColumnOriented("ORC"); } @@ -211,11 +210,7 @@ void registerORCSchemaReader(FormatFactory & factory) { factory.registerSchemaReader( "ORC", - [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) - { - return std::make_shared(buf, settings); - } - ); + [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) { return std::make_shared(buf, settings); }); } } @@ -223,14 +218,14 @@ void registerORCSchemaReader(FormatFactory & factory) namespace DB { - class FormatFactory; - void registerInputFormatORC(FormatFactory &) - { - } +class FormatFactory; +void registerInputFormatORC(FormatFactory &) +{ +} - void registerORCSchemaReader(FormatFactory &) - { - } +void registerORCSchemaReader(FormatFactory &) +{ +} } #endif diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 0c39ca0498b..c2e3c71d671 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -2,19 +2,19 @@ #include "Common/StringUtils/StringUtils.h" #if USE_PARQUET -#include -#include -#include -#include -#include -#include -#include -#include -#include "ArrowBufferedStreams.h" -#include "ArrowColumnToCHColumn.h" -#include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include "ArrowBufferedStreams.h" +# include "ArrowColumnToCHColumn.h" -#include +# include namespace DB @@ -26,12 +26,12 @@ namespace ErrorCodes extern const int CANNOT_READ_ALL_DATA; } -#define THROW_ARROW_NOT_OK(status) \ - do \ - { \ - if (::arrow::Status _s = (status); !_s.ok()) \ - throw Exception(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \ - } while (false) +# define THROW_ARROW_NOT_OK(status) \ + do \ + { \ + if (::arrow::Status _s = (status); !_s.ok()) \ + throw Exception(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \ + } while (false) ParquetBlockInputFormat::ParquetBlockInputFormat(ReadBuffer & in_, Block header_, const FormatSettings & format_settings_) : IInputFormat(std::move(header_), in_), format_settings(format_settings_) @@ -55,8 +55,7 @@ Chunk ParquetBlockInputFormat::generate() std::shared_ptr table; arrow::Status read_status = file_reader->ReadRowGroup(row_group_current, column_indices, &table); if (!read_status.ok()) - throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(), - ErrorCodes::CANNOT_READ_ALL_DATA}; + throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA}; ++row_group_current; @@ -133,7 +132,12 @@ void ParquetBlockInputFormat::prepareReader() row_group_total = file_reader->num_row_groups(); row_group_current = 0; - arrow_column_to_ch_column = std::make_unique(getPort().getHeader(), "Parquet", format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns, format_settings.parquet.case_insensitive_column_matching); + arrow_column_to_ch_column = std::make_unique( + getPort().getHeader(), + "Parquet", + format_settings.parquet.import_nested, + format_settings.parquet.allow_missing_columns, + format_settings.parquet.case_insensitive_column_matching); missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema); std::unordered_set nested_table_names; @@ -149,26 +153,25 @@ void ParquetBlockInputFormat::prepareReader() int indexes_count = countIndicesForType(schema->field(i)->type()); const auto & name = schema->field(i)->name(); - const bool contains_column = std::invoke([&] - { - if (getPort().getHeader().has(name, format_settings.parquet.case_insensitive_column_matching)) + const bool contains_column = std::invoke( + [&] { - return true; - } - - if (!format_settings.parquet.case_insensitive_column_matching) - { - return nested_table_names.contains(name); - } - - return std::find_if( - nested_table_names.begin(), - nested_table_names.end(), - [&](const auto & nested_table_name) + if (getPort().getHeader().has(name, format_settings.parquet.case_insensitive_column_matching)) { - return equalsCaseInsensitive(nested_table_name, name); - }) != nested_table_names.end(); - }); + return true; + } + + if (!format_settings.parquet.case_insensitive_column_matching) + { + return nested_table_names.contains(name); + } + + return std::find_if( + nested_table_names.begin(), + nested_table_names.end(), + [&](const auto & nested_table_name) { return equalsCaseInsensitive(nested_table_name, name); }) + != nested_table_names.end(); + }); if (contains_column) { @@ -180,7 +183,8 @@ void ParquetBlockInputFormat::prepareReader() } } -ParquetSchemaReader::ParquetSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : ISchemaReader(in_), format_settings(format_settings_) +ParquetSchemaReader::ParquetSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) + : ISchemaReader(in_), format_settings(format_settings_) { } @@ -197,14 +201,9 @@ NamesAndTypesList ParquetSchemaReader::readSchema() void registerInputFormatParquet(FormatFactory & factory) { factory.registerInputFormat( - "Parquet", - [](ReadBuffer &buf, - const Block &sample, - const RowInputFormatParams &, - const FormatSettings & settings) - { - return std::make_shared(buf, sample, settings); - }); + "Parquet", + [](ReadBuffer & buf, const Block & sample, const RowInputFormatParams &, const FormatSettings & settings) + { return std::make_shared(buf, sample, settings); }); factory.markFormatAsColumnOriented("Parquet"); } @@ -212,11 +211,7 @@ void registerParquetSchemaReader(FormatFactory & factory) { factory.registerSchemaReader( "Parquet", - [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) - { - return std::make_shared(buf, settings); - } - ); + [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) { return std::make_shared(buf, settings); }); } } @@ -230,7 +225,9 @@ void registerInputFormatParquet(FormatFactory &) { } -void registerParquetSchemaReader(FormatFactory &) {} +void registerParquetSchemaReader(FormatFactory &) +{ +} } #endif From 0c74fa2c1936af2eec5bead599cffef6c25691aa Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 21 Mar 2022 08:38:15 +0000 Subject: [PATCH 054/132] Remove unecessary code --- src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 7 +------ src/Processors/Formats/Impl/ArrowColumnToCHColumn.h | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 02eaa3ce952..22867102978 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -497,7 +497,7 @@ static void checkStatus(const arrow::Status & status, const String & column_name throw Exception{ErrorCodes::UNKNOWN_EXCEPTION, "Error with a {} column '{}': {}.", format_name, column_name, status.ToString()}; } -Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name, bool lowercase_names) +Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name) { ColumnsWithTypeAndName sample_columns; for (const auto & field : schema.fields()) @@ -518,11 +518,6 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, ColumnWithTypeAndName sample_column = readColumnFromArrowColumn(arrow_column, field->name(), format_name, false, dict_values, false); - if (lowercase_names) - { - boost::to_lower(sample_column.name); - } - sample_columns.emplace_back(std::move(sample_column)); } return Block(std::move(sample_columns)); diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 38887f06303..d87bbcd0550 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -35,7 +35,7 @@ public: /// Get missing columns that exists in header but not in arrow::Schema std::vector getMissingColumns(const arrow::Schema & schema) const; - static Block arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name, bool lowercase_names = false); + static Block arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name); private: const Block & header; From 7e14ab46a3a30d8462f3511d6bb4afb4aae00e57 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 21 Mar 2022 09:03:23 +0000 Subject: [PATCH 055/132] Add tests for case insensitive matching --- .../00900_long_parquet_load.reference | 3 +++ ...case_insensitive_column_matching.reference | 6 +++++ .../02240_case_insensitive_column_matching.sh | 22 ++++++++++++++++++ .../case_insensitive_column_matching.orc | Bin 0 -> 364 bytes .../case_insensitive_column_matching.parquet | Bin 0 -> 811 bytes ...nsensitive_column_matching.parquet.columns | 1 + ...e_insensitive_column_matching.parquet.json | 0 7 files changed, 32 insertions(+) create mode 100644 tests/queries/0_stateless/02240_case_insensitive_column_matching.reference create mode 100755 tests/queries/0_stateless/02240_case_insensitive_column_matching.sh create mode 100644 tests/queries/0_stateless/data_orc/case_insensitive_column_matching.orc create mode 100644 tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet create mode 100644 tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet.columns create mode 100644 tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet.json diff --git a/tests/queries/0_stateless/00900_long_parquet_load.reference b/tests/queries/0_stateless/00900_long_parquet_load.reference index 6ecff505b2e..98d8e2c5e3e 100644 --- a/tests/queries/0_stateless/00900_long_parquet_load.reference +++ b/tests/queries/0_stateless/00900_long_parquet_load.reference @@ -88,6 +88,9 @@ idx10 ['This','is','a','test'] 22 23 24 +=== Try load data from case_insensitive_column_matching.parquet +123 1 +456 2 === Try load data from datapage_v2.snappy.parquet Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Unknown encoding type.: While executing ParquetBlockInputFormat: data for INSERT was parsed from stdin: (in query: INSERT INTO parquet_load FORMAT Parquet). (CANNOT_READ_ALL_DATA) diff --git a/tests/queries/0_stateless/02240_case_insensitive_column_matching.reference b/tests/queries/0_stateless/02240_case_insensitive_column_matching.reference new file mode 100644 index 00000000000..5c383cb3035 --- /dev/null +++ b/tests/queries/0_stateless/02240_case_insensitive_column_matching.reference @@ -0,0 +1,6 @@ +Parquet +123 1 +456 2 +ORC +123 1 +456 2 diff --git a/tests/queries/0_stateless/02240_case_insensitive_column_matching.sh b/tests/queries/0_stateless/02240_case_insensitive_column_matching.sh new file mode 100755 index 00000000000..86e9cb7ee4c --- /dev/null +++ b/tests/queries/0_stateless/02240_case_insensitive_column_matching.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Tags: no-ubsan, no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +echo "Parquet" +DATA_FILE=$CUR_DIR/data_parquet/case_insensitive_column_matching.parquet +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (iD String, scOre Int32) ENGINE = Memory" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet SETTINGS input_format_parquet_case_insensitive_column_matching=true" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" +${CLICKHOUSE_CLIENT} --query="drop table parquet_load" + +echo "ORC" +DATA_FILE=$CUR_DIR/data_orc/case_insensitive_column_matching.orc +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS orc_load" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE orc_load (iD String, sCorE Int32) ENGINE = Memory" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO orc_load FORMAT ORC SETTINGS input_format_orc_case_insensitive_column_matching=true" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM orc_load" +${CLICKHOUSE_CLIENT} --query="drop table orc_load" diff --git a/tests/queries/0_stateless/data_orc/case_insensitive_column_matching.orc b/tests/queries/0_stateless/data_orc/case_insensitive_column_matching.orc new file mode 100644 index 0000000000000000000000000000000000000000..136f99800641e8492d33e083c54153b976fdfc55 GIT binary patch literal 364 zcmZvX%SyvQ6o$_w8HeKtVL&njkrG@47bUGX$Vyu9ve1_5UhuLO#0Mxof$!oAw8?eTu|L$}q5MRGR2h^f9BrT1xY)YJYFR zmpAuPN#Ax;$F*vfvo9<}F$sR~P*Is%=LxBx>*pf9dwP9)89)U!l-6$jPux#6HStcf m;ZC!5=lt{b3Ln$a@N77?@A}2%GARjMby%B^ALiL&n*RdqV<(FM literal 0 HcmV?d00001 diff --git a/tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet b/tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet new file mode 100644 index 0000000000000000000000000000000000000000..922def77cafa3b4fe8f35896b3035e521301896d GIT binary patch literal 811 zcmb_b%}(1u5T0Fcqaa17LhQE@cj$nQvyk`POT9xwoX1wz|3pr)G$zR|6tiT3&H( zwb9h1XSAugsxIl0ZPI)II1JE&j;X4|Qwb5s_)1t^i5iMZQ7Ah{S{h}(3G-dV+j05+ zU{nxF0=?3X0`94v9jho?0nFon?GKehL*?p*<|i@$lWKkiR?9mzPu`@%eBcFL33Dyt z-!lFo?E_r@5GbRq28E^5^my>OuNCRaoOI`$qdkk=S|TkLkx`M2YXA2_;>SLKxyFUW zW%Er!i@D=G^07ze z!o<%5N9QMyt|Mr-+AhZs%!fYWcW}wu56c^05B*fe+nyx7YxV7ZGltNdIX$~h7wo_- zFRZ=eNl}DN@OwQk4*{YTxL@*u!e^s?esVrITS!Nvg?^q7FRK@W{Pg4SOLeWjSYM24 PxKwwqH~7-J_)TsBK^}x5 literal 0 HcmV?d00001 diff --git a/tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet.columns b/tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet.columns new file mode 100644 index 00000000000..e25da8f923d --- /dev/null +++ b/tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet.columns @@ -0,0 +1 @@ +`Id` Nullable(String), `Score` Nullable(Int32) diff --git a/tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet.json b/tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet.json new file mode 100644 index 00000000000..e69de29bb2d From f67b8c0bada23629cc9d04caa64e1d7560432a11 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 21 Mar 2022 12:44:00 +0100 Subject: [PATCH 056/132] Update src/Processors/Formats/Impl/TSKVRowInputFormat.cpp Co-authored-by: Antonio Andelic --- src/Processors/Formats/Impl/TSKVRowInputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index dc8b281c1b2..80fbe133a92 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -241,7 +241,7 @@ std::unordered_map TSKVSchemaReader::readRowAndGetNamesAndD std::unordered_map names_and_types; StringRef name_ref; - String name; + String name_buf; String value; do { From 0b381ebd26caf12ddb53e21c21b2e9f3b41e73ce Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 21 Mar 2022 12:44:06 +0100 Subject: [PATCH 057/132] Update src/Processors/Formats/Impl/TSKVRowInputFormat.cpp Co-authored-by: Antonio Andelic --- src/Processors/Formats/Impl/TSKVRowInputFormat.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 80fbe133a92..efcc1de1b23 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -245,8 +245,8 @@ std::unordered_map TSKVSchemaReader::readRowAndGetNamesAndD String value; do { - bool has_value = readName(in, name_ref, name); - name = String(name_ref); + bool has_value = readName(in, name_ref, name_buf); + String name = String(name_ref); if (has_value) { readEscapedString(value, in); From 1645b7083f4b4c43639143ee3510ca224c6646d6 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 21 Mar 2022 12:44:12 +0100 Subject: [PATCH 058/132] Update src/Processors/Formats/Impl/TSKVRowInputFormat.cpp Co-authored-by: Antonio Andelic --- src/Processors/Formats/Impl/TSKVRowInputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index efcc1de1b23..87ba1b18fa7 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -250,7 +250,7 @@ std::unordered_map TSKVSchemaReader::readRowAndGetNamesAndD if (has_value) { readEscapedString(value, in); - names_and_types[name] = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped); + names_and_types[std::move(name)] = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped); } else { From 0457a3998ab3f7c23fd97a7a514cf14ca9ee62d9 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 21 Mar 2022 11:58:55 +0000 Subject: [PATCH 059/132] remove old test --- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 13 +++++------ ..._input_format_use_lowercase_column_name.sh | 22 ------------------- 2 files changed, 6 insertions(+), 29 deletions(-) delete mode 100755 tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 22867102978..40d9149a512 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -612,19 +612,19 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & } } - std::shared_ptr arrow_column = name_to_column_ptr[search_column_name]; ColumnWithTypeAndName column; if (read_from_nested) { column = nested_tables[search_nested_table_name]->getByName(header_column.name, case_insensitive_matching); if (case_insensitive_matching) - { column.name = header_column.name; - } } else + { + auto arrow_column = name_to_column_ptr[search_column_name]; column = readColumnFromArrowColumn(arrow_column, header_column.name, format_name, false, dictionary_values, true); + } try { @@ -655,13 +655,12 @@ std::vector ArrowColumnToCHColumn::getMissingColumns(const arrow::Schema for (size_t i = 0, columns = header.columns(); i < columns; ++i) { const auto & header_column = header.getByPosition(i); - auto column_name = header_column.name; bool read_from_nested = false; - String nested_table_name = Nested::extractTableName(column_name); - if (!block_from_arrow.has(column_name, case_insensitive_matching)) + String nested_table_name = Nested::extractTableName(header_column.name); + if (!block_from_arrow.has(header_column.name, case_insensitive_matching)) { if (import_nested && block_from_arrow.has(nested_table_name, case_insensitive_matching)) - read_from_nested = flatten_block_from_arrow.has(column_name, case_insensitive_matching); + read_from_nested = flatten_block_from_arrow.has(header_column.name, case_insensitive_matching); if (!read_from_nested) { diff --git a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh b/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh deleted file mode 100755 index b946addd01c..00000000000 --- a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-ubsan, no-fasttest - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -echo "Parquet" -DATA_FILE=$CUR_DIR/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet -${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (id String, score Int32) ENGINE = Memory" -cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet SETTINGS input_format_use_lowercase_column_name=true" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" -${CLICKHOUSE_CLIENT} --query="drop table parquet_load" - -echo "ORC" -DATA_FILE=$CUR_DIR/data_orc/test_setting_input_format_use_lowercase_column_name.orc -${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS orc_load" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE orc_load (id String, score Int32) ENGINE = Memory" -cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO orc_load FORMAT ORC SETTINGS input_format_use_lowercase_column_name=true" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM orc_load" -${CLICKHOUSE_CLIENT} --query="drop table orc_load" From cb3703b46eb47c0493c1b817976e80ecaaf7353b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 21 Mar 2022 12:54:56 +0000 Subject: [PATCH 060/132] Style fix --- src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 40d9149a512..4293eb3c1c2 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -180,12 +180,12 @@ static ColumnWithTypeAndName readColumnWithDate32Data(std::shared_ptr(chunk.Value(value_i)); if (days_num > DATE_LUT_MAX_EXTEND_DAY_NUM) - throw Exception{ + throw Exception( ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Input value {} of a column \"{}\" is greater than max allowed Date value, which is {}", days_num, column_name, - DATE_LUT_MAX_DAY_NUM}; + DATE_LUT_MAX_DAY_NUM); column_data.emplace_back(days_num); } From d0217a0025738b5bc1f3b9330e1e1f21d21d9924 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 21 Mar 2022 14:22:15 +0100 Subject: [PATCH 061/132] make tests more compact --- src/CMakeLists.txt | 2 - tests/integration/helpers/cluster.py | 25 ++++-- .../configs/remote_servers.xml | 23 ----- .../remote_servers_nearest_hostname.xml | 23 ----- .../zookeeper_config_first_or_random.xml | 19 ---- .../zookeeper_config_nearest_hostname.xml | 19 ---- .../configs/zookeeper_config_round_robin.xml | 19 ---- ...order.xml => zookeeper_load_balancing.xml} | 4 +- .../test.py | 86 ++++++++++++++++--- .../test_in_order.py | 35 -------- .../test_nearest_hostname.py | 35 -------- .../test_round_robin.py | 41 --------- 12 files changed, 90 insertions(+), 241 deletions(-) delete mode 100644 tests/integration/test_zookeeper_config_load_balancing/configs/remote_servers.xml delete mode 100644 tests/integration/test_zookeeper_config_load_balancing/configs/remote_servers_nearest_hostname.xml delete mode 100644 tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_first_or_random.xml delete mode 100644 tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_nearest_hostname.xml delete mode 100644 tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_round_robin.xml rename tests/integration/test_zookeeper_config_load_balancing/configs/{zookeeper_config_in_order.xml => zookeeper_load_balancing.xml} (66%) delete mode 100644 tests/integration/test_zookeeper_config_load_balancing/test_in_order.py delete mode 100644 tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py delete mode 100644 tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6acea1c149e..b24181625d3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -225,8 +225,6 @@ endmacro() add_object_library(clickhouse_access Access) add_object_library(clickhouse_backups Backups) add_object_library(clickhouse_core Core) -add_library (clickhouse_core_settings_enums Core/SettingsEnums.cpp) -target_link_libraries(clickhouse_core_settings_enums PRIVATE common clickhouse_common_io) add_object_library(clickhouse_core_mysql Core/MySQL) add_object_library(clickhouse_compression Compression) add_object_library(clickhouse_querypipeline QueryPipeline) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 627e3725232..67d096a1f0c 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -16,20 +16,27 @@ import traceback import urllib.parse import shlex import urllib3 - -from cassandra.policies import RoundRobinPolicy -import cassandra.cluster -import psycopg2 -import pymongo -import pymysql import requests -from confluent_kafka.avro.cached_schema_registry_client import \ - CachedSchemaRegistryClient + +try: + # Please, add modules that required for specific tests only here. + # So contributors will be able to run most tests locally + # without installing tons of unneeded packages that may be not so easy to install. + from cassandra.policies import RoundRobinPolicy + import cassandra.cluster + import psycopg2 + from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT + import pymongo + import pymysql + from confluent_kafka.avro.cached_schema_registry_client import \ + CachedSchemaRegistryClient +except Exception as e: + logging.warning(f"Cannot import some modules, some tests may not work:{e}") + from dict2xml import dict2xml from kazoo.client import KazooClient from kazoo.exceptions import KazooException from minio import Minio -from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT from helpers.test_tools import assert_eq_with_retry, exec_query_with_retry from helpers import pytest_xdist_logging_to_separate_files diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/remote_servers.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/remote_servers.xml deleted file mode 100644 index 63fdcea5dab..00000000000 --- a/tests/integration/test_zookeeper_config_load_balancing/configs/remote_servers.xml +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - node1 - 9000 - - - - node2 - 9000 - - - - node3 - 9000 - - - - - - diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/remote_servers_nearest_hostname.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/remote_servers_nearest_hostname.xml deleted file mode 100644 index 62f361049c9..00000000000 --- a/tests/integration/test_zookeeper_config_load_balancing/configs/remote_servers_nearest_hostname.xml +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - nod1 - 9000 - - - - nod2 - 9000 - - - - nod3 - 9000 - - - - - - diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_first_or_random.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_first_or_random.xml deleted file mode 100644 index 9688480fa90..00000000000 --- a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_first_or_random.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - - first_or_random - - zoo1 - 2181 - - - zoo2 - 2181 - - - zoo3 - 2181 - - 3000 - - diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_nearest_hostname.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_nearest_hostname.xml deleted file mode 100644 index 265ebe05fab..00000000000 --- a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_nearest_hostname.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - - nearest_hostname - - zoo1 - 2181 - - - zoo2 - 2181 - - - zoo3 - 2181 - - 3000 - - diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_round_robin.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_round_robin.xml deleted file mode 100644 index 3b64d629e6e..00000000000 --- a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_round_robin.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - - round_robin - - zoo1 - 2181 - - - zoo2 - 2181 - - - zoo3 - 2181 - - 3000 - - diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_in_order.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_load_balancing.xml similarity index 66% rename from tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_in_order.xml rename to tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_load_balancing.xml index ebd266d80b0..5416e5e82de 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_config_in_order.xml +++ b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_load_balancing.xml @@ -1,7 +1,7 @@ - - in_order + + random zoo1 2181 diff --git a/tests/integration/test_zookeeper_config_load_balancing/test.py b/tests/integration/test_zookeeper_config_load_balancing/test.py index 144ba14ce40..9c8f5075cc3 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test.py @@ -1,35 +1,93 @@ -import time import pytest -import logging from helpers.cluster import ClickHouseCluster -cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_first_or_random.xml') +cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_load_balancing.xml') node1 = cluster.add_instance('node1', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_first_or_random.xml"]) + main_configs=["configs/zookeeper_load_balancing.xml"]) node2 = cluster.add_instance('node2', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_first_or_random.xml"]) + main_configs=["configs/zookeeper_load_balancing.xml"]) node3 = cluster.add_instance('node3', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_first_or_random.xml"]) + main_configs=["configs/zookeeper_load_balancing.xml"]) + +def change_balancing(old, new, reload=True): + line = '{}<' + old_line = line.format(old) + new_line = line.format(new) + for node in [node1, node2, node3]: + node.replace_in_config('/etc/clickhouse-server/config.d/zookeeper_load_balancing.xml', old_line, new_line) + if reload: + node.query('system reload config') @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() - yield cluster - finally: cluster.shutdown() def test_first_or_random(started_cluster): - print(str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + try: + change_balancing('random', 'first_or_random') + print(str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) + assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() - print(str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + print(str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) + assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() - print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) + assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + finally: + change_balancing('first_or_random', 'random', reload=False) + + +def test_in_order(started_cluster): + try: + change_balancing('random', 'in_order') + print(str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) + assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + + print(str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) + assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + + print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) + assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + finally: + change_balancing('first_or_random', 'random', reload=False) + + +def test_nearest_hostname(started_cluster): + try: + change_balancing('random', 'nearest_hostname') + print(str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) + assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + + print(str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) + assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + + print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) + assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo3_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + finally: + change_balancing('first_or_random', 'random', reload=False) + + +def test_round_robin(started_cluster): + try: + started_cluster.stop_zookeeper_nodes(["zoo1"]) + change_balancing('random', 'round_robin') + + print(str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) + assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + + print(str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) + assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + + print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) + assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + + started_cluster.start_zookeeper_nodes(["zoo1"]) + finally: + change_balancing('first_or_random', 'random', reload=False) diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py b/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py deleted file mode 100644 index 095aba72217..00000000000 --- a/tests/integration/test_zookeeper_config_load_balancing/test_in_order.py +++ /dev/null @@ -1,35 +0,0 @@ -import time -import pytest -import logging -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_in_order.xml') - -node1 = cluster.add_instance('node1', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml"]) -node2 = cluster.add_instance('node2', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml"]) -node3 = cluster.add_instance('node3', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_in_order.xml"]) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - - yield cluster - - finally: - cluster.shutdown() - -def test_in_order(started_cluster): - - print(str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() - - print(str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() - - print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py b/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py deleted file mode 100644 index 23c0386b1d2..00000000000 --- a/tests/integration/test_zookeeper_config_load_balancing/test_nearest_hostname.py +++ /dev/null @@ -1,35 +0,0 @@ -import time -import pytest -import logging -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_nearest_hostname.xml') - -node1 = cluster.add_instance('nod1', with_zookeeper=True, - main_configs=["configs/remote_servers_nearest_hostname.xml", "configs/zookeeper_config_nearest_hostname.xml"]) -node2 = cluster.add_instance('nod2', with_zookeeper=True, - main_configs=["configs/remote_servers_nearest_hostname.xml", "configs/zookeeper_config_nearest_hostname.xml"]) -node3 = cluster.add_instance('nod3', with_zookeeper=True, - main_configs=["configs/remote_servers_nearest_hostname.xml", "configs/zookeeper_config_nearest_hostname.xml"]) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - - yield cluster - - finally: - cluster.shutdown() - -def test_nearest_hostname(started_cluster): - - print(str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo1_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() - - print(str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo2_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() - - print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo3_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() diff --git a/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py b/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py deleted file mode 100644 index 3623371c244..00000000000 --- a/tests/integration/test_zookeeper_config_load_balancing/test_round_robin.py +++ /dev/null @@ -1,41 +0,0 @@ -import time -import pytest -import logging -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_config_round_robin.xml') - -node1 = cluster.add_instance('node1', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_round_robin.xml"]) -node2 = cluster.add_instance('node2', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_round_robin.xml"]) -node3 = cluster.add_instance('node3', with_zookeeper=True, - main_configs=["configs/remote_servers.xml", "configs/zookeeper_config_round_robin.xml"]) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - - yield cluster - - finally: - cluster.shutdown() - - -def test_round_robin(started_cluster): - - started_cluster.stop_zookeeper_nodes(["zoo1"]) - time.sleep(1) - - print(str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo2_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() - - print(str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo2_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() - - print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'roottestzookeeperconfigloadbalancing_zoo2_1.roottestzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() - - started_cluster.start_zookeeper_nodes(["zoo1"]) From 3cca5fb181565a397d19e6b754b97ad622b95b0d Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 21 Mar 2022 15:55:01 +0100 Subject: [PATCH 062/132] minor fixes --- src/Common/GetPriorityForLoadBalancing.cpp | 7 +++++++ src/Common/GetPriorityForLoadBalancing.h | 9 +++++++-- src/Common/ZooKeeper/ZooKeeper.cpp | 4 ++-- src/Common/ZooKeeper/ZooKeeper.h | 7 ++----- src/Common/isLocalAddress.cpp | 1 + .../test_zookeeper_config_load_balancing/test.py | 15 ++++++++------- 6 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/Common/GetPriorityForLoadBalancing.cpp b/src/Common/GetPriorityForLoadBalancing.cpp index fa0eeb14bed..d8e7566e891 100644 --- a/src/Common/GetPriorityForLoadBalancing.cpp +++ b/src/Common/GetPriorityForLoadBalancing.cpp @@ -3,12 +3,19 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + std::function GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const { std::function get_priority; switch (load_balance) { case LoadBalancing::NEAREST_HOSTNAME: + if (hostname_differences.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "It's a bug: hostname_differences is not initialized"); get_priority = [&](size_t i) { return hostname_differences[i]; }; break; case LoadBalancing::IN_ORDER: diff --git a/src/Common/GetPriorityForLoadBalancing.h b/src/Common/GetPriorityForLoadBalancing.h index 4ec686188e4..e57b02b5e90 100644 --- a/src/Common/GetPriorityForLoadBalancing.h +++ b/src/Common/GetPriorityForLoadBalancing.h @@ -11,9 +11,14 @@ public: GetPriorityForLoadBalancing(LoadBalancing load_balancing_) : load_balancing(load_balancing_) {} GetPriorityForLoadBalancing(){} - bool operator!=(const GetPriorityForLoadBalancing & other) + bool operator == (const GetPriorityForLoadBalancing & other) const { - return load_balancing != other.load_balancing || hostname_differences != other.hostname_differences; + return load_balancing == other.load_balancing && hostname_differences == other.hostname_differences; + } + + bool operator != (const GetPriorityForLoadBalancing & other) const + { + return !(*this == other); } std::function getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const; diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 22cb15252c4..5e9b3a41dd3 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -290,8 +290,8 @@ bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config, if (args.get_priority_load_balancing != get_priority_load_balancing) return true; - return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot) - != std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot); + return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing) + != std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, args.get_priority_load_balancing); } diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 570c6e2d817..f901a79591f 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -13,8 +13,8 @@ #include #include #include -#include #include +#include #include #include @@ -48,7 +48,7 @@ struct ShuffleHost void randomize() { - random = rng(); + random = thread_local_rng(); } static bool compare(const ShuffleHost & lhs, const ShuffleHost & rhs) @@ -56,9 +56,6 @@ struct ShuffleHost return std::forward_as_tuple(lhs.priority, lhs.random) < std::forward_as_tuple(rhs.priority, rhs.random); } - -private: - std::minstd_rand rng = std::minstd_rand(randomSeed()); }; using GetPriorityForLoadBalancing = DB::GetPriorityForLoadBalancing; diff --git a/src/Common/isLocalAddress.cpp b/src/Common/isLocalAddress.cpp index d79e4cebd15..596fd4caad7 100644 --- a/src/Common/isLocalAddress.cpp +++ b/src/Common/isLocalAddress.cpp @@ -124,6 +124,7 @@ bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_ size_t getHostNameDifference(const std::string & local_hostname, const std::string & host) { + /// FIXME should we replace it with Levenstein distance? (we already have it in NamePrompter) size_t hostname_difference = 0; for (size_t i = 0; i < std::min(local_hostname.length(), host.length()); ++i) if (local_hostname[i] != host[i]) diff --git a/tests/integration/test_zookeeper_config_load_balancing/test.py b/tests/integration/test_zookeeper_config_load_balancing/test.py index 9c8f5075cc3..79f21ad3eca 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test.py @@ -3,11 +3,12 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_load_balancing.xml') -node1 = cluster.add_instance('node1', with_zookeeper=True, +# use 3-letter hostnames, so getHostNameDifference("nod1", "zoo1") will work as expected +node1 = cluster.add_instance('nod1', with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"]) -node2 = cluster.add_instance('node2', with_zookeeper=True, +node2 = cluster.add_instance('nod2', with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"]) -node3 = cluster.add_instance('node3', with_zookeeper=True, +node3 = cluster.add_instance('nod3', with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"]) def change_balancing(old, new, reload=True): @@ -56,7 +57,7 @@ def test_in_order(started_cluster): print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() finally: - change_balancing('first_or_random', 'random', reload=False) + change_balancing('in_order', 'random', reload=False) def test_nearest_hostname(started_cluster): @@ -71,7 +72,7 @@ def test_nearest_hostname(started_cluster): print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo3_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() finally: - change_balancing('first_or_random', 'random', reload=False) + change_balancing('nearest_hostname', 'random', reload=False) def test_round_robin(started_cluster): @@ -88,6 +89,6 @@ def test_round_robin(started_cluster): print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() - started_cluster.start_zookeeper_nodes(["zoo1"]) finally: - change_balancing('first_or_random', 'random', reload=False) + started_cluster.start_zookeeper_nodes(["zoo1"]) + change_balancing('round_robin', 'random', reload=False) From 0b8ddedb39528384a2d93076782faada7aee6f84 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 21 Mar 2022 18:01:55 +0000 Subject: [PATCH 063/132] Fix tests --- tests/queries/0_stateless/00505_secure.sh | 2 +- tests/queries/0_stateless/02015_async_inserts_2.sh | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/00505_secure.sh b/tests/queries/0_stateless/00505_secure.sh index 69d8a039bca..b7c12911b90 100755 --- a/tests/queries/0_stateless/00505_secure.sh +++ b/tests/queries/0_stateless/00505_secure.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-random=settings +# Tags: no-fasttest, no-random-settings # set -x diff --git a/tests/queries/0_stateless/02015_async_inserts_2.sh b/tests/queries/0_stateless/02015_async_inserts_2.sh index 90f5584d84e..fd20f846897 100755 --- a/tests/queries/0_stateless/02015_async_inserts_2.sh +++ b/tests/queries/0_stateless/02015_async_inserts_2.sh @@ -1,13 +1,14 @@ #!/usr/bin/env bash +# Tags: no-random-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1" +url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1&max_insert_threads=0&group_by_two_level_threshold=100000&group_by_two_level_threshold_bytes=50000000&distributed_aggregation_memory_efficient=1&fsync_metadata=1&priority=1&output_format_parallel_formatting=0&input_format_parallel_parsing=0&min_chunk_bytes_for_parallel_parsing=4031398&max_read_buffer_size=554729&prefer_localhost_replica=0&max_block_size=51672&max_threads=20" -${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts" -${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id" +${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "DROP TABLE IF EXISTS async_inserts" +${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id" ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV 1,"a" @@ -22,7 +23,7 @@ ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV wait -${CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts ORDER BY id" -${CLICKHOUSE_CLIENT} -q "SELECT name, rows, level FROM system.parts WHERE table = 'async_inserts' AND database = '$CLICKHOUSE_DATABASE' ORDER BY name" +${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "SELECT * FROM async_inserts ORDER BY id" +${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "SELECT name, rows, level FROM system.parts WHERE table = 'async_inserts' AND database = '$CLICKHOUSE_DATABASE' ORDER BY name" -${CLICKHOUSE_CLIENT} -q "DROP TABLE async_inserts" +${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "DROP TABLE async_inserts" From 0ab258be010e5d5140e35e2169ba9a9b567d837c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 21 Mar 2022 19:59:47 +0100 Subject: [PATCH 064/132] fix_build --- src/Common/ZooKeeper/ZooKeeper.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 5e9b3a41dd3..5aeeaf9b4ef 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -239,7 +239,12 @@ struct ZooKeeperArgs } else if (key == "zookeeper_load_balancing") { - get_priority_load_balancing.load_balancing = DB::SettingFieldLoadBalancingTraits::fromString(config.getString(config_name + "." + key)); + String load_balancing_str = config.getString(config_name + "." + key); + /// Use magic_enum to avoid dependency from dbms (`SettingFieldLoadBalancingTraits::fromString(...)`) + auto load_balancing = magic_enum::enum_cast(Poco::toUpper(load_balancing_str)); + if (!load_balancing) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str); + get_priority_load_balancing.load_balancing = *load_balancing; } else throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS); From b278600c3eb9462acb00accc36232643a08c9958 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 22 Mar 2022 07:38:26 +0000 Subject: [PATCH 065/132] rename tests --- ...reference => 02242_case_insensitive_column_matching.reference} | 0 ...lumn_matching.sh => 02242_case_insensitive_column_matching.sh} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{02240_case_insensitive_column_matching.reference => 02242_case_insensitive_column_matching.reference} (100%) rename tests/queries/0_stateless/{02240_case_insensitive_column_matching.sh => 02242_case_insensitive_column_matching.sh} (100%) diff --git a/tests/queries/0_stateless/02240_case_insensitive_column_matching.reference b/tests/queries/0_stateless/02242_case_insensitive_column_matching.reference similarity index 100% rename from tests/queries/0_stateless/02240_case_insensitive_column_matching.reference rename to tests/queries/0_stateless/02242_case_insensitive_column_matching.reference diff --git a/tests/queries/0_stateless/02240_case_insensitive_column_matching.sh b/tests/queries/0_stateless/02242_case_insensitive_column_matching.sh similarity index 100% rename from tests/queries/0_stateless/02240_case_insensitive_column_matching.sh rename to tests/queries/0_stateless/02242_case_insensitive_column_matching.sh From ca7844e3384dd8a7fce5ca1442f1bb40be897dda Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 22 Mar 2022 09:27:20 +0000 Subject: [PATCH 066/132] Fix tests --- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 28 ++++++++++++-- .../Formats/Impl/ArrowColumnToCHColumn.h | 3 +- .../02241_parquet_bad_column.reference | 1 + .../0_stateless/02241_parquet_bad_column.sh | 38 ++++++++++--------- 4 files changed, 48 insertions(+), 22 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index fa87c5ef811..16ca4314ed8 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -2,6 +2,7 @@ #include #include +#include "Common/StringUtils/StringUtils.h" #if USE_ARROW || USE_ORC || USE_PARQUET @@ -497,15 +498,35 @@ static void checkStatus(const arrow::Status & status, const String & column_name throw Exception{ErrorCodes::UNKNOWN_EXCEPTION, "Error with a {} column '{}': {}.", format_name, column_name, status.ToString()}; } -Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name, const Block * hint_header) +Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( + const arrow::Schema & schema, const std::string & format_name, const Block * hint_header, bool ignore_case) { ColumnsWithTypeAndName sample_columns; std::unordered_set nested_table_names; if (hint_header) nested_table_names = Nested::getAllTableNames(*hint_header); + + const auto accept_field = [&](const auto & field_name) + { + if (!hint_header) + return false; + + if (hint_header->has(field_name, ignore_case)) + return true; + + if (ignore_case) + return nested_table_names.contains(field_name); + + return std::find_if( + nested_table_names.begin(), + nested_table_names.end(), + [&](const auto & nested_table_name) { return equalsCaseInsensitive(nested_table_name, field_name); }) + != nested_table_names.end(); + }; + for (const auto & field : schema.fields()) { - if (hint_header && !hint_header->has(field->name()) && !nested_table_names.contains(field->name())) + if (!accept_field(field->name())) continue; /// Create empty arrow column by it's type and convert it to ClickHouse column. @@ -656,8 +677,9 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & std::vector ArrowColumnToCHColumn::getMissingColumns(const arrow::Schema & schema) const { std::vector missing_columns; - auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, &header); + auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, &header, case_insensitive_matching); auto flatten_block_from_arrow = Nested::flatten(block_from_arrow); + for (size_t i = 0, columns = header.columns(); i < columns; ++i) { const auto & header_column = header.getByPosition(i); diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index a8b18fabd93..ff99d2b2f11 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -37,7 +37,8 @@ public: /// Transform arrow schema to ClickHouse header. If hint_header is provided, /// we will skip columns in schema that are not in hint_header. - static Block arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name, const Block * hint_header = nullptr); + static Block arrowSchemaToCHHeader( + const arrow::Schema & schema, const std::string & format_name, const Block * hint_header = nullptr, bool ignore_case = false); private: const Block & header; diff --git a/tests/queries/0_stateless/02241_parquet_bad_column.reference b/tests/queries/0_stateless/02241_parquet_bad_column.reference index f599e28b8ab..b2f7f08c170 100644 --- a/tests/queries/0_stateless/02241_parquet_bad_column.reference +++ b/tests/queries/0_stateless/02241_parquet_bad_column.reference @@ -1 +1,2 @@ 10 +10 diff --git a/tests/queries/0_stateless/02241_parquet_bad_column.sh b/tests/queries/0_stateless/02241_parquet_bad_column.sh index a160671a088..9efd11cbbe1 100755 --- a/tests/queries/0_stateless/02241_parquet_bad_column.sh +++ b/tests/queries/0_stateless/02241_parquet_bad_column.sh @@ -5,23 +5,25 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "drop table if exists test_02241" -$CLICKHOUSE_CLIENT -q "create table test_02241 (image_path Nullable(String), - caption Nullable(String), - NSFW Nullable(String), - similarity Nullable(Float64), - LICENSE Nullable(String), - url Nullable(String), - key Nullable(UInt64), - shard_id Nullable(UInt64), - status Nullable(String), - width Nullable(UInt32), - height Nullable(UInt32), - exif Nullable(String), - original_width Nullable(UInt32), - original_height Nullable(UInt32)) engine=Memory" +for case_insensitive in "true" "false"; do + $CLICKHOUSE_CLIENT -q "drop table if exists test_02241" + $CLICKHOUSE_CLIENT -q "create table test_02241 (image_path Nullable(String), + caption Nullable(String), + NSFW Nullable(String), + similarity Nullable(Float64), + LICENSE Nullable(String), + url Nullable(String), + key Nullable(UInt64), + shard_id Nullable(UInt64), + status Nullable(String), + width Nullable(UInt32), + height Nullable(UInt32), + exif Nullable(String), + original_width Nullable(UInt32), + original_height Nullable(UInt32)) engine=Memory" -cat $CUR_DIR/data_parquet_bad_column/metadata_0.parquet | $CLICKHOUSE_CLIENT -q "insert into test_02241 format Parquet" + cat $CUR_DIR/data_parquet_bad_column/metadata_0.parquet | $CLICKHOUSE_CLIENT -q "insert into test_02241 format Parquet SETTINGS input_format_parquet_case_insensitive_column_matching=$case_insensitive" -$CLICKHOUSE_CLIENT -q "select count() from test_02241" -$CLICKHOUSE_CLIENT -q "drop table test_02241" + $CLICKHOUSE_CLIENT -q "select count() from test_02241" + $CLICKHOUSE_CLIENT -q "drop table test_02241" +done From 7c11295228d61c8f689f01f508f6fecfb6edb9a1 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 22 Mar 2022 09:33:11 +0000 Subject: [PATCH 067/132] Remove old test --- .../0_stateless/00900_long_parquet_load.reference | 3 --- ...input_format_use_lowercase_column_name.parquet | Bin 811 -> 0 bytes ...rmat_use_lowercase_column_name.parquet.columns | 1 - 3 files changed, 4 deletions(-) delete mode 100644 tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet delete mode 100644 tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet.columns diff --git a/tests/queries/0_stateless/00900_long_parquet_load.reference b/tests/queries/0_stateless/00900_long_parquet_load.reference index 98d8e2c5e3e..b295a226853 100644 --- a/tests/queries/0_stateless/00900_long_parquet_load.reference +++ b/tests/queries/0_stateless/00900_long_parquet_load.reference @@ -342,9 +342,6 @@ Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Unkno (NULL) === Try load data from single_nan.parquet \N -=== Try load data from test_setting_input_format_use_lowercase_column_name.parquet -123 1 -456 2 === Try load data from userdata1.parquet 1454486129 1 Amanda Jordan ajordan0@com.com Female 1.197.201.2 6759521864920116 Indonesia 3/8/1971 49756.53 Internal Auditor 1E+02 1454519043 2 Albert Freeman afreeman1@is.gd Male 218.111.175.34 Canada 1/16/1968 150280.17 Accountant IV diff --git a/tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet b/tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet deleted file mode 100644 index 922def77cafa3b4fe8f35896b3035e521301896d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 811 zcmb_b%}(1u5T0Fcqaa17LhQE@cj$nQvyk`POT9xwoX1wz|3pr)G$zR|6tiT3&H( zwb9h1XSAugsxIl0ZPI)II1JE&j;X4|Qwb5s_)1t^i5iMZQ7Ah{S{h}(3G-dV+j05+ zU{nxF0=?3X0`94v9jho?0nFon?GKehL*?p*<|i@$lWKkiR?9mzPu`@%eBcFL33Dyt z-!lFo?E_r@5GbRq28E^5^my>OuNCRaoOI`$qdkk=S|TkLkx`M2YXA2_;>SLKxyFUW zW%Er!i@D=G^07ze z!o<%5N9QMyt|Mr-+AhZs%!fYWcW}wu56c^05B*fe+nyx7YxV7ZGltNdIX$~h7wo_- zFRZ=eNl}DN@OwQk4*{YTxL@*u!e^s?esVrITS!Nvg?^q7FRK@W{Pg4SOLeWjSYM24 PxKwwqH~7-J_)TsBK^}x5 diff --git a/tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet.columns b/tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet.columns deleted file mode 100644 index e25da8f923d..00000000000 --- a/tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet.columns +++ /dev/null @@ -1 +0,0 @@ -`Id` Nullable(String), `Score` Nullable(Int32) From 0c23cd7b94de5e200f69267ade7e59fe392423eb Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 22 Mar 2022 10:55:10 +0000 Subject: [PATCH 068/132] Add support for case insensitive column matching in arrow --- src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp | 6 +++++- src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 2 +- 5 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 6d78b5d71ac..104d6f7c7bb 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -615,6 +615,7 @@ class IColumn; M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices \\N", 0) \ M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \ M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \ + M(Bool, input_format_arrow_case_insensitive_column_matching, false, "Ignore case when matching Arrow columns with CH columns.", 0) \ M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \ M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \ M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 8c25fef53cb..3aa82cb79b4 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -123,6 +123,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary; format_settings.arrow.import_nested = settings.input_format_arrow_import_nested; format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns; + format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching; format_settings.orc.import_nested = settings.input_format_orc_import_nested; format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index de05dda9138..bd0a84d9ded 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -74,6 +74,7 @@ struct FormatSettings bool low_cardinality_as_dictionary = false; bool import_nested = false; bool allow_missing_columns = false; + bool case_insensitive_column_matching = false; } arrow; struct diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index cf5cfa681a1..37a107ae367 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -139,7 +139,11 @@ void ArrowBlockInputFormat::prepareReader() } arrow_column_to_ch_column = std::make_unique( - getPort().getHeader(), "Arrow", format_settings.arrow.import_nested, format_settings.arrow.allow_missing_columns); + getPort().getHeader(), + "Arrow", + format_settings.arrow.import_nested, + format_settings.arrow.allow_missing_columns, + format_settings.arrow.case_insensitive_column_matching); missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema); if (stream) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 16ca4314ed8..ba037b0cf6e 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -514,7 +514,7 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( if (hint_header->has(field_name, ignore_case)) return true; - if (ignore_case) + if (!ignore_case) return nested_table_names.contains(field_name); return std::find_if( From 0a469066e066cb1ef30411edc122b7dd199e27ec Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 22 Mar 2022 10:55:20 +0000 Subject: [PATCH 069/132] Add more tests --- ...case_insensitive_column_matching.reference | 3 ++ .../02242_case_insensitive_column_matching.sh | 8 ++++++ .../02242_case_insensitive_nested.reference | 12 ++++++++ .../02242_case_insensitive_nested.sh | 26 ++++++++++++++++++ .../case_insensitive_column_matching.arrow | Bin 0 -> 658 bytes 5 files changed, 49 insertions(+) create mode 100644 tests/queries/0_stateless/02242_case_insensitive_nested.reference create mode 100755 tests/queries/0_stateless/02242_case_insensitive_nested.sh create mode 100644 tests/queries/0_stateless/data_arrow/case_insensitive_column_matching.arrow diff --git a/tests/queries/0_stateless/02242_case_insensitive_column_matching.reference b/tests/queries/0_stateless/02242_case_insensitive_column_matching.reference index 5c383cb3035..9732211a286 100644 --- a/tests/queries/0_stateless/02242_case_insensitive_column_matching.reference +++ b/tests/queries/0_stateless/02242_case_insensitive_column_matching.reference @@ -4,3 +4,6 @@ Parquet ORC 123 1 456 2 +Arrow +123 1 +456 2 diff --git a/tests/queries/0_stateless/02242_case_insensitive_column_matching.sh b/tests/queries/0_stateless/02242_case_insensitive_column_matching.sh index 86e9cb7ee4c..8ebf2952ab3 100755 --- a/tests/queries/0_stateless/02242_case_insensitive_column_matching.sh +++ b/tests/queries/0_stateless/02242_case_insensitive_column_matching.sh @@ -20,3 +20,11 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE orc_load (iD String, sCorE Int32) ENG cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO orc_load FORMAT ORC SETTINGS input_format_orc_case_insensitive_column_matching=true" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM orc_load" ${CLICKHOUSE_CLIENT} --query="drop table orc_load" + +echo "Arrow" +DATA_FILE=$CUR_DIR/data_arrow/case_insensitive_column_matching.arrow +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_load" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_load (iD String, sCorE Int32) ENGINE = Memory" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO arrow_load FORMAT Arrow SETTINGS input_format_arrow_case_insensitive_column_matching=true" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_load" +${CLICKHOUSE_CLIENT} --query="drop table arrow_load" diff --git a/tests/queries/0_stateless/02242_case_insensitive_nested.reference b/tests/queries/0_stateless/02242_case_insensitive_nested.reference new file mode 100644 index 00000000000..58d66d3230a --- /dev/null +++ b/tests/queries/0_stateless/02242_case_insensitive_nested.reference @@ -0,0 +1,12 @@ +Arrow +[1,2,3] ['123','456','789'] [9.8,10.12,11.14] +[4,5,6] ['101112','131415','161718'] [123.8,10.2,11.414] +[7,8,9] ['101','415','118'] [13.08,1.12,0.414] +Parquet +[1,2,3] ['123','456','789'] [9.8,10.12,11.14] +[4,5,6] ['101112','131415','161718'] [123.8,10.2,11.414] +[7,8,9] ['101','415','118'] [13.08,1.12,0.414] +ORC +[1,2,3] ['123','456','789'] [9.8,10.12,11.14] +[4,5,6] ['101112','131415','161718'] [123.8,10.2,11.414] +[7,8,9] ['101','415','118'] [13.08,1.12,0.414] diff --git a/tests/queries/0_stateless/02242_case_insensitive_nested.sh b/tests/queries/0_stateless/02242_case_insensitive_nested.sh new file mode 100755 index 00000000000..c22f5695dc3 --- /dev/null +++ b/tests/queries/0_stateless/02242_case_insensitive_nested.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS nested_table" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS nested_nested_table" + +${CLICKHOUSE_CLIENT} --query="CREATE TABLE nested_table (table Nested(eLeM1 Int32, elEm2 String, ELEM3 Float32)) engine=Memory" + +formats=('Arrow' 'Parquet' 'ORC') +format_files=('arrow' 'parquet' 'orc') + +for ((i = 0; i < 3; i++)) do + echo ${formats[i]} + + ${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE nested_table" + cat $CUR_DIR/data_orc_arrow_parquet_nested/nested_table.${format_files[i]} | ${CLICKHOUSE_CLIENT} -q "INSERT INTO nested_table FORMAT ${formats[i]} SETTINGS input_format_${format_files[i]}_import_nested = 1, input_format_${format_files[i]}_case_insensitive_column_matching = true" + + ${CLICKHOUSE_CLIENT} --query="SELECT * FROM nested_table" + +done + +${CLICKHOUSE_CLIENT} --query="DROP TABLE nested_table" diff --git a/tests/queries/0_stateless/data_arrow/case_insensitive_column_matching.arrow b/tests/queries/0_stateless/data_arrow/case_insensitive_column_matching.arrow new file mode 100644 index 0000000000000000000000000000000000000000..4350d5c3e49a3b2d852661d47be41855b6327a5d GIT binary patch literal 658 zcmd5)yAHxI3_MCG70S?|LW~_*c}^@HnLuUW1Aq+)iLc@l_yk5q{s!)x6d@#buCkqT zYoC*}s_WHu1U!igfSd;O;E|$(j4Pt$$U&CTyXb+0Y;76f#(FLj)t52Hz#hP6cRC*! zGj_?)wR+N4NtdHA-Zh0h>cgqefaYj|w3d5uJz?G(V?h%<`f*}!ozQkxU)o!_9=T_2 z*wc|2X{c`l&H4-4zn~f+4Z$?x(t>6dbW#nZ@u{QyH&?DrYx<7Hlj&^ELyF((e)Ns> nJ$Ic_!IOo#92U$EX*wI#Txr5R1iO>Ce20I!&j+Wf>|cBV9KI)< literal 0 HcmV?d00001 From 6b6190554b282725111cc94384cbf3257716ae37 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 22 Mar 2022 11:15:48 +0000 Subject: [PATCH 070/132] Fix conversion of arrow to CH column with hint header --- src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index ba037b0cf6e..91d276ddfe0 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -508,10 +508,7 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( const auto accept_field = [&](const auto & field_name) { - if (!hint_header) - return false; - - if (hint_header->has(field_name, ignore_case)) + if (!hint_header || hint_header->has(field_name, ignore_case)) return true; if (!ignore_case) From c5245c1ad1d411e917913184d19ac1131ee8034d Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 22 Mar 2022 12:00:20 +0000 Subject: [PATCH 071/132] Improve backward compatibility check and stress test --- docker/test/stress/run.sh | 88 ++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 39 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 4af74d3ba54..747b4b64d52 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -131,9 +131,6 @@ function start() # use root to match with current uid clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log sleep 0.5 - cat /var/log/clickhouse-server/stdout.log - tail -n200 /var/log/clickhouse-server/stderr.log - tail -n200 /var/log/clickhouse-server/clickhouse-server.log counter=$((counter + 1)) done @@ -211,14 +208,12 @@ stop start clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \ - || echo -e 'Server failed to start\tFAIL' >> /test_output/test_results.tsv + || echo -e 'Server failed to start (see application_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ + && grep -Fa ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL" [ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL" -# Print Fatal log messages to stdout -zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log* - # Grep logs for sanitizer asserts, crashes and other critical errors # Sanitizer asserts @@ -235,20 +230,26 @@ zgrep -Fa " Application: Child process was terminated by signal 9" /var/ || echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv # Logical errors -zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ - && echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ +zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /test_output/logical_errors.txt \ + && echo -e 'Logical error thrown (see clickhouse-server.log or logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv +# Remove file logical_errors.txt if it's empty +[ -s /test_output/logical_errors.txt ] || echo rm /test_output/logical_errors.txt + # Crash zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ && echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv # It also checks for crash without stacktrace (printed by watchdog) -zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ - && echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ +zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log* > /test_output/fatal_messages.txt \ + && echo -e 'Fatal message in clickhouse-server.log (see fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv +# Remove file fatal_messages.txt if it's empty +[ -s /test_output/logical_errors.txt ] || echo rm /test_output/logical_errors.txt + zgrep -Fa "########################################" /test_output/* > /dev/null \ && echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv @@ -259,12 +260,12 @@ echo -e "Backward compatibility check\n" echo "Download previous release server" mkdir previous_release_package_folder -clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'Download script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv +clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \ + || echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv if [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ] then - echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/backward_compatibility_check_results.tsv + echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/test_results.tsv stop # Uninstall current packages @@ -290,8 +291,8 @@ then mkdir tmp_stress_output ./stress --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \ - && echo -e 'Test script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'Test script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv + && echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv rm -rf tmp_stress_output clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" @@ -301,8 +302,9 @@ then # Start new server configure start 500 - clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'Server failed to start\tFAIL' >> /test_output/backward_compatibility_check_results.tsv + clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \ + && grep -Fa ".*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt clickhouse-client --query="SELECT 'Server version: ', version()" @@ -312,10 +314,12 @@ then stop # Error messages (we should ignore some errors) + echo "Check for Error messages in server log:" zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ -e "Code: 236. DB::Exception: Cancelled mutating parts" \ -e "REPLICA_IS_ALREADY_ACTIVE" \ -e "REPLICA_IS_ALREADY_EXIST" \ + -e "ALL_REPLICAS_LOST" \ -e "DDLWorker: Cannot parse DDL task query" \ -e "RaftInstance: failed to accept a rpc connection due to error 125" \ -e "UNKNOWN_DATABASE" \ @@ -328,47 +332,53 @@ then -e "Code: 1000, e.code() = 111, Connection refused" \ -e "UNFINISHED" \ -e "Renaming unexpected part" \ - /var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "" > /dev/null \ - && echo -e 'Error message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'No Error messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv + /var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ + && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + + # Remove file bc_check_error_messages.txt if it's empty + [ -s /test_output/bc_check_error_messages.txt ] || echo rm /test_output/bc_check_error_messages.txt # Sanitizer asserts zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \ - && echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'No sanitizer asserts\tOK' >> /test_output/backward_compatibility_check_results.tsv + && echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv rm -f /test_output/tmp # OOM zgrep -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ - && echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv + && echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv # Logical errors - zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ - && echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'No logical errors\tOK' >> /test_output/backward_compatibility_check_results.tsv + echo "Check for Logical errors in server log:" + zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_logical_errors.txt \ + && echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv + + # Remove file bc_check_logical_errors.txt if it's empty + [ -s /test_output/bc_check_logical_errors.txt ] || echo rm /test_output/bc_check_logical_errors.txt # Crash zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ - && echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'Not crashed\tOK' >> /test_output/backward_compatibility_check_results.tsv + && echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv # It also checks for crash without stacktrace (printed by watchdog) - zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ - && echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv + echo "Check for Fatal message in server log:" + zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_fatal_messages.txt \ + && echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + + # Remove file bc_check_fatal_messages.txt if it's empty + [ -s /test_output/bc_check_fatal_messages.txt ] || echo rm /test_output/bc_check_fatal_messages.txt else - echo -e "Failed to download previous release packets\tFAIL" >> /test_output/backward_compatibility_check_results.tsv + echo -e "Backward compatibility check: Failed to download previous release packets\tFAIL" >> /test_output/test_results.tsv fi -zgrep -Fa "FAIL" /test_output/backward_compatibility_check_results.tsv > /dev/null \ - && echo -e 'Backward compatibility check\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check\tOK' >> /test_output/test_results.tsv - - # Put logs into /test_output/ for log_file in /var/log/clickhouse-server/clickhouse-server.log* do From bc5a689899032c2643f7621c2506a684d0f50145 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 22 Mar 2022 15:34:20 +0100 Subject: [PATCH 072/132] add debug logging --- src/Common/ZooKeeper/ZooKeeper.cpp | 1 + tests/integration/helpers/cluster.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 5aeeaf9b4ef..118789c0ffc 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -81,6 +81,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ if (secure) host_string.erase(0, strlen("secure://")); + LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, Poco::Net::SocketAddress{host_string}.toString()); nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure}); } catch (const Poco::Net::HostNotFoundException & e) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 67d096a1f0c..400a38d8981 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -31,7 +31,7 @@ try: from confluent_kafka.avro.cached_schema_registry_client import \ CachedSchemaRegistryClient except Exception as e: - logging.warning(f"Cannot import some modules, some tests may not work:{e}") + logging.warning(f"Cannot import some modules, some tests may not work: {e}") from dict2xml import dict2xml from kazoo.client import KazooClient From 034d92d817720ec0ef692633551a873f719294bf Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 22 Mar 2022 18:06:35 +0100 Subject: [PATCH 073/132] Fix removing empty files --- docker/test/stress/run.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 747b4b64d52..0fceb6c8223 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -235,7 +235,7 @@ zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-serve || echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv # Remove file logical_errors.txt if it's empty -[ -s /test_output/logical_errors.txt ] || echo rm /test_output/logical_errors.txt +[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt # Crash zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ @@ -248,7 +248,7 @@ zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log* > /test_ || echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv # Remove file fatal_messages.txt if it's empty -[ -s /test_output/logical_errors.txt ] || echo rm /test_output/logical_errors.txt +[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt zgrep -Fa "########################################" /test_output/* > /dev/null \ && echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv @@ -337,7 +337,7 @@ then || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv # Remove file bc_check_error_messages.txt if it's empty - [ -s /test_output/bc_check_error_messages.txt ] || echo rm /test_output/bc_check_error_messages.txt + [ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt # Sanitizer asserts zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp @@ -359,7 +359,7 @@ then || echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv # Remove file bc_check_logical_errors.txt if it's empty - [ -s /test_output/bc_check_logical_errors.txt ] || echo rm /test_output/bc_check_logical_errors.txt + [ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt # Crash zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ @@ -373,7 +373,7 @@ then || echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv # Remove file bc_check_fatal_messages.txt if it's empty - [ -s /test_output/bc_check_fatal_messages.txt ] || echo rm /test_output/bc_check_fatal_messages.txt + [ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt else echo -e "Backward compatibility check: Failed to download previous release packets\tFAIL" >> /test_output/test_results.tsv From 763a80e405f7495ccbf033c4016a2897fba018f6 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 22 Mar 2022 18:46:59 +0100 Subject: [PATCH 074/132] Add more validations in mask operations --- src/Columns/MaskOperations.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Columns/MaskOperations.cpp b/src/Columns/MaskOperations.cpp index 64d90ae82cb..53cbcfef734 100644 --- a/src/Columns/MaskOperations.cpp +++ b/src/Columns/MaskOperations.cpp @@ -85,9 +85,12 @@ size_t extractMaskNumericImpl( { size_t ones_count = 0; size_t data_index = 0; - size_t mask_size = mask.size(); - for (size_t i = 0; i != mask_size; ++i) + size_t mask_size = mask.size(); + size_t data_size = data.size(); + + size_t i = 0; + for (; i != mask_size && data_index != data_size; ++i) { // Change mask only where value is 1. if (!mask[i]) @@ -120,6 +123,10 @@ size_t extractMaskNumericImpl( mask[i] = value; } + + if (i != mask_size || data_index != data_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a short column is not equal to the number of ones in a mask"); + return ones_count; } From 7e904ac789700639ddd2f1371c4a2743d0bda9ec Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 22 Mar 2022 19:05:37 +0100 Subject: [PATCH 075/132] more logs --- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 2 +- tests/integration/test_zookeeper_config_load_balancing/test.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 0627a70193f..d3c993344b6 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -451,7 +451,7 @@ void ZooKeeper::connect( } else { - LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}", socket.peerAddress().toString(), session_id); + LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}{}", socket.peerAddress().toString(), session_id, fail_reasons.str()); } } diff --git a/tests/integration/test_zookeeper_config_load_balancing/test.py b/tests/integration/test_zookeeper_config_load_balancing/test.py index 79f21ad3eca..f48b3168325 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test.py @@ -11,6 +11,7 @@ node2 = cluster.add_instance('nod2', with_zookeeper=True, node3 = cluster.add_instance('nod3', with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"]) + def change_balancing(old, new, reload=True): line = '{}<' old_line = line.format(old) @@ -18,6 +19,7 @@ def change_balancing(old, new, reload=True): for node in [node1, node2, node3]: node.replace_in_config('/etc/clickhouse-server/config.d/zookeeper_load_balancing.xml', old_line, new_line) if reload: + node.query("select '{}', '{}'".format(old, new)) node.query('system reload config') From 9bf1cb6636855d2c4cc30b2604f2d99f220b04a1 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 22 Mar 2022 20:15:05 +0100 Subject: [PATCH 076/132] Fix --- src/Columns/MaskOperations.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/Columns/MaskOperations.cpp b/src/Columns/MaskOperations.cpp index 53cbcfef734..897ee23e1e0 100644 --- a/src/Columns/MaskOperations.cpp +++ b/src/Columns/MaskOperations.cpp @@ -83,6 +83,12 @@ size_t extractMaskNumericImpl( const PaddedPODArray * null_bytemap, PaddedPODArray * nulls) { + if constexpr (!column_is_short) + { + if (data.size() != mask.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a full data column is not equal to the size of a mask"); + } + size_t ones_count = 0; size_t data_index = 0; @@ -124,9 +130,12 @@ size_t extractMaskNumericImpl( mask[i] = value; } - if (i != mask_size || data_index != data_size) - throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a short column is not equal to the number of ones in a mask"); - + if constexpr (column_is_short) + { + if (i != mask_size || data_index != data_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a short column is not equal to the number of ones in a mask"); + } + return ones_count; } From 1a096a87da1212267b1a6d709c253ae413b54505 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 22 Mar 2022 22:44:46 +0100 Subject: [PATCH 077/132] Added support for SystemLog to specify COMMENT in ENGINE section --- src/Interpreters/SystemLog.cpp | 64 +++++++++++++++++-- .../test_system_logs_comment/__init__.py | 0 .../test_system_logs_comment/test.py | 42 ++++++++++++ 3 files changed, 101 insertions(+), 5 deletions(-) create mode 100644 tests/integration/test_system_logs_comment/__init__.py create mode 100644 tests/integration/test_system_logs_comment/test.py diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 59545d4314d..dd82ad135ed 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -43,6 +43,55 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +namespace +{ + class StorageWithComment : public IAST + { + public: + ASTPtr storage; + ASTPtr comment; + + String getID(char) const override { return "Storage with comment definition"; } + + ASTPtr clone() const override { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method clone is not supported"); + } + + void formatImpl(const FormatSettings &, FormatState &, FormatStateStacked) const override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported"); + } + }; + + class ParserStorageWithComment : public IParserBase + { + protected: + const char * getName() const override { return "storage definition with comment"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + ParserStorage storage_p; + ASTPtr storage; + + if (!storage_p.parse(pos, storage, expected)) + return false; + + ParserKeyword s_comment("COMMENT"); + ParserStringLiteral string_literal_parser; + ASTPtr comment; + + if (s_comment.ignore(pos, expected)) + string_literal_parser.parse(pos, comment, expected); + + auto storage_with_comment = std::make_shared(); + storage_with_comment->storage = std::move(storage); + storage_with_comment->comment = std::move(comment); + + node = storage_with_comment; + return true; + } + }; +} + namespace { @@ -102,8 +151,9 @@ std::shared_ptr createSystemLog( engine += " TTL " + ttl; engine += " ORDER BY (event_date, event_time)"; } + // Validate engine definition grammatically to prevent some configuration errors - ParserStorage storage_parser; + ParserStorageWithComment storage_parser; parseQuery(storage_parser, engine.data(), engine.data() + engine.size(), "Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); @@ -450,7 +500,6 @@ void SystemLog::prepareTable() is_prepared = true; } - template ASTPtr SystemLog::getCreateTableQuery() { @@ -465,11 +514,16 @@ ASTPtr SystemLog::getCreateTableQuery() new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(ordinary_columns, alias_columns)); create->set(create->columns_list, new_columns_list); - ParserStorage storage_parser; - ASTPtr storage_ast = parseQuery( + ParserStorageWithComment storage_parser; + + ASTPtr storage_with_comment_ast = parseQuery( storage_parser, storage_def.data(), storage_def.data() + storage_def.size(), "Storage to create table for " + LogElement::name(), 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - create->set(create->storage, storage_ast); + + StorageWithComment & storage_with_comment = storage_with_comment_ast->as(); + + create->set(create->storage, storage_with_comment.storage); + create->set(create->comment, storage_with_comment.comment); /// Write additional (default) settings for MergeTree engine to make it make it possible to compare ASTs /// and recreate tables on settings changes. diff --git a/tests/integration/test_system_logs_comment/__init__.py b/tests/integration/test_system_logs_comment/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_system_logs_comment/test.py b/tests/integration/test_system_logs_comment/test.py new file mode 100644 index 00000000000..bfc17865296 --- /dev/null +++ b/tests/integration/test_system_logs_comment/test.py @@ -0,0 +1,42 @@ +# pylint: disable=line-too-long +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name + +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance('node_default', stay_alive=True) + +@pytest.fixture(scope='module', autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_system_logs_comment(): + node.exec_in_container(['bash', '-c', f"""echo " + + + ENGINE = MergeTree + PARTITION BY (event_date) + ORDER BY (event_time) + TTL event_date + INTERVAL 14 DAY DELETE + SETTINGS ttl_only_drop_parts=1 + COMMENT 'test_comment' + + + + + " > /etc/clickhouse-server/config.d/yyy-override-query_log.xml + """]) + node.restart_clickhouse() + + node.query("select 1") + node.query("system flush logs") + + comment = node.query("SELECT comment FROM system.tables WHERE name = 'query_log'") + assert comment =='test_comment\n' From a03bfa0fae24738786fd56358923f216db0c6979 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 23 Mar 2022 10:46:27 +0100 Subject: [PATCH 078/132] Fixed style check --- src/Interpreters/SystemLog.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index dd82ad135ed..8aa33d8af88 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -53,7 +53,8 @@ namespace String getID(char) const override { return "Storage with comment definition"; } - ASTPtr clone() const override { + ASTPtr clone() const override + { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method clone is not supported"); } From 8ef2f13524c7ddb16e09919fddae02efb32e0552 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 23 Mar 2022 11:26:31 +0100 Subject: [PATCH 079/132] Update run.sh --- docker/test/stress/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 0fceb6c8223..8f6ef09e2e5 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -248,7 +248,7 @@ zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log* > /test_ || echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv # Remove file fatal_messages.txt if it's empty -[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt +[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt zgrep -Fa "########################################" /test_output/* > /dev/null \ && echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv From 5efad9699c4dba9868b08be5c8aca54dba66b049 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 23 Mar 2022 11:28:40 +0100 Subject: [PATCH 080/132] Update run.sh --- docker/test/stress/run.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 8f6ef09e2e5..3cef5b008db 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -208,8 +208,8 @@ stop start clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \ - || echo -e 'Server failed to start (see application_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ - && grep -Fa ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt + || (echo -e 'Server failed to start (see application_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ + && grep -Fa ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt) [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL" [ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL" @@ -303,8 +303,8 @@ then configure start 500 clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \ - && grep -Fa ".*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt + || (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \ + && grep -Fa ".*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt) clickhouse-client --query="SELECT 'Server version: ', version()" From c4713a9159f41a01c7f70afbd31f7d460958d98c Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 23 Mar 2022 11:39:38 +0100 Subject: [PATCH 081/132] Update MaskOperations.cpp --- src/Columns/MaskOperations.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/MaskOperations.cpp b/src/Columns/MaskOperations.cpp index 897ee23e1e0..5a9436de3b0 100644 --- a/src/Columns/MaskOperations.cpp +++ b/src/Columns/MaskOperations.cpp @@ -132,7 +132,7 @@ size_t extractMaskNumericImpl( if constexpr (column_is_short) { - if (i != mask_size || data_index != data_size) + if (data_index != data_size) throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a short column is not equal to the number of ones in a mask"); } From 99cf1560dad94305e0fe7fbf11e42f29f8fe6aaf Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 23 Mar 2022 10:50:55 +0000 Subject: [PATCH 082/132] Decrease data_type_max_parse_depth a little to avoid stack overflow in coroutines --- src/DataTypes/DataTypeFactory.cpp | 2 +- .../0_stateless/01675_data_type_coroutine.reference | 1 + .../queries/0_stateless/01675_data_type_coroutine.sh | 12 ++++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index ce501f4333d..e1567d3a1b0 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -33,7 +33,7 @@ DataTypePtr DataTypeFactory::get(const String & full_name) const /// Value 315 is known to cause stack overflow in some test configurations (debug build, sanitizers) /// let's make the threshold significantly lower. /// It is impractical for user to have complex data types with this depth. - static constexpr size_t data_type_max_parse_depth = 200; + static constexpr size_t data_type_max_parse_depth = 150; ParserDataType parser; ASTPtr ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", 0, data_type_max_parse_depth); diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.reference b/tests/queries/0_stateless/01675_data_type_coroutine.reference index 7326d960397..541dab48def 100644 --- a/tests/queries/0_stateless/01675_data_type_coroutine.reference +++ b/tests/queries/0_stateless/01675_data_type_coroutine.reference @@ -1 +1,2 @@ Ok +Ok diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.sh b/tests/queries/0_stateless/01675_data_type_coroutine.sh index 8e80d722a4c..9ae6dadd1dc 100755 --- a/tests/queries/0_stateless/01675_data_type_coroutine.sh +++ b/tests/queries/0_stateless/01675_data_type_coroutine.sh @@ -16,3 +16,15 @@ done #echo "I = ${I}" echo 'Ok' + +counter=0 +I=0 +while [[ $counter -lt $retries ]]; do + I=$((I + 1)) + TYPE=$(perl -e "print 'Array(' x $I; print 'UInt8'; print ')' x $I") + ${CLICKHOUSE_CLIENT} --prefer_localhost_replica=0 --max_parser_depth 1000000 --query "SELECT * FROM remote('127.0.0.{1,2}', generateRandom('x $TYPE', 1, 1, 1)) LIMIT 1 FORMAT Null" 2>&1 | grep -q -F 'Maximum parse depth' && break; + ((++counter)) +done + +#echo "I = ${I}" +echo 'Ok' From 21735db126684c40fe162b0a257b7f24e9a7285e Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 23 Mar 2022 11:52:37 +0100 Subject: [PATCH 083/132] Fixed style check --- src/Interpreters/SystemLog.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 8aa33d8af88..3b4d665e41b 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -41,6 +41,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } namespace From e8b7f963b7c8bca178ba2565441f4b22188af773 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 22 Mar 2022 14:43:52 +0100 Subject: [PATCH 084/132] fix too strict assertion --- src/Interpreters/DDLWorker.cpp | 12 +++++++++++- src/Interpreters/DDLWorker.h | 3 +++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 3e414d5b6de..4d2cdf7dd2c 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -350,6 +350,12 @@ void DDLWorker::scheduleTasks(bool reinitialized) bool maybe_concurrently_deleting = task && !zookeeper->exists(fs::path(task->entry_path) / "active"); return task && !maybe_concurrently_deleting && !maybe_currently_processing; } + else if (last_skipped_entry_name.has_value() && !queue_fully_loaded_after_initialization_debug_helper) + { + /// If connection was lost during queue loading + /// we may start processing from finished task (because we don't know yet that it's finished) and it's ok. + return false; + } else { /// Return true if entry should not be scheduled. @@ -365,7 +371,11 @@ void DDLWorker::scheduleTasks(bool reinitialized) String reason; auto task = initAndCheckTask(entry_name, reason, zookeeper); - if (!task) + if (task) + { + queue_fully_loaded_after_initialization_debug_helper = true; + } + else { LOG_DEBUG(log, "Will not execute task {}: {}", entry_name, reason); updateMaxDDLEntryID(entry_name); diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h index dbdf0e94f06..7cdbf880a2b 100644 --- a/src/Interpreters/DDLWorker.h +++ b/src/Interpreters/DDLWorker.h @@ -131,6 +131,9 @@ protected: std::optional first_failed_task_name; std::list current_tasks; + /// This flag is needed for debug assertions only + bool queue_fully_loaded_after_initialization_debug_helper = false; + Coordination::Stat queue_node_stat; std::shared_ptr queue_updated_event = std::make_shared(); std::shared_ptr cleanup_event = std::make_shared(); From 2be956da2216b7a747a50d3f0789e547e42fb602 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 23 Mar 2022 11:58:27 +0100 Subject: [PATCH 085/132] Update 01598_memory_limit_zeros.sql --- tests/queries/0_stateless/01598_memory_limit_zeros.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01598_memory_limit_zeros.sql b/tests/queries/0_stateless/01598_memory_limit_zeros.sql index 5b321687e43..cc2a75e023e 100644 --- a/tests/queries/0_stateless/01598_memory_limit_zeros.sql +++ b/tests/queries/0_stateless/01598_memory_limit_zeros.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-parallel, no-fasttest, no-random-settings SET max_memory_usage = 1, max_untracked_memory = 1000000, max_threads=40; select 'test', count(*) from zeros_mt(1000000) where not ignore(zero); -- { serverError 241 } From 3c5c267c17b07f2793fa50a237708add54e34850 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 23 Mar 2022 12:19:38 +0100 Subject: [PATCH 086/132] Improve performance of ASOF join --- src/Common/RadixSort.h | 5 ++ src/Interpreters/HashJoin.cpp | 8 +-- src/Interpreters/RowRefs.cpp | 98 +++++++++++++++++++++++++---------- src/Interpreters/RowRefs.h | 4 +- 4 files changed, 82 insertions(+), 33 deletions(-) diff --git a/src/Common/RadixSort.h b/src/Common/RadixSort.h index 944ab860355..2f02ebb9e03 100644 --- a/src/Common/RadixSort.h +++ b/src/Common/RadixSort.h @@ -515,6 +515,11 @@ public: radixSortLSDInternal(arr, size, false, nullptr); } + static void executeLSD(Element * arr, size_t size, bool reverse) + { + radixSortLSDInternal(arr, size, reverse, nullptr); + } + /** This function will start to sort inplace (modify 'arr') * but on the last step it will write result directly to the destination * instead of finishing sorting 'arr'. diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index c56529b3214..e81db1427ef 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1237,16 +1237,16 @@ NO_INLINE IColumn::Filter joinRightColumns( { const IColumn & left_asof_key = added_columns.leftAsofKey(); - auto [block, row_num] = mapped->findAsof(left_asof_key, i); - if (block) + auto row_ref = mapped->findAsof(left_asof_key, i); + if (row_ref.block) { setUsed(filter, i); if constexpr (multiple_disjuncts) - used_flags.template setUsed(block, row_num, 0); + used_flags.template setUsed(row_ref.block, row_ref.row_num, 0); else used_flags.template setUsed(find_result); - added_columns.appendFromBlock(*block, row_num); + added_columns.appendFromBlock(*row_ref.block, row_ref.row_num); } else addNotFoundRow(added_columns, current_offset); diff --git a/src/Interpreters/RowRefs.cpp b/src/Interpreters/RowRefs.cpp index 39fc7965eb2..0385b003f3c 100644 --- a/src/Interpreters/RowRefs.cpp +++ b/src/Interpreters/RowRefs.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -44,38 +45,52 @@ class SortedLookupVector : public SortedLookupVectorBase { struct Entry { - /// We don't store a RowRef and instead keep it's members separately (and return a tuple) to reduce the memory usage. - /// For example, for sizeof(T) == 4 => sizeof(Entry) == 16 (while before it would be 20). Then when you put it into a vector, the effect is even greater - decltype(RowRef::block) block; - decltype(RowRef::row_num) row_num; - TKey asof_value; + TKey value; + uint32_t row_ref_index; Entry() = delete; - Entry(TKey v, const Block * b, size_t r) : block(b), row_num(r), asof_value(v) { } + Entry(TKey value_, uint32_t row_ref_index_) + : value(value_) + , row_ref_index(row_ref_index_) + { } - bool operator<(const Entry & other) const { return asof_value < other.asof_value; } + }; + + struct LessEntryOperator + { + ALWAYS_INLINE bool operator()(const Entry & lhs, const Entry & rhs) const + { + return lhs.value > rhs.value; + } }; struct GreaterEntryOperator { - bool operator()(Entry const & a, Entry const & b) const { return a.asof_value > b.asof_value; } + ALWAYS_INLINE bool operator()(const Entry & lhs, const Entry & rhs) const + { + return lhs.value > rhs.value; + } }; public: - using Base = std::vector; using Keys = std::vector; - static constexpr bool isDescending = (inequality == ASOF::Inequality::Greater || inequality == ASOF::Inequality::GreaterOrEquals); - static constexpr bool isStrict = (inequality == ASOF::Inequality::Less) || (inequality == ASOF::Inequality::Greater); + using Entries = PaddedPODArray; + using RowRefs = PaddedPODArray; + + static constexpr bool is_descending = (inequality == ASOF::Inequality::Greater || inequality == ASOF::Inequality::GreaterOrEquals); + static constexpr bool is_strict = (inequality == ASOF::Inequality::Less) || (inequality == ASOF::Inequality::Greater); void insert(const IColumn & asof_column, const Block * block, size_t row_num) override { using ColumnType = ColumnVectorOrDecimal; const auto & column = assert_cast(asof_column); - TKey k = column.getElement(row_num); + TKey key = column.getElement(row_num); assert(!sorted.load(std::memory_order_acquire)); - array.emplace_back(k, block, row_num); + + entries.emplace_back(key, row_refs.size()); + row_refs.emplace_back(RowRef(block, row_num)); } /// Unrolled version of upper_bound and lower_bound @@ -84,30 +99,30 @@ public: /// at https://en.algorithmica.org/hpc/data-structures/s-tree/ size_t boundSearch(TKey value) { - size_t size = array.size(); + size_t size = entries.size(); size_t low = 0; /// This is a single binary search iteration as a macro to unroll. Takes into account the inequality: - /// isStrict -> Equal values are not requested - /// isDescending -> The vector is sorted in reverse (for greater or greaterOrEquals) + /// is_strict -> Equal values are not requested + /// is_descending -> The vector is sorted in reverse (for greater or greaterOrEquals) #define BOUND_ITERATION \ { \ size_t half = size / 2; \ size_t other_half = size - half; \ size_t probe = low + half; \ size_t other_low = low + other_half; \ - TKey v = array[probe].asof_value; \ + TKey & v = entries[probe].value; \ size = half; \ - if constexpr (isDescending) \ + if constexpr (is_descending) \ { \ - if constexpr (isStrict) \ + if constexpr (is_strict) \ low = value <= v ? other_low : low; \ else \ low = value < v ? other_low : low; \ } \ else \ { \ - if constexpr (isStrict) \ + if constexpr (is_strict) \ low = value >= v ? other_low : low; \ else \ low = value > v ? other_low : low; \ @@ -130,7 +145,7 @@ public: return low; } - std::tuple findAsof(const IColumn & asof_column, size_t row_num) override + RowRef findAsof(const IColumn & asof_column, size_t row_num) override { sort(); @@ -139,8 +154,11 @@ public: TKey k = column.getElement(row_num); size_t pos = boundSearch(k); - if (pos != array.size()) - return std::make_tuple(array[pos].block, array[pos].row_num); + if (pos != entries.size()) + { + size_t row_ref_index = entries[pos].row_ref_index; + return row_refs[row_ref_index]; + } return {nullptr, 0}; } @@ -148,7 +166,8 @@ public: private: std::atomic sorted = false; mutable std::mutex lock; - Base array; + Entries entries; + RowRefs row_refs; // Double checked locking with SC atomics works in C++ // https://preshing.com/20130930/double-checked-locking-is-fixed-in-cpp11/ @@ -160,12 +179,37 @@ private: if (!sorted.load(std::memory_order_acquire)) { std::lock_guard l(lock); + if (!sorted.load(std::memory_order_relaxed)) { - if constexpr (isDescending) - ::sort(array.begin(), array.end(), GreaterEntryOperator()); + if constexpr (std::is_arithmetic_v && !std::is_floating_point_v) + { + if (likely(entries.size() > 256)) + { + struct RadixSortTraits : RadixSortNumTraits + { + using Element = Entry; + using Result = Element; + + static TKey & extractKey(Element & elem) { return elem.value; } + static Element extractResult(Element & elem) { return elem; } + }; + + if constexpr (is_descending) + RadixSort::executeLSD(entries.data(), entries.size(), true); + else + RadixSort::executeLSD(entries.data(), entries.size(), false); + + sorted.store(true, std::memory_order_release); + return; + } + } + + if constexpr (is_descending) + ::sort(entries.begin(), entries.end(), GreaterEntryOperator()); else - ::sort(array.begin(), array.end()); + ::sort(entries.begin(), entries.end(), LessEntryOperator()); + sorted.store(true, std::memory_order_release); } } diff --git a/src/Interpreters/RowRefs.h b/src/Interpreters/RowRefs.h index 02462833050..fa5ce867613 100644 --- a/src/Interpreters/RowRefs.h +++ b/src/Interpreters/RowRefs.h @@ -146,7 +146,7 @@ private: struct SortedLookupVectorBase { SortedLookupVectorBase() = default; - virtual ~SortedLookupVectorBase() { } + virtual ~SortedLookupVectorBase() = default; static std::optional getTypeSize(const IColumn & asof_column, size_t & type_size); @@ -154,7 +154,7 @@ struct SortedLookupVectorBase virtual void insert(const IColumn &, const Block *, size_t) = 0; // This needs to be synchronized internally - virtual std::tuple findAsof(const IColumn &, size_t) = 0; + virtual RowRef findAsof(const IColumn &, size_t) = 0; }; From 6d0d7c5b20059bb99567117d92ad339f19b38f4d Mon Sep 17 00:00:00 2001 From: erikbaan <3224628+erikbaan@users.noreply.github.com> Date: Wed, 23 Mar 2022 13:12:31 +0100 Subject: [PATCH 087/132] Fix ClickHouse name typo in caches.md --- docs/en/operations/caches.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/caches.md b/docs/en/operations/caches.md index 279204a8af1..9aa6419d89c 100644 --- a/docs/en/operations/caches.md +++ b/docs/en/operations/caches.md @@ -5,7 +5,7 @@ toc_title: Caches # Cache Types {#cache-types} -When performing queries, ClichHouse uses different caches. +When performing queries, ClickHouse uses different caches. Main cache types: From cd058699e93581013a1d005a1a2a645f1afb109a Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 23 Mar 2022 13:39:07 +0100 Subject: [PATCH 088/132] fix test --- .../test_zookeeper_config_load_balancing/test.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_zookeeper_config_load_balancing/test.py b/tests/integration/test_zookeeper_config_load_balancing/test.py index f48b3168325..6a49e4a4dde 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_load_balancing.xml') @@ -78,8 +79,11 @@ def test_nearest_hostname(started_cluster): def test_round_robin(started_cluster): + pm = PartitionManager() try: - started_cluster.stop_zookeeper_nodes(["zoo1"]) + pm._add_rule({"source": node1.ip_address, "destination": cluster.get_instance_ip('zoo1'), "action": 'REJECT --reject-with tcp-reset'}) + pm._add_rule({"source": node2.ip_address, "destination": cluster.get_instance_ip('zoo1'), "action": 'REJECT --reject-with tcp-reset'}) + pm._add_rule({"source": node3.ip_address, "destination": cluster.get_instance_ip('zoo1'), "action": 'REJECT --reject-with tcp-reset'}) change_balancing('random', 'round_robin') print(str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) @@ -92,5 +96,5 @@ def test_round_robin(started_cluster): assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() finally: - started_cluster.start_zookeeper_nodes(["zoo1"]) + pm.heal_all() change_balancing('round_robin', 'random', reload=False) From 944111183330f3e5941ce3760bca6473ff0e14c5 Mon Sep 17 00:00:00 2001 From: xiedeyantu Date: Wed, 23 Mar 2022 21:24:59 +0800 Subject: [PATCH 089/132] fix INSERT INTO table FROM INFILE does not display progress bar (#35429) * fix INSERT INTO table FROM INFILE does not display progress bar * Update StorageFile.cpp Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> --- src/Storages/StorageFile.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 93cceadaf93..761b4ecdeb1 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -179,8 +179,9 @@ std::unique_ptr createReadBuffer( method = chooseCompressionMethod(current_path, compression_method); } - /// For clickhouse-local add progress callback to display progress bar. - if (context->getApplicationType() == Context::ApplicationType::LOCAL) + /// For clickhouse-local and clickhouse-client add progress callback to display progress bar. + if (context->getApplicationType() == Context::ApplicationType::LOCAL + || context->getApplicationType() == Context::ApplicationType::CLIENT) { auto & in = static_cast(*nested_buffer); in.setProgressCallback(context); @@ -643,7 +644,9 @@ Pipe StorageFile::read( /// Set total number of bytes to process. For progress bar. auto progress_callback = context->getFileProgressCallback(); - if (context->getApplicationType() == Context::ApplicationType::LOCAL && progress_callback) + if ((context->getApplicationType() == Context::ApplicationType::LOCAL + || context->getApplicationType() == Context::ApplicationType::CLIENT) + && progress_callback) progress_callback(FileProgress(0, total_bytes_to_read)); for (size_t i = 0; i < num_streams; ++i) From 561ce88ff3f30537dc84bfa67579c86210b0f915 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 23 Mar 2022 14:25:21 +0100 Subject: [PATCH 090/132] Fixed tests --- src/Interpreters/RowRefs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/RowRefs.cpp b/src/Interpreters/RowRefs.cpp index 0385b003f3c..2b791f5a189 100644 --- a/src/Interpreters/RowRefs.cpp +++ b/src/Interpreters/RowRefs.cpp @@ -60,7 +60,7 @@ class SortedLookupVector : public SortedLookupVectorBase { ALWAYS_INLINE bool operator()(const Entry & lhs, const Entry & rhs) const { - return lhs.value > rhs.value; + return lhs.value < rhs.value; } }; From e790a7308121427ecb68ae957af8af9d14393be5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 23 Mar 2022 15:14:30 +0100 Subject: [PATCH 091/132] Simplify strip for new packages --- cmake/strip.sh | 28 ------------------ cmake/strip_binary.cmake | 33 ++++++++++++++++++++-- packages/clickhouse-common-static-dbg.yaml | 8 ++++-- programs/CMakeLists.txt | 9 +----- programs/keeper/CMakeLists.txt | 7 ++++- programs/library-bridge/CMakeLists.txt | 1 + programs/odbc-bridge/CMakeLists.txt | 1 + 7 files changed, 45 insertions(+), 42 deletions(-) delete mode 100755 cmake/strip.sh diff --git a/cmake/strip.sh b/cmake/strip.sh deleted file mode 100755 index f85d82fab31..00000000000 --- a/cmake/strip.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -BINARY_PATH=$1 -BINARY_NAME=$(basename "$BINARY_PATH") -DESTINATION_STRIPPED_DIR=$2 -OBJCOPY_PATH=${3:objcopy} -READELF_PATH=${4:readelf} - -BUILD_ID=$($READELF_PATH -n "$1" | sed -n '/Build ID/ { s/.*: //p; q; }') -BUILD_ID_PREFIX=${BUILD_ID:0:2} -BUILD_ID_SUFFIX=${BUILD_ID:2} - -DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id" -DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin" - -mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX" -mkdir -p "$DESTINATION_STRIP_BINARY_DIR" - - -cp "$BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" - -$OBJCOPY_PATH --only-keep-debug --compress-debug-sections "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" -chmod 0644 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" -chown 0:0 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" - -strip --remove-section=.comment --remove-section=.note "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" - -$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" diff --git a/cmake/strip_binary.cmake b/cmake/strip_binary.cmake index e430807772d..6d0b7227c54 100644 --- a/cmake/strip_binary.cmake +++ b/cmake/strip_binary.cmake @@ -11,16 +11,43 @@ macro(clickhouse_strip_binary) message(FATAL_ERROR "A binary path name must be provided for stripping binary") endif() - if (NOT DEFINED STRIP_DESTINATION_DIR) message(FATAL_ERROR "Destination directory for stripped binary must be provided") endif() add_custom_command(TARGET ${STRIP_TARGET} POST_BUILD - COMMAND bash ${ClickHouse_SOURCE_DIR}/cmake/strip.sh ${STRIP_BINARY_PATH} ${STRIP_DESTINATION_DIR} ${OBJCOPY_PATH} ${READELF_PATH} - COMMENT "Stripping clickhouse binary" VERBATIM + COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/lib/debug/" + COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin" + COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/${STRIP_TARGET}.debug" + COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/${STRIP_TARGET}.debug" + COMMAND strip --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + COMMENT "Stripping clickhouse binary" VERBATIM ) install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) install(DIRECTORY ${STRIP_DESTINATION_DIR}/lib/debug DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse) endmacro() + + +macro(clickhouse_make_empty_debug_info_for_nfpm) + set(oneValueArgs TARGET DESTINATION_DIR) + cmake_parse_arguments(EMPTY_DEBUG "" "${oneValueArgs}" "" ${ARGN}) + + if (NOT DEFINED EMPTY_DEBUG_TARGET) + message(FATAL_ERROR "A target name must be provided for stripping binary") + endif() + + if (NOT DEFINED EMPTY_DEBUG_DESTINATION_DIR) + message(FATAL_ERROR "Destination directory for empty debug must be provided") + endif() + + add_custom_command(TARGET ${EMPTY_DEBUG_TARGET} POST_BUILD + COMMAND mkdir -p "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug" + COMMAND touch "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" + COMMENT "Addiding empty debug info for NFPM" VERBATIM + ) + + install(FILES "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" DESTINATION "${CMAKE_INSTALL_LIBDIR}/debug" COMPONENT clickhouse) +endmacro() diff --git a/packages/clickhouse-common-static-dbg.yaml b/packages/clickhouse-common-static-dbg.yaml index 1213f4215c8..349f9ec0c47 100644 --- a/packages/clickhouse-common-static-dbg.yaml +++ b/packages/clickhouse-common-static-dbg.yaml @@ -21,8 +21,12 @@ description: | This package contains the debugging symbols for clickhouse-common. contents: -- src: root/usr/lib/debug - dst: /usr/lib/debug +- src: root/usr/lib/debug/clickhouse.debug + dst: /usr/lib/debug/clickhouse.debug +- src: root/usr/lib/debug/clickhouse-odbc-bridge.debug + dst: /usr/lib/debug/clickhouse-odbc-bridge.debug +- src: root/usr/lib/debug/clickhouse-library-bridge.debug + dst: /usr/lib/debug/clickhouse-library-bridge.debug # docs - src: ../AUTHORS dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 1e2420021b6..cca7be97b61 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -473,18 +473,11 @@ else () if (INSTALL_STRIPPED_BINARIES) clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse) else() + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT}) install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif() endif() -if (NOT INSTALL_STRIPPED_BINARIES) - # Install dunny debug directory - # TODO: move logic to every place where clickhouse_strip_binary is used - add_custom_command(TARGET clickhouse POST_BUILD COMMAND echo > .empty ) - install(FILES "${CMAKE_CURRENT_BINARY_DIR}/.empty" DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/.empty) -endif() - - if (ENABLE_TESTS) set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_dbms) add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS}) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 92bb5dc45a3..9491d503fbf 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -137,5 +137,10 @@ if (BUILD_STANDALONE_KEEPER) add_dependencies(clickhouse-keeper clickhouse_keeper_configs) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) - install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + if (INSTALL_STRIPPED_BINARIES) + clickhouse_strip_binary(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-keeper) + else() + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT}) + install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + endif() endif() diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index aded9664b35..90ce3d8be7f 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -27,5 +27,6 @@ set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECT if (INSTALL_STRIPPED_BINARIES) clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge) else() + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT}) install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif() diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 50a8bb629c8..b530e08ca26 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -42,6 +42,7 @@ endif() if (INSTALL_STRIPPED_BINARIES) clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge) else() + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT}) install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif() From ff2d5dae83344637fdb93d4d85b01f2dd6f5adc3 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 23 Mar 2022 15:22:08 +0100 Subject: [PATCH 092/132] Fix cast into IPv4, IPv6 address in IN section --- src/Functions/FunctionsConversion.h | 8 ++++++-- tests/queries/0_stateless/02243_in_ip_address.reference | 2 ++ tests/queries/0_stateless/02243_in_ip_address.sql | 9 +++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02243_in_ip_address.reference create mode 100644 tests/queries/0_stateless/02243_in_ip_address.sql diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index d1564008dfe..bb5b4137312 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -3506,9 +3506,13 @@ private: /// 'requested_result_is_nullable' is true if CAST to Nullable type is requested. WrapperType prepareImpl(const DataTypePtr & from_type, const DataTypePtr & to_type, bool requested_result_is_nullable) const { - bool convert_to_ipv6 = to_type->getCustomName() && to_type->getCustomName()->getName() == "IPv6"; + /// We can cast IPv6 into IPv6, IPv4 into IPv4, but we should not allow to cast FixedString(16) into IPv6 as part of identity cast + bool safe_convert_into_custom_type = true; - if (from_type->equals(*to_type) && !convert_to_ipv6) + if (const auto * to_type_custom_name = to_type->getCustomName()) + safe_convert_into_custom_type = from_type->getCustomName() && from_type->getCustomName()->getName() == to_type_custom_name->getName(); + + if (from_type->equals(*to_type) && safe_convert_into_custom_type) { if (isUInt8(from_type)) return createUInt8ToUInt8Wrapper(from_type, to_type); diff --git a/tests/queries/0_stateless/02243_in_ip_address.reference b/tests/queries/0_stateless/02243_in_ip_address.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/02243_in_ip_address.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/02243_in_ip_address.sql b/tests/queries/0_stateless/02243_in_ip_address.sql new file mode 100644 index 00000000000..a2c8c37e585 --- /dev/null +++ b/tests/queries/0_stateless/02243_in_ip_address.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table (id UInt64, value_ipv4 IPv4, value_ipv6 IPv6) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (0, '127.0.0.1', '127.0.0.1'); + +SELECT id FROM test_table WHERE value_ipv4 IN (SELECT value_ipv4 FROM test_table); +SELECT id FROM test_table WHERE value_ipv6 IN (SELECT value_ipv6 FROM test_table); + +DROP TABLE test_table; From bd37e2ace17595975989848b8a2101a7427abfee Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 23 Mar 2022 15:26:22 +0100 Subject: [PATCH 093/132] Add workflow dispatch --- .github/workflows/nightly.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index a172947b2fc..5b47f94a324 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -7,6 +7,7 @@ env: "on": schedule: - cron: '13 3 * * *' + workflow_dispatch: jobs: DockerHubPushAarch64: From df0d3c93040c1cfb7896b3fb8c80b15c83f8a7e0 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 23 Mar 2022 16:11:46 +0100 Subject: [PATCH 094/132] Fix parsing of IPv6 addresses longer than 39 characters --- src/Common/formatIPv6.h | 2 +- .../0_stateless/02243_ipv6_long_parsing.reference | 3 +++ tests/queries/0_stateless/02243_ipv6_long_parsing.sql | 10 ++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02243_ipv6_long_parsing.reference create mode 100644 tests/queries/0_stateless/02243_ipv6_long_parsing.sql diff --git a/src/Common/formatIPv6.h b/src/Common/formatIPv6.h index 1a65adae55b..d6efeed17e6 100644 --- a/src/Common/formatIPv6.h +++ b/src/Common/formatIPv6.h @@ -11,7 +11,7 @@ constexpr size_t IPV4_BINARY_LENGTH = 4; constexpr size_t IPV6_BINARY_LENGTH = 16; constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte. -constexpr size_t IPV6_MAX_TEXT_LENGTH = 39; +constexpr size_t IPV6_MAX_TEXT_LENGTH = 45; /// Does not count tail zero byte. namespace DB { diff --git a/tests/queries/0_stateless/02243_ipv6_long_parsing.reference b/tests/queries/0_stateless/02243_ipv6_long_parsing.reference new file mode 100644 index 00000000000..c09bfebe9d5 --- /dev/null +++ b/tests/queries/0_stateless/02243_ipv6_long_parsing.reference @@ -0,0 +1,3 @@ +0 ::ffff:1.12.12.12 +1 ::ffff:123.123.123.123 +2 ::ffff:192.168.100.228 diff --git a/tests/queries/0_stateless/02243_ipv6_long_parsing.sql b/tests/queries/0_stateless/02243_ipv6_long_parsing.sql new file mode 100644 index 00000000000..25225ee0fa8 --- /dev/null +++ b/tests/queries/0_stateless/02243_ipv6_long_parsing.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table (id UInt64, value IPv6) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (0, '0000:0000:0000:0000:0000:ffff:1.12.12.12'); +INSERT INTO test_table VALUES (1, '0000:0000:0000:0000:0000:ffff:123.123.123.123'); +INSERT INTO test_table VALUES (2, '0000:0000:0000:0000:0000:ffff:192.168.100.228'); + +SELECT * FROM test_table ORDER BY id; + +DROP TABLE test_table; From ab7923a26ccd22736c7a389b34c8b90ad1ea6fe7 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 23 Mar 2022 23:20:21 +0800 Subject: [PATCH 095/132] Remove comments --- src/Interpreters/InterpreterSelectQuery.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index b3d1210e6df..569c45c41ba 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1241,10 +1241,6 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

hasGlobalSubqueries() && !subqueries_for_sets.empty()) - // executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets); } if (expressions.second_stage || from_aggregation_stage) @@ -1427,7 +1423,6 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

hasGlobalSubqueries())) if (!subqueries_for_sets.empty()) executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets); } From 052057f2ef4226adc606366340c94ce0c0ff2715 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 23 Mar 2022 15:02:19 +0000 Subject: [PATCH 096/132] Address PR comments --- src/Common/StringUtils/StringUtils.h | 9 - src/Core/Block.cpp | 5 +- src/DataTypes/NestedUtils.cpp | 9 +- src/DataTypes/NestedUtils.h | 2 +- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 164 +++++++----------- .../Formats/Impl/ArrowColumnToCHColumn.h | 8 +- .../Formats/Impl/ORCBlockInputFormat.cpp | 73 ++++---- .../Formats/Impl/ORCBlockInputFormat.h | 2 - .../Formats/Impl/ParquetBlockInputFormat.cpp | 86 ++++----- ...e_insensitive_column_matching.parquet.json | 0 10 files changed, 148 insertions(+), 210 deletions(-) delete mode 100644 tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet.json diff --git a/src/Common/StringUtils/StringUtils.h b/src/Common/StringUtils/StringUtils.h index e1a753e816d..21df0f5ae8b 100644 --- a/src/Common/StringUtils/StringUtils.h +++ b/src/Common/StringUtils/StringUtils.h @@ -240,15 +240,6 @@ inline bool equalsCaseInsensitive(char a, char b) return a == b || (isAlphaASCII(a) && alternateCaseIfAlphaASCII(a) == b); } -inline bool equalsCaseInsensitive(const std::string_view a, const std::string_view b) -{ - if (a.length() != b.length()) - return false; - - return std::equal( - a.begin(), a.end(), b.begin(), [](const auto first, const auto second) { return equalsCaseInsensitive(first, second); }); -} - template std::string trim(const std::string & str, F && predicate) diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 306f99d7c24..a7142ef7f2e 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -13,6 +13,7 @@ #include #include +#include namespace DB @@ -273,7 +274,7 @@ const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool c { if (case_insensitive) { - auto found = std::find_if(data.begin(), data.end(), [&](const auto & column) { return equalsCaseInsensitive(column.name, name); }); + auto found = std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); }); if (found == data.end()) { return nullptr; @@ -304,7 +305,7 @@ const ColumnWithTypeAndName & Block::getByName(const std::string & name, bool ca bool Block::has(const std::string & name, bool case_insensitive) const { if (case_insensitive) - return std::find_if(data.begin(), data.end(), [&](const auto & column) { return equalsCaseInsensitive(column.name, name); }) + return std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); }) != data.end(); return index_by_name.end() != index_by_name.find(name); diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index df504bc34a8..cfacdd252e2 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -15,6 +15,8 @@ #include +#include + namespace DB { @@ -227,12 +229,15 @@ void validateArraySizes(const Block & block) } -std::unordered_set getAllTableNames(const Block & block) +std::unordered_set getAllTableNames(const Block & block, bool to_lower_case) { std::unordered_set nested_table_names; - for (auto & name : block.getNames()) + for (const auto & name : block.getNames()) { auto nested_table_name = Nested::extractTableName(name); + if (to_lower_case) + boost::to_lower(nested_table_name); + if (!nested_table_name.empty()) nested_table_names.insert(nested_table_name); } diff --git a/src/DataTypes/NestedUtils.h b/src/DataTypes/NestedUtils.h index 2ca5c17dc74..f6dc42d5c58 100644 --- a/src/DataTypes/NestedUtils.h +++ b/src/DataTypes/NestedUtils.h @@ -32,7 +32,7 @@ namespace Nested void validateArraySizes(const Block & block); /// Get all nested tables names from a block. - std::unordered_set getAllTableNames(const Block & block); + std::unordered_set getAllTableNames(const Block & block, bool to_lower_case = false); } } diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 91d276ddfe0..0a72e561e4e 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -1,44 +1,40 @@ #include "ArrowColumnToCHColumn.h" -#include -#include -#include "Common/StringUtils/StringUtils.h" - #if USE_ARROW || USE_ORC || USE_PARQUET -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include - +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn. -# define FOR_ARROW_NUMERIC_TYPES(M) \ +#define FOR_ARROW_NUMERIC_TYPES(M) \ M(arrow::Type::UINT8, DB::UInt8) \ M(arrow::Type::INT8, DB::Int8) \ M(arrow::Type::INT16, DB::Int16) \ @@ -49,7 +45,7 @@ M(arrow::Type::FLOAT, DB::Float32) \ M(arrow::Type::DOUBLE, DB::Float64) -# define FOR_ARROW_INDEXES_TYPES(M) \ +#define FOR_ARROW_INDEXES_TYPES(M) \ M(arrow::Type::UINT8, DB::UInt8) \ M(arrow::Type::INT8, DB::UInt8) \ M(arrow::Type::UINT16, DB::UInt16) \ @@ -73,7 +69,6 @@ namespace ErrorCodes extern const int INCORRECT_NUMBER_OF_COLUMNS; } - /// Inserts numeric data right into internal column data to reduce an overhead template > static ColumnWithTypeAndName readColumnWithNumericData(std::shared_ptr & arrow_column, const String & column_name) @@ -181,12 +176,8 @@ static ColumnWithTypeAndName readColumnWithDate32Data(std::shared_ptr(chunk.Value(value_i)); if (days_num > DATE_LUT_MAX_EXTEND_DAY_NUM) - throw Exception( - ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, - "Input value {} of a column \"{}\" is greater than max allowed Date value, which is {}", - days_num, - column_name, - DATE_LUT_MAX_DAY_NUM); + throw Exception{ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, + "Input value {} of a column \"{}\" is greater than max allowed Date value, which is {}", days_num, column_name, DATE_LUT_MAX_DAY_NUM}; column_data.emplace_back(days_num); } @@ -235,8 +226,7 @@ static ColumnWithTypeAndName readColumnWithTimestampData(std::shared_ptr -static ColumnWithTypeAndName -readColumnWithDecimalDataImpl(std::shared_ptr & arrow_column, const String & column_name, DataTypePtr internal_type) +static ColumnWithTypeAndName readColumnWithDecimalDataImpl(std::shared_ptr & arrow_column, const String & column_name, DataTypePtr internal_type) { auto internal_column = internal_type->createColumn(); auto & column = assert_cast &>(*internal_column); @@ -248,8 +238,7 @@ readColumnWithDecimalDataImpl(std::shared_ptr & arrow_colum auto & chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); for (size_t value_i = 0, length = static_cast(chunk.length()); value_i < length; ++value_i) { - column_data.emplace_back( - chunk.IsNull(value_i) ? DecimalType(0) : *reinterpret_cast(chunk.Value(value_i))); // TODO: copy column + column_data.emplace_back(chunk.IsNull(value_i) ? DecimalType(0) : *reinterpret_cast(chunk.Value(value_i))); // TODO: copy column } } return {std::move(internal_column), internal_type, column_name}; @@ -310,9 +299,10 @@ static ColumnPtr readColumnWithIndexesData(std::shared_ptr switch (arrow_column->type()->id()) { # define DISPATCH(ARROW_NUMERIC_TYPE, CPP_NUMERIC_TYPE) \ - case ARROW_NUMERIC_TYPE: { \ - return readColumnWithNumericData(arrow_column, "").column; \ - } + case ARROW_NUMERIC_TYPE: \ + { \ + return readColumnWithNumericData(arrow_column, "").column; \ + } FOR_ARROW_INDEXES_TYPES(DISPATCH) # undef DISPATCH default: @@ -366,13 +356,15 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( // ClickHouse writes Date as arrow UINT16 and DateTime as arrow UINT32, // so, read UINT16 as Date and UINT32 as DateTime to perform correct conversion // between Date and DateTime further. - case arrow::Type::UINT16: { + case arrow::Type::UINT16: + { auto column = readColumnWithNumericData(arrow_column, column_name); if (read_ints_as_dates) column.type = std::make_shared(); return column; } - case arrow::Type::UINT32: { + case arrow::Type::UINT32: + { auto column = readColumnWithNumericData(arrow_column, column_name); if (read_ints_as_dates) column.type = std::make_shared(); @@ -384,10 +376,10 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( return readColumnWithDecimalData(arrow_column, column_name); case arrow::Type::DECIMAL256: return readColumnWithDecimalData(arrow_column, column_name); - case arrow::Type::MAP: { + case arrow::Type::MAP: + { auto arrow_nested_column = getNestedArrowColumn(arrow_column); - auto nested_column - = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); + auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); auto offsets_column = readOffsetsFromArrowListColumn(arrow_column); const auto * tuple_column = assert_cast(nested_column.column.get()); @@ -396,16 +388,17 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( auto map_type = std::make_shared(tuple_type->getElements()[0], tuple_type->getElements()[1]); return {std::move(map_column), std::move(map_type), column_name}; } - case arrow::Type::LIST: { + case arrow::Type::LIST: + { auto arrow_nested_column = getNestedArrowColumn(arrow_column); - auto nested_column - = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); + auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); auto offsets_column = readOffsetsFromArrowListColumn(arrow_column); auto array_column = ColumnArray::create(nested_column.column, offsets_column); auto array_type = std::make_shared(nested_column.type); return {std::move(array_column), std::move(array_type), column_name}; } - case arrow::Type::STRUCT: { + case arrow::Type::STRUCT: + { auto arrow_type = arrow_column->type(); auto * arrow_struct_type = assert_cast(arrow_type.get()); std::vector nested_arrow_columns(arrow_struct_type->num_fields()); @@ -423,8 +416,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( for (int i = 0; i != arrow_struct_type->num_fields(); ++i) { auto nested_arrow_column = std::make_shared(nested_arrow_columns[i]); - auto element = readColumnFromArrowColumn( - nested_arrow_column, arrow_struct_type->field(i)->name(), format_name, false, dictionary_values, read_ints_as_dates); + auto element = readColumnFromArrowColumn(nested_arrow_column, arrow_struct_type->field(i)->name(), format_name, false, dictionary_values, read_ints_as_dates); tuple_elements.emplace_back(std::move(element.column)); tuple_types.emplace_back(std::move(element.type)); tuple_names.emplace_back(std::move(element.name)); @@ -434,7 +426,8 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( auto tuple_type = std::make_shared(std::move(tuple_types), std::move(tuple_names)); return {std::move(tuple_column), std::move(tuple_type), column_name}; } - case arrow::Type::DICTIONARY: { + case arrow::Type::DICTIONARY: + { auto & dict_values = dictionary_values[column_name]; /// Load dictionary values only once and reuse it. if (!dict_values) @@ -446,14 +439,12 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( dict_array.emplace_back(dict_chunk.dictionary()); } auto arrow_dict_column = std::make_shared(dict_array); - auto dict_column - = readColumnFromArrowColumn(arrow_dict_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); + auto dict_column = readColumnFromArrowColumn(arrow_dict_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); /// We should convert read column to ColumnUnique. auto tmp_lc_column = DataTypeLowCardinality(dict_column.type).createColumn(); auto tmp_dict_column = IColumn::mutate(assert_cast(tmp_lc_column.get())->getDictionaryPtr()); - static_cast(tmp_dict_column.get()) - ->uniqueInsertRangeFrom(*dict_column.column, 0, dict_column.column->size()); + static_cast(tmp_dict_column.get())->uniqueInsertRangeFrom(*dict_column.column, 0, dict_column.column->size()); dict_column.column = std::move(tmp_dict_column); dict_values = std::make_shared(std::move(dict_column)); } @@ -474,17 +465,13 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( # define DISPATCH(ARROW_NUMERIC_TYPE, CPP_NUMERIC_TYPE) \ case ARROW_NUMERIC_TYPE: \ return readColumnWithNumericData(arrow_column, column_name); - FOR_ARROW_NUMERIC_TYPES(DISPATCH) + FOR_ARROW_NUMERIC_TYPES(DISPATCH) # undef DISPATCH // TODO: read JSON as a string? // TODO: read UUID as a string? default: - throw Exception( - ErrorCodes::UNKNOWN_TYPE, - "Unsupported {} type '{}' of an input column '{}'.", - format_name, - arrow_column->type()->name(), - column_name); + throw Exception(ErrorCodes::UNKNOWN_TYPE, + "Unsupported {} type '{}' of an input column '{}'.", format_name, arrow_column->type()->name(), column_name); } } @@ -504,26 +491,12 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( ColumnsWithTypeAndName sample_columns; std::unordered_set nested_table_names; if (hint_header) - nested_table_names = Nested::getAllTableNames(*hint_header); - - const auto accept_field = [&](const auto & field_name) - { - if (!hint_header || hint_header->has(field_name, ignore_case)) - return true; - - if (!ignore_case) - return nested_table_names.contains(field_name); - - return std::find_if( - nested_table_names.begin(), - nested_table_names.end(), - [&](const auto & nested_table_name) { return equalsCaseInsensitive(nested_table_name, field_name); }) - != nested_table_names.end(); - }; + nested_table_names = Nested::getAllTableNames(*hint_header, ignore_case); for (const auto & field : schema.fields()) { - if (!accept_field(field->name())) + if (hint_header && !hint_header->has(field->name(), ignore_case) + && !nested_table_names.contains(ignore_case ? boost::to_lower_copy(field->name()) : field->name())) continue; /// Create empty arrow column by it's type and convert it to ClickHouse column. @@ -539,8 +512,7 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( arrow::ArrayVector array_vector = {arrow_array}; auto arrow_column = std::make_shared(array_vector); std::unordered_map> dict_values; - ColumnWithTypeAndName sample_column - = readColumnFromArrowColumn(arrow_column, field->name(), format_name, false, dict_values, false); + ColumnWithTypeAndName sample_column = readColumnFromArrowColumn(arrow_column, field->name(), format_name, false, dict_values, false); sample_columns.emplace_back(std::move(sample_column)); } @@ -571,9 +543,7 @@ void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptrsecond->length(); columns_list.reserve(header.rows()); @@ -594,17 +565,14 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & auto search_column_name = header_column.name; if (case_insensitive_matching) - { boost::to_lower(search_column_name); - } bool read_from_nested = false; String nested_table_name = Nested::extractTableName(header_column.name); String search_nested_table_name = nested_table_name; if (case_insensitive_matching) - { boost::to_lower(search_nested_table_name); - } + if (!name_to_column_ptr.contains(search_column_name)) { /// Check if it's a column from nested table. diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index ff99d2b2f11..0a712326941 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -4,10 +4,10 @@ #if USE_ARROW || USE_ORC || USE_PARQUET -# include -# include -# include -# include +#include +#include +#include +#include namespace DB diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 1eab922c397..c68b59833db 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -1,14 +1,14 @@ #include "ORCBlockInputFormat.h" -#include "Common/StringUtils/StringUtils.h" +#include #if USE_ORC -# include -# include -# include -# include -# include -# include "ArrowBufferedStreams.h" -# include "ArrowColumnToCHColumn.h" +#include +#include +#include +#include +#include "ArrowBufferedStreams.h" +#include "ArrowColumnToCHColumn.h" +#include namespace DB { @@ -138,9 +138,10 @@ void ORCBlockInputFormat::prepareReader() format_settings.orc.case_insensitive_column_matching); missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema); + const bool ignore_case = format_settings.orc.case_insensitive_column_matching; std::unordered_set nested_table_names; if (format_settings.orc.import_nested) - nested_table_names = Nested::getAllTableNames(getPort().getHeader()); + nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case); /// In ReadStripe column indices should be started from 1, /// because 0 indicates to select all columns. @@ -151,29 +152,8 @@ void ORCBlockInputFormat::prepareReader() /// so we should recursively count the number of indices we need for this type. int indexes_count = countIndicesForType(schema->field(i)->type()); const auto & name = schema->field(i)->name(); - const bool contains_column = std::invoke( - [&] - { - if (getPort().getHeader().has(name, format_settings.parquet.case_insensitive_column_matching)) - { - return true; - } - - if (!format_settings.parquet.case_insensitive_column_matching) - { - return nested_table_names.contains(name); - } - - return std::find_if( - nested_table_names.begin(), - nested_table_names.end(), - [&](const auto & nested_table_name) { return equalsCaseInsensitive(nested_table_name, name); }) - != nested_table_names.end(); - }); - - if (contains_column) + if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? boost::to_lower_copy(name) : name)) { - column_names.push_back(name); for (int j = 0; j != indexes_count; ++j) include_indices.push_back(index + j); } @@ -200,9 +180,14 @@ NamesAndTypesList ORCSchemaReader::readSchema() void registerInputFormatORC(FormatFactory & factory) { factory.registerInputFormat( - "ORC", - [](ReadBuffer & buf, const Block & sample, const RowInputFormatParams &, const FormatSettings & settings) - { return std::make_shared(buf, sample, settings); }); + "ORC", + [](ReadBuffer &buf, + const Block &sample, + const RowInputFormatParams &, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, settings); + }); factory.markFormatAsColumnOriented("ORC"); } @@ -210,7 +195,11 @@ void registerORCSchemaReader(FormatFactory & factory) { factory.registerSchemaReader( "ORC", - [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) { return std::make_shared(buf, settings); }); + [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + { + return std::make_shared(buf, settings); + } + ); } } @@ -218,14 +207,14 @@ void registerORCSchemaReader(FormatFactory & factory) namespace DB { -class FormatFactory; -void registerInputFormatORC(FormatFactory &) -{ -} + class FormatFactory; + void registerInputFormatORC(FormatFactory &) + { + } -void registerORCSchemaReader(FormatFactory &) -{ -} + void registerORCSchemaReader(FormatFactory &) + { + } } #endif diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/src/Processors/Formats/Impl/ORCBlockInputFormat.h index bb136d02d6e..b7a771730ea 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h @@ -45,8 +45,6 @@ private: std::unique_ptr arrow_column_to_ch_column; - std::vector column_names; - // indices of columns to read from ORC file std::vector include_indices; diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index c2e3c71d671..13582ce5019 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -1,21 +1,19 @@ #include "ParquetBlockInputFormat.h" -#include "Common/StringUtils/StringUtils.h" +#include + #if USE_PARQUET -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include "ArrowBufferedStreams.h" -# include "ArrowColumnToCHColumn.h" - -# include - +#include +#include +#include +#include +#include +#include +#include +#include +#include "ArrowBufferedStreams.h" +#include "ArrowColumnToCHColumn.h" +#include namespace DB { @@ -26,12 +24,12 @@ namespace ErrorCodes extern const int CANNOT_READ_ALL_DATA; } -# define THROW_ARROW_NOT_OK(status) \ - do \ - { \ - if (::arrow::Status _s = (status); !_s.ok()) \ - throw Exception(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \ - } while (false) +#define THROW_ARROW_NOT_OK(status) \ + do \ + { \ + if (::arrow::Status _s = (status); !_s.ok()) \ + throw Exception(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \ + } while (false) ParquetBlockInputFormat::ParquetBlockInputFormat(ReadBuffer & in_, Block header_, const FormatSettings & format_settings_) : IInputFormat(std::move(header_), in_), format_settings(format_settings_) @@ -140,9 +138,10 @@ void ParquetBlockInputFormat::prepareReader() format_settings.parquet.case_insensitive_column_matching); missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema); + const bool ignore_case = format_settings.parquet.case_insensitive_column_matching; std::unordered_set nested_table_names; if (format_settings.parquet.import_nested) - nested_table_names = Nested::getAllTableNames(getPort().getHeader()); + nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case); int index = 0; for (int i = 0; i < schema->num_fields(); ++i) @@ -153,27 +152,7 @@ void ParquetBlockInputFormat::prepareReader() int indexes_count = countIndicesForType(schema->field(i)->type()); const auto & name = schema->field(i)->name(); - const bool contains_column = std::invoke( - [&] - { - if (getPort().getHeader().has(name, format_settings.parquet.case_insensitive_column_matching)) - { - return true; - } - - if (!format_settings.parquet.case_insensitive_column_matching) - { - return nested_table_names.contains(name); - } - - return std::find_if( - nested_table_names.begin(), - nested_table_names.end(), - [&](const auto & nested_table_name) { return equalsCaseInsensitive(nested_table_name, name); }) - != nested_table_names.end(); - }); - - if (contains_column) + if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? boost::to_lower_copy(name) : name)) { for (int j = 0; j != indexes_count; ++j) column_indices.push_back(index + j); @@ -201,9 +180,14 @@ NamesAndTypesList ParquetSchemaReader::readSchema() void registerInputFormatParquet(FormatFactory & factory) { factory.registerInputFormat( - "Parquet", - [](ReadBuffer & buf, const Block & sample, const RowInputFormatParams &, const FormatSettings & settings) - { return std::make_shared(buf, sample, settings); }); + "Parquet", + [](ReadBuffer &buf, + const Block &sample, + const RowInputFormatParams &, + const FormatSettings & settings) + { + return std::make_shared(buf, sample, settings); + }); factory.markFormatAsColumnOriented("Parquet"); } @@ -211,7 +195,11 @@ void registerParquetSchemaReader(FormatFactory & factory) { factory.registerSchemaReader( "Parquet", - [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) { return std::make_shared(buf, settings); }); + [](ReadBuffer & buf, const FormatSettings & settings, ContextPtr) + { + return std::make_shared(buf, settings); + } + ); } } @@ -225,9 +213,7 @@ void registerInputFormatParquet(FormatFactory &) { } -void registerParquetSchemaReader(FormatFactory &) -{ -} +void registerParquetSchemaReader(FormatFactory &) {} } #endif diff --git a/tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet.json b/tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet.json deleted file mode 100644 index e69de29bb2d..00000000000 From 8561c366be577be25259260f7253b4fe3c1716be Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 23 Mar 2022 16:19:59 +0000 Subject: [PATCH 097/132] Move nested table name --- src/DataTypes/NestedUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index cfacdd252e2..8f5e40de5b8 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -239,7 +239,7 @@ std::unordered_set getAllTableNames(const Block & block, bool to_lower_c boost::to_lower(nested_table_name); if (!nested_table_name.empty()) - nested_table_names.insert(nested_table_name); + nested_table_names.insert(std::move(nested_table_name)); } return nested_table_names; } From bf9c6be6c64e043b5912b89862e6878c74e43880 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Wed, 23 Mar 2022 17:57:19 +0100 Subject: [PATCH 098/132] move threadPoolCallbackRunner from Common into Interpreters folder to fix split build issue --- src/Disks/S3/DiskS3.cpp | 2 +- src/{Common => Interpreters}/threadPoolCallbackRunner.cpp | 0 src/{Common => Interpreters}/threadPoolCallbackRunner.h | 0 src/Storages/StorageS3.cpp | 3 ++- 4 files changed, 3 insertions(+), 2 deletions(-) rename src/{Common => Interpreters}/threadPoolCallbackRunner.cpp (100%) rename src/{Common => Interpreters}/threadPoolCallbackRunner.h (100%) diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index c36191bf3d4..e05ccef74c0 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -18,9 +18,9 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/src/Common/threadPoolCallbackRunner.cpp b/src/Interpreters/threadPoolCallbackRunner.cpp similarity index 100% rename from src/Common/threadPoolCallbackRunner.cpp rename to src/Interpreters/threadPoolCallbackRunner.cpp diff --git a/src/Common/threadPoolCallbackRunner.h b/src/Interpreters/threadPoolCallbackRunner.h similarity index 100% rename from src/Common/threadPoolCallbackRunner.h rename to src/Interpreters/threadPoolCallbackRunner.h diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index a09e02da2a2..ce31308fdd7 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -4,7 +4,6 @@ #if USE_AWS_S3 #include -#include #include @@ -13,6 +12,7 @@ #include #include #include +#include #include #include @@ -21,6 +21,7 @@ #include #include #include +#include #include #include From 2772f3181a3044acba9d7e523da7c3a949e51139 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 23 Mar 2022 13:28:17 -0400 Subject: [PATCH 099/132] non-constant parameter tests added, some tests fixed --- .../02207_subseconds_intervals.reference | 42 ++++++++++------- .../02207_subseconds_intervals.sql | 47 ++++++++++++------- 2 files changed, 53 insertions(+), 36 deletions(-) diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference index 32b7d2a9446..f7b91ff48b8 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.reference +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -2,6 +2,7 @@ test intervals - test nanoseconds 1980-12-12 12:12:12.123456789 1980-12-12 12:12:12.123456700 +1980-12-12 12:12:12.123456789 1930-12-12 12:12:12.123456789 1930-12-12 12:12:12.123456700 2220-12-12 12:12:12.123456789 @@ -10,6 +11,7 @@ test intervals 1980-12-12 12:12:12.123456 1980-12-12 12:12:12.123400 1980-12-12 12:12:12.123456 +1980-12-12 12:12:12.123456 1930-12-12 12:12:12.123456 1930-12-12 12:12:12.123400 1930-12-12 12:12:12.123457 @@ -20,6 +22,7 @@ test intervals 1980-12-12 12:12:12.123 1980-12-12 12:12:12.120 1980-12-12 12:12:12.123 +1980-12-12 12:12:12.123 1930-12-12 12:12:12.123 1930-12-12 12:12:12.120 1930-12-12 12:12:12.124 @@ -30,27 +33,30 @@ test add[...]seconds() - test nanoseconds 1980-12-12 12:12:12.123456790 1980-12-12 12:12:12.123456701 +1980-12-12 12:12:12.123456790 1930-12-12 12:12:12.123456788 1930-12-12 12:12:12.123456699 2220-12-12 12:12:12.123456790 2220-12-12 12:12:12.123456701 - test microseconds -1980-12-12 12:12:12.123456001 -1980-12-12 12:12:12.123400001 -1980-12-12 12:12:12.123456781 -1930-12-12 12:12:12.123455999 -1930-12-12 12:12:12.123399999 -1930-12-12 12:12:12.123456779 -2220-12-12 12:12:12.123456001 -2220-12-12 12:12:12.123400001 -2220-12-12 12:12:12.123456781 +1980-12-12 12:12:12.123457 +1980-12-12 12:12:12.123401 +1980-12-12 12:12:12.12345778 +1980-12-12 12:12:12.123457 +1930-12-12 12:12:12.123455 +1930-12-12 12:12:12.123399 +1930-12-12 12:12:12.12345578 +2220-12-12 12:12:12.123457 +2220-12-12 12:12:12.123401 +2220-12-12 12:12:12.12345778 - test milliseconds -1980-12-12 12:12:12.123000001 -1980-12-12 12:12:12.120000001 -1980-12-12 12:12:12.123456001 -1930-12-12 12:12:12.122999999 -1930-12-12 12:12:12.119999999 -1930-12-12 12:12:12.123455999 -2220-12-12 12:12:12.123000001 -2220-12-12 12:12:12.120000001 -2220-12-12 12:12:12.123456001 +1980-12-12 12:12:12.124 +1980-12-12 12:12:12.121 +1980-12-12 12:12:12.124456 +1980-12-12 12:12:12.124 +1930-12-12 12:12:12.122 +1930-12-12 12:12:12.119 +1930-12-12 12:12:12.122456 +2220-12-12 12:12:12.124 +2220-12-12 12:12:12.121 +2220-12-12 12:12:12.124456 diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.sql b/tests/queries/0_stateless/02207_subseconds_intervals.sql index 57b2445d9d1..a7ce03d9330 100644 --- a/tests/queries/0_stateless/02207_subseconds_intervals.sql +++ b/tests/queries/0_stateless/02207_subseconds_intervals.sql @@ -4,6 +4,8 @@ SELECT '- test nanoseconds'; select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456789', 9), INTERVAL 1 NANOSECOND); -- In normal range, source scale matches result select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.1234567', 7), INTERVAL 1 NANOSECOND); -- In normal range, source scale less than result +select toStartOfInterval(a, INTERVAL 1 NANOSECOND) from ( select toDateTime64('1980-12-12 12:12:12.123456789', 9) AS a ); -- Non-constant argument + select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123456789', 9), INTERVAL 1 NANOSECOND); -- Below normal range, source scale matches result select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.1234567', 7), INTERVAL 1 NANOSECOND); -- Below normal range, source scale less than result @@ -16,6 +18,7 @@ select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456', 6), INTERVAL select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.1234', 4), INTERVAL 1 MICROSECOND); -- In normal range, source scale less than result select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.12345678', 8), INTERVAL 1 MICROSECOND); -- In normal range, source scale greater than result +select toStartOfInterval(a, INTERVAL 1 MICROSECOND) from ( select toDateTime64('1980-12-12 12:12:12.12345678', 8) AS a ); -- Non-constant argument select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123456', 6), INTERVAL 1 MICROSECOND); -- Below normal range, source scale matches result select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.1234', 4), INTERVAL 1 MICROSECOND); -- Below normal range, source scale less than result @@ -32,6 +35,8 @@ select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123', 3), INTERVAL 1 select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.12', 2), INTERVAL 1 MILLISECOND); -- In normal range, source scale less than result select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456', 6), INTERVAL 1 MILLISECOND); -- In normal range, source scale greater than result +select toStartOfInterval(a, INTERVAL 1 MILLISECOND) from ( select toDateTime64('1980-12-12 12:12:12.123456', 6) AS a ); -- Non-constant argument + select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123', 3), INTERVAL 1 MILLISECOND); -- Below normal range, source scale matches result select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.12', 2), INTERVAL 1 MILLISECOND); -- Below normal range, source scale less than result select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123456', 6), INTERVAL 1 MILLISECOND); -- Below normal range, source scale greater than result @@ -48,6 +53,8 @@ SELECT '- test nanoseconds'; select addNanoseconds(toDateTime64('1980-12-12 12:12:12.123456789', 9), 1); -- In normal range, source scale matches result select addNanoseconds(toDateTime64('1980-12-12 12:12:12.1234567', 7), 1); -- In normal range, source scale less than result +select addNanoseconds(a, 1) from ( select toDateTime64('1980-12-12 12:12:12.123456789', 9) AS a ); -- Non-constant argument + select addNanoseconds(toDateTime64('1930-12-12 12:12:12.123456789', 9), 1); -- Below normal range, source scale matches result select addNanoseconds(toDateTime64('1930-12-12 12:12:12.1234567', 7), 1); -- Below normal range, source scale less than result @@ -56,28 +63,32 @@ select addNanoseconds(toDateTime64('2220-12-12 12:12:12.1234567', 7), 1); -- Abo SELECT '- test microseconds'; -select addNanoseconds(toDateTime64('1980-12-12 12:12:12.123456', 6), 1); -- In normal range, source scale matches result -select addNanoseconds(toDateTime64('1980-12-12 12:12:12.1234', 4), 1); -- In normal range, source scale less than result -select addNanoseconds(toDateTime64('1980-12-12 12:12:12.12345678', 8), 1); -- In normal range, source scale greater than result +select addMicroseconds(toDateTime64('1980-12-12 12:12:12.123456', 6), 1); -- In normal range, source scale matches result +select addMicroseconds(toDateTime64('1980-12-12 12:12:12.1234', 4), 1); -- In normal range, source scale less than result +select addMicroseconds(toDateTime64('1980-12-12 12:12:12.12345678', 8), 1); -- In normal range, source scale greater than result -select addNanoseconds(toDateTime64('1930-12-12 12:12:12.123456', 6), 1); -- Below normal range, source scale matches result -select addNanoseconds(toDateTime64('1930-12-12 12:12:12.1234', 4), 1); -- Below normal range, source scale less than result -select addNanoseconds(toDateTime64('1930-12-12 12:12:12.12345678', 8), 1); -- Below normal range, source scale greater than result +select addMicroseconds(a, 1) from ( select toDateTime64('1980-12-12 12:12:12.123456', 6) AS a ); -- Non-constant argument -select addNanoseconds(toDateTime64('2220-12-12 12:12:12.123456', 6), 1); -- Above normal range, source scale matches result -select addNanoseconds(toDateTime64('2220-12-12 12:12:12.1234', 4), 1); -- Above normal range, source scale less than result -select addNanoseconds(toDateTime64('2220-12-12 12:12:12.12345678', 8), 1); -- Above normal range, source scale greater than result +select addMicroseconds(toDateTime64('1930-12-12 12:12:12.123456', 6), 1); -- Below normal range, source scale matches result +select addMicroseconds(toDateTime64('1930-12-12 12:12:12.1234', 4), 1); -- Below normal range, source scale less than result +select addMicroseconds(toDateTime64('1930-12-12 12:12:12.12345678', 8), 1); -- Below normal range, source scale greater than result + +select addMicroseconds(toDateTime64('2220-12-12 12:12:12.123456', 6), 1); -- Above normal range, source scale matches result +select addMicroseconds(toDateTime64('2220-12-12 12:12:12.1234', 4), 1); -- Above normal range, source scale less than result +select addMicroseconds(toDateTime64('2220-12-12 12:12:12.12345678', 8), 1); -- Above normal range, source scale greater than result SELECT '- test milliseconds'; -select addNanoseconds(toDateTime64('1980-12-12 12:12:12.123', 3), 1); -- In normal range, source scale matches result -select addNanoseconds(toDateTime64('1980-12-12 12:12:12.12', 2), 1); -- In normal range, source scale less than result -select addNanoseconds(toDateTime64('1980-12-12 12:12:12.123456', 6), 1); -- In normal range, source scale greater than result +select addMilliseconds(toDateTime64('1980-12-12 12:12:12.123', 3), 1); -- In normal range, source scale matches result +select addMilliseconds(toDateTime64('1980-12-12 12:12:12.12', 2), 1); -- In normal range, source scale less than result +select addMilliseconds(toDateTime64('1980-12-12 12:12:12.123456', 6), 1); -- In normal range, source scale greater than result -select addNanoseconds(toDateTime64('1930-12-12 12:12:12.123', 3), 1); -- Below normal range, source scale matches result -select addNanoseconds(toDateTime64('1930-12-12 12:12:12.12', 2), 1); -- Below normal range, source scale less than result -select addNanoseconds(toDateTime64('1930-12-12 12:12:12.123456', 6), 1); -- Below normal range, source scale greater than result +select addMilliseconds(a, 1) from ( select toDateTime64('1980-12-12 12:12:12.123', 3) AS a ); -- Non-constant argument -select addNanoseconds(toDateTime64('2220-12-12 12:12:12.123', 3), 1); -- Above normal range, source scale matches result -select addNanoseconds(toDateTime64('2220-12-12 12:12:12.12', 2), 1); -- Above normal range, source scale less than result -select addNanoseconds(toDateTime64('2220-12-12 12:12:12.123456', 6), 1); -- Above normal range, source scale greater than result +select addMilliseconds(toDateTime64('1930-12-12 12:12:12.123', 3), 1); -- Below normal range, source scale matches result +select addMilliseconds(toDateTime64('1930-12-12 12:12:12.12', 2), 1); -- Below normal range, source scale less than result +select addMilliseconds(toDateTime64('1930-12-12 12:12:12.123456', 6), 1); -- Below normal range, source scale greater than result + +select addMilliseconds(toDateTime64('2220-12-12 12:12:12.123', 3), 1); -- Above normal range, source scale matches result +select addMilliseconds(toDateTime64('2220-12-12 12:12:12.12', 2), 1); -- Above normal range, source scale less than result +select addMilliseconds(toDateTime64('2220-12-12 12:12:12.123456', 6), 1); -- Above normal range, source scale greater than result From 9e6f0ae9f24da27a7bbe8514f0d15af352d5ce4b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 23 Mar 2022 18:44:09 +0100 Subject: [PATCH 100/132] Fix strip path --- cmake/strip_binary.cmake | 2 +- cmake/tools.cmake | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cmake/strip_binary.cmake b/cmake/strip_binary.cmake index 6d0b7227c54..cbfd8a95c33 100644 --- a/cmake/strip_binary.cmake +++ b/cmake/strip_binary.cmake @@ -21,7 +21,7 @@ macro(clickhouse_strip_binary) COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/${STRIP_TARGET}.debug" COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/${STRIP_TARGET}.debug" - COMMAND strip --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" COMMENT "Stripping clickhouse binary" VERBATIM ) diff --git a/cmake/tools.cmake b/cmake/tools.cmake index d6fddd0509e..d571a46ad26 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -170,32 +170,32 @@ else () message (FATAL_ERROR "Cannot find objcopy.") endif () -# Readelf (FIXME copypaste) +# Strip (FIXME copypaste) if (COMPILER_GCC) - find_program (READELF_PATH NAMES "llvm-readelf" "llvm-readelf-13" "llvm-readelf-12" "llvm-readelf-11" "readelf") + find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-13" "llvm-strip-12" "llvm-strip-11" "strip") else () - find_program (READELF_PATH NAMES "llvm-readelf-${COMPILER_VERSION_MAJOR}" "llvm-readelf" "readelf") + find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip") endif () -if (NOT READELF_PATH AND OS_DARWIN) +if (NOT STRIP_PATH AND OS_DARWIN) find_program (BREW_PATH NAMES "brew") if (BREW_PATH) execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX) if (LLVM_PREFIX) - find_program (READELF_PATH NAMES "llvm-readelf" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH) + find_program (STRIP_PATH NAMES "llvm-strip" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH) endif () - if (NOT READELF_PATH) + if (NOT STRIP_PATH) execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX) if (BINUTILS_PREFIX) - find_program (READELF_PATH NAMES "readelf" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH) + find_program (STRIP_PATH NAMES "strip" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH) endif () endif () endif () endif () -if (READELF_PATH) - message (STATUS "Using readelf: ${READELF_PATH}") +if (STRIP_PATH) + message (STATUS "Using strip: ${STRIP_PATH}") else () - message (FATAL_ERROR "Cannot find readelf.") + message (FATAL_ERROR "Cannot find strip.") endif () From 31b3d2e8406ad8410a881567d0fca662afe203e7 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 23 Mar 2022 19:59:26 +0100 Subject: [PATCH 101/132] Fixed tests --- src/Functions/FunctionsConversion.h | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index bb5b4137312..1ed9d6c5dd0 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -2703,13 +2703,10 @@ private: return createWrapper(from_type, to_type, requested_result_is_nullable); } - WrapperType createUInt8ToUInt8Wrapper(const DataTypePtr from_type, const DataTypePtr to_type) const + WrapperType createUInt8ToBoolWrapper(const DataTypePtr from_type, const DataTypePtr to_type) const { return [from_type, to_type] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) -> ColumnPtr { - if (isBool(from_type) || !isBool(to_type)) - return arguments.front().column; - /// Special case when we convert UInt8 column to Bool column. /// both columns have type UInt8, but we shouldn't use identity wrapper, /// because Bool column can contain only 0 and 1. @@ -3506,19 +3503,19 @@ private: /// 'requested_result_is_nullable' is true if CAST to Nullable type is requested. WrapperType prepareImpl(const DataTypePtr & from_type, const DataTypePtr & to_type, bool requested_result_is_nullable) const { + if (isUInt8(from_type) && isBool(to_type)) + return createUInt8ToBoolWrapper(from_type, to_type); + /// We can cast IPv6 into IPv6, IPv4 into IPv4, but we should not allow to cast FixedString(16) into IPv6 as part of identity cast - bool safe_convert_into_custom_type = true; + bool safe_convert_custom_types = true; if (const auto * to_type_custom_name = to_type->getCustomName()) - safe_convert_into_custom_type = from_type->getCustomName() && from_type->getCustomName()->getName() == to_type_custom_name->getName(); - - if (from_type->equals(*to_type) && safe_convert_into_custom_type) - { - if (isUInt8(from_type)) - return createUInt8ToUInt8Wrapper(from_type, to_type); + safe_convert_custom_types = from_type->getCustomName() && from_type->getCustomName()->getName() == to_type_custom_name->getName(); + else if (const auto * from_type_custom_name = from_type->getCustomName()) + safe_convert_custom_types = to_type->getCustomName() && from_type_custom_name->getName() == to_type->getCustomName()->getName(); + if (from_type->equals(*to_type) && safe_convert_custom_types) return createIdentityWrapper(from_type); - } else if (WhichDataType(from_type).isNothing()) return createNothingWrapper(to_type.get()); From 3df6a863874bffd9337933c1e9363f7c224880b2 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 23 Mar 2022 20:05:17 +0100 Subject: [PATCH 102/132] ExternalModelsLoader refactoring --- src/Interpreters/CatBoostModel.cpp | 198 ++++++++------------ src/Interpreters/CatBoostModel.h | 50 ++--- src/Interpreters/ExternalModelsLoader.h | 4 +- src/Storages/System/StorageSystemModels.cpp | 2 +- 4 files changed, 102 insertions(+), 152 deletions(-) diff --git a/src/Interpreters/CatBoostModel.cpp b/src/Interpreters/CatBoostModel.cpp index 1b6e30a0959..cffaa81c4f0 100644 --- a/src/Interpreters/CatBoostModel.cpp +++ b/src/Interpreters/CatBoostModel.cpp @@ -26,10 +26,10 @@ extern const int CANNOT_LOAD_CATBOOST_MODEL; extern const int CANNOT_APPLY_CATBOOST_MODEL; } - /// CatBoost wrapper interface functions. -struct CatBoostWrapperAPI +class CatBoostWrapperAPI { +public: using ModelCalcerHandle = void; ModelCalcerHandle * (* ModelCalcerCreate)(); // NOLINT @@ -68,9 +68,6 @@ struct CatBoostWrapperAPI }; -namespace -{ - class CatBoostModelHolder { private: @@ -84,7 +81,61 @@ public: }; -class CatBoostModelImpl : public ICatBoostModel +/// Holds CatBoost wrapper library and provides wrapper interface. +class CatBoostLibHolder +{ +public: + explicit CatBoostLibHolder(std::string lib_path_) : lib_path(std::move(lib_path_)), lib(lib_path) { initAPI(); } + + const CatBoostWrapperAPI & getAPI() const { return api; } + const std::string & getCurrentPath() const { return lib_path; } + +private: + CatBoostWrapperAPI api; + std::string lib_path; + SharedLibrary lib; + + void initAPI() + { + load(api.ModelCalcerCreate, "ModelCalcerCreate"); + load(api.ModelCalcerDelete, "ModelCalcerDelete"); + load(api.GetErrorString, "GetErrorString"); + load(api.LoadFullModelFromFile, "LoadFullModelFromFile"); + load(api.CalcModelPredictionFlat, "CalcModelPredictionFlat"); + load(api.CalcModelPrediction, "CalcModelPrediction"); + load(api.CalcModelPredictionWithHashedCatFeatures, "CalcModelPredictionWithHashedCatFeatures"); + load(api.GetStringCatFeatureHash, "GetStringCatFeatureHash"); + load(api.GetIntegerCatFeatureHash, "GetIntegerCatFeatureHash"); + load(api.GetFloatFeaturesCount, "GetFloatFeaturesCount"); + load(api.GetCatFeaturesCount, "GetCatFeaturesCount"); + tryLoad(api.CheckModelMetadataHasKey, "CheckModelMetadataHasKey"); + tryLoad(api.GetModelInfoValueSize, "GetModelInfoValueSize"); + tryLoad(api.GetModelInfoValue, "GetModelInfoValue"); + tryLoad(api.GetTreeCount, "GetTreeCount"); + tryLoad(api.GetDimensionsCount, "GetDimensionsCount"); + } + + template + void load(T& func, const std::string & name) { func = lib.get(name); } + + template + void tryLoad(T& func, const std::string & name) { func = lib.tryGet(name); } +}; + +std::shared_ptr getCatBoostWrapperHolder(const std::string & lib_path) +{ + static std::shared_ptr ptr; + static std::mutex mutex; + + std::lock_guard lock(mutex); + + if (!ptr || ptr->getCurrentPath() != lib_path) + ptr = std::make_shared(lib_path); + + return ptr; +} + +class CatBoostModelImpl { public: CatBoostModelImpl(const CatBoostWrapperAPI * api_, const std::string & model_path) : api(api_) @@ -92,13 +143,15 @@ public: handle = std::make_unique(api); if (!handle) { - std::string msg = "Cannot create CatBoost model: "; - throw Exception(msg + api->GetErrorString(), ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL); + throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL, + "Cannot create CatBoost model: {}", + api->GetErrorString()); } if (!api->LoadFullModelFromFile(handle->get(), model_path.c_str())) { - std::string msg = "Cannot load CatBoost model: "; - throw Exception(msg + api->GetErrorString(), ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL); + throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL, + "Cannot load CatBoost model: {}", + api->GetErrorString()); } float_features_count = api->GetFloatFeaturesCount(handle->get()); @@ -108,32 +161,22 @@ public: tree_count = api->GetDimensionsCount(handle->get()); } - ColumnPtr evaluate(const ColumnRawPtrs & columns) const override + ColumnPtr evaluate(const ColumnRawPtrs & columns) const { if (columns.empty()) - throw Exception("Got empty columns list for CatBoost model.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Got empty columns list for CatBoost model."); if (columns.size() != float_features_count + cat_features_count) - { - std::string msg; - { - WriteBufferFromString buffer(msg); - buffer << "Number of columns is different with number of features: "; - buffer << columns.size() << " vs " << float_features_count << " + " << cat_features_count; - } - throw Exception(msg, ErrorCodes::BAD_ARGUMENTS); - } + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Number of columns is different with number of features: columns size {} float features size {} + cat features size {}", + float_features_count, + cat_features_count); for (size_t i = 0; i < float_features_count; ++i) { if (!columns[i]->isNumeric()) { - std::string msg; - { - WriteBufferFromString buffer(msg); - buffer << "Column " << i << " should be numeric to make float feature."; - } - throw Exception(msg, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric to make float feature.", i); } } @@ -142,16 +185,13 @@ public: { const auto * column = columns[i]; if (column->isNumeric()) + { cat_features_are_strings = false; + } else if (!(typeid_cast(column) || typeid_cast(column))) { - std::string msg; - { - WriteBufferFromString buffer(msg); - buffer << "Column " << i << " should be numeric or string."; - } - throw Exception(msg, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric or string.", i); } } @@ -187,9 +227,9 @@ public: return ColumnTuple::create(std::move(mutable_columns)); } - size_t getFloatFeaturesCount() const override { return float_features_count; } - size_t getCatFeaturesCount() const override { return cat_features_count; } - size_t getTreeCount() const override { return tree_count; } + size_t getFloatFeaturesCount() const { return float_features_count; } + size_t getCatFeaturesCount() const { return cat_features_count; } + size_t getTreeCount() const { return tree_count; } private: std::unique_ptr handle; @@ -435,66 +475,6 @@ private: } }; - -/// Holds CatBoost wrapper library and provides wrapper interface. -class CatBoostLibHolder: public CatBoostWrapperAPIProvider -{ -public: - explicit CatBoostLibHolder(std::string lib_path_) : lib_path(std::move(lib_path_)), lib(lib_path) { initAPI(); } - - const CatBoostWrapperAPI & getAPI() const override { return api; } - const std::string & getCurrentPath() const { return lib_path; } - -private: - CatBoostWrapperAPI api; - std::string lib_path; - SharedLibrary lib; - - void initAPI(); - - template - void load(T& func, const std::string & name) { func = lib.get(name); } - - template - void tryLoad(T& func, const std::string & name) { func = lib.tryGet(name); } -}; - -void CatBoostLibHolder::initAPI() -{ - load(api.ModelCalcerCreate, "ModelCalcerCreate"); - load(api.ModelCalcerDelete, "ModelCalcerDelete"); - load(api.GetErrorString, "GetErrorString"); - load(api.LoadFullModelFromFile, "LoadFullModelFromFile"); - load(api.CalcModelPredictionFlat, "CalcModelPredictionFlat"); - load(api.CalcModelPrediction, "CalcModelPrediction"); - load(api.CalcModelPredictionWithHashedCatFeatures, "CalcModelPredictionWithHashedCatFeatures"); - load(api.GetStringCatFeatureHash, "GetStringCatFeatureHash"); - load(api.GetIntegerCatFeatureHash, "GetIntegerCatFeatureHash"); - load(api.GetFloatFeaturesCount, "GetFloatFeaturesCount"); - load(api.GetCatFeaturesCount, "GetCatFeaturesCount"); - tryLoad(api.CheckModelMetadataHasKey, "CheckModelMetadataHasKey"); - tryLoad(api.GetModelInfoValueSize, "GetModelInfoValueSize"); - tryLoad(api.GetModelInfoValue, "GetModelInfoValue"); - tryLoad(api.GetTreeCount, "GetTreeCount"); - tryLoad(api.GetDimensionsCount, "GetDimensionsCount"); -} - -std::shared_ptr getCatBoostWrapperHolder(const std::string & lib_path) -{ - static std::shared_ptr ptr; - static std::mutex mutex; - - std::lock_guard lock(mutex); - - if (!ptr || ptr->getCurrentPath() != lib_path) - ptr = std::make_shared(lib_path); - - return ptr; -} - -} - - CatBoostModel::CatBoostModel(std::string name_, std::string model_path_, std::string lib_path_, const ExternalLoadableLifetime & lifetime_) : name(std::move(name_)), model_path(std::move(model_path_)), lib_path(std::move(lib_path_)), lifetime(lifetime_) @@ -502,43 +482,28 @@ CatBoostModel::CatBoostModel(std::string name_, std::string model_path_, std::st api_provider = getCatBoostWrapperHolder(lib_path); api = &api_provider->getAPI(); model = std::make_unique(api, model_path); - float_features_count = model->getFloatFeaturesCount(); - cat_features_count = model->getCatFeaturesCount(); - tree_count = model->getTreeCount(); } -const ExternalLoadableLifetime & CatBoostModel::getLifetime() const -{ - return lifetime; -} - -bool CatBoostModel::isModified() const -{ - return true; -} - -std::shared_ptr CatBoostModel::clone() const -{ - return std::make_shared(name, model_path, lib_path, lifetime); -} +CatBoostModel::~CatBoostModel() = default; size_t CatBoostModel::getFloatFeaturesCount() const { - return float_features_count; + return model->getFloatFeaturesCount(); } size_t CatBoostModel::getCatFeaturesCount() const { - return cat_features_count; + return model->getCatFeaturesCount(); } size_t CatBoostModel::getTreeCount() const { - return tree_count; + return model->getTreeCount(); } DataTypePtr CatBoostModel::getReturnType() const { + size_t tree_count = getTreeCount(); auto type = std::make_shared(); if (tree_count == 1) return type; @@ -552,6 +517,7 @@ ColumnPtr CatBoostModel::evaluate(const ColumnRawPtrs & columns) const { if (!model) throw Exception("CatBoost model was not loaded.", ErrorCodes::LOGICAL_ERROR); + return model->evaluate(columns); } diff --git a/src/Interpreters/CatBoostModel.h b/src/Interpreters/CatBoostModel.h index 51bf0ba94f5..7bb1df92b67 100644 --- a/src/Interpreters/CatBoostModel.h +++ b/src/Interpreters/CatBoostModel.h @@ -8,47 +8,32 @@ namespace DB { -/// CatBoost wrapper interface functions. -struct CatBoostWrapperAPI; -class CatBoostWrapperAPIProvider -{ -public: - virtual ~CatBoostWrapperAPIProvider() = default; - virtual const CatBoostWrapperAPI & getAPI() const = 0; -}; - -/// CatBoost model interface. -class ICatBoostModel -{ -public: - virtual ~ICatBoostModel() = default; - /// Evaluate model. Use first `float_features_count` columns as float features, - /// the others `cat_features_count` as categorical features. - virtual ColumnPtr evaluate(const ColumnRawPtrs & columns) const = 0; - - virtual size_t getFloatFeaturesCount() const = 0; - virtual size_t getCatFeaturesCount() const = 0; - virtual size_t getTreeCount() const = 0; -}; +class CatBoostLibHolder; +class CatBoostWrapperAPI; +class CatBoostModelImpl; class IDataType; using DataTypePtr = std::shared_ptr; /// General ML model evaluator interface. -class IModel : public IExternalLoadable +class IMLModel : public IExternalLoadable { public: + IMLModel() = default; virtual ColumnPtr evaluate(const ColumnRawPtrs & columns) const = 0; virtual std::string getTypeName() const = 0; virtual DataTypePtr getReturnType() const = 0; + virtual ~IMLModel() override = default; }; -class CatBoostModel : public IModel +class CatBoostModel : public IMLModel { public: CatBoostModel(std::string name, std::string model_path, std::string lib_path, const ExternalLoadableLifetime & lifetime); + ~CatBoostModel() override; + ColumnPtr evaluate(const ColumnRawPtrs & columns) const override; std::string getTypeName() const override { return "catboost"; } @@ -59,29 +44,28 @@ public: /// IExternalLoadable interface. - const ExternalLoadableLifetime & getLifetime() const override; + const ExternalLoadableLifetime & getLifetime() const override { return lifetime; } std::string getLoadableName() const override { return name; } bool supportUpdates() const override { return true; } - bool isModified() const override; + bool isModified() const override { return true; } - std::shared_ptr clone() const override; + std::shared_ptr clone() const override + { + return std::make_shared(name, model_path, lib_path, lifetime); + } private: const std::string name; std::string model_path; std::string lib_path; ExternalLoadableLifetime lifetime; - std::shared_ptr api_provider; + std::shared_ptr api_provider; const CatBoostWrapperAPI * api; - std::unique_ptr model; - - size_t float_features_count; - size_t cat_features_count; - size_t tree_count; + std::unique_ptr model; void init(); }; diff --git a/src/Interpreters/ExternalModelsLoader.h b/src/Interpreters/ExternalModelsLoader.h index 18e1f1123f6..042906bee9e 100644 --- a/src/Interpreters/ExternalModelsLoader.h +++ b/src/Interpreters/ExternalModelsLoader.h @@ -15,14 +15,14 @@ namespace DB class ExternalModelsLoader : public ExternalLoader, WithContext { public: - using ModelPtr = std::shared_ptr; + using ModelPtr = std::shared_ptr; /// Models will be loaded immediately and then will be updated in separate thread, each 'reload_period' seconds. explicit ExternalModelsLoader(ContextPtr context_); ModelPtr getModel(const std::string & model_name) const { - return std::static_pointer_cast(load(model_name)); + return std::static_pointer_cast(load(model_name)); } void reloadModel(const std::string & model_name) const diff --git a/src/Storages/System/StorageSystemModels.cpp b/src/Storages/System/StorageSystemModels.cpp index 3df48e830bb..4a4dbbc69df 100644 --- a/src/Storages/System/StorageSystemModels.cpp +++ b/src/Storages/System/StorageSystemModels.cpp @@ -38,7 +38,7 @@ void StorageSystemModels::fillData(MutableColumns & res_columns, ContextPtr cont if (load_result.object) { - const auto model_ptr = std::static_pointer_cast(load_result.object); + const auto model_ptr = std::static_pointer_cast(load_result.object); res_columns[3]->insert(model_ptr->getTypeName()); } else From 97b5cb96f2f31a84af2f3ca782b70223749c3193 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 23 Mar 2022 20:41:28 +0100 Subject: [PATCH 103/132] Fixed style check --- .../test_system_logs_comment/test.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_system_logs_comment/test.py b/tests/integration/test_system_logs_comment/test.py index bfc17865296..0659a2689a0 100644 --- a/tests/integration/test_system_logs_comment/test.py +++ b/tests/integration/test_system_logs_comment/test.py @@ -6,9 +6,10 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance('node_default', stay_alive=True) +node = cluster.add_instance("node_default", stay_alive=True) -@pytest.fixture(scope='module', autouse=True) + +@pytest.fixture(scope="module", autouse=True) def start_cluster(): try: cluster.start() @@ -18,7 +19,11 @@ def start_cluster(): def test_system_logs_comment(): - node.exec_in_container(['bash', '-c', f"""echo " + node.exec_in_container( + [ + "bash", + "-c", + f"""echo " ENGINE = MergeTree @@ -32,11 +37,13 @@ def test_system_logs_comment(): " > /etc/clickhouse-server/config.d/yyy-override-query_log.xml - """]) + """, + ] + ) node.restart_clickhouse() node.query("select 1") node.query("system flush logs") comment = node.query("SELECT comment FROM system.tables WHERE name = 'query_log'") - assert comment =='test_comment\n' + assert comment == "test_comment\n" From 9ae52910f073986d99fe9a8b8c2d86cbd693aa9f Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 23 Mar 2022 23:15:07 +0100 Subject: [PATCH 104/132] Fix style --- src/Columns/MaskOperations.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Columns/MaskOperations.cpp b/src/Columns/MaskOperations.cpp index 5a9436de3b0..9e2d02253be 100644 --- a/src/Columns/MaskOperations.cpp +++ b/src/Columns/MaskOperations.cpp @@ -88,7 +88,7 @@ size_t extractMaskNumericImpl( if (data.size() != mask.size()) throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a full data column is not equal to the size of a mask"); } - + size_t ones_count = 0; size_t data_index = 0; @@ -135,7 +135,7 @@ size_t extractMaskNumericImpl( if (data_index != data_size) throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a short column is not equal to the number of ones in a mask"); } - + return ones_count; } From 81b2e0bfd9d7c667f53b10c2130b8a72978c7603 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 23 Mar 2022 23:15:08 +0100 Subject: [PATCH 105/132] Fix multiple installation, use a final path for gnu-debuglink --- cmake/strip_binary.cmake | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/strip_binary.cmake b/cmake/strip_binary.cmake index cbfd8a95c33..1547a814913 100644 --- a/cmake/strip_binary.cmake +++ b/cmake/strip_binary.cmake @@ -16,18 +16,18 @@ macro(clickhouse_strip_binary) endif() add_custom_command(TARGET ${STRIP_TARGET} POST_BUILD - COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/lib/debug/" + COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/lib/debug/bin" COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin" COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" - COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/${STRIP_TARGET}.debug" - COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/${STRIP_TARGET}.debug" + COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" + COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" - COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_BINDIR}/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" COMMENT "Stripping clickhouse binary" VERBATIM ) install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - install(DIRECTORY ${STRIP_DESTINATION_DIR}/lib/debug DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse) + install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endmacro() From d54138425f841cfc34b46b9174a7aba76ead039f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 24 Mar 2022 02:18:25 +0100 Subject: [PATCH 106/132] Rename yandexConsistentHash to kostikConsistentHash --- docs/zh/operations/system-tables/functions.md | 2 +- ...dexConsistentHash.cpp => kostikConsistentHash.cpp} | 11 +++++------ src/Functions/registerFunctionsConsistentHashing.cpp | 4 ++-- tests/fuzz/all.dict | 2 +- tests/fuzz/dictionaries/functions.dict | 2 +- .../test_distributed_queries_stress/test.py | 4 ++-- tests/performance/consistent_hashes.xml | 2 +- .../00580_consistent_hashing_functions.sql | 4 ++-- .../0_stateless/00979_yandex_consistent_hash_fpe.sql | 2 +- 9 files changed, 16 insertions(+), 17 deletions(-) rename src/Functions/{yandexConsistentHash.cpp => kostikConsistentHash.cpp} (64%) diff --git a/docs/zh/operations/system-tables/functions.md b/docs/zh/operations/system-tables/functions.md index 695c7b7fee1..75df1f65c1f 100644 --- a/docs/zh/operations/system-tables/functions.md +++ b/docs/zh/operations/system-tables/functions.md @@ -15,7 +15,7 @@ ``` ┌─name─────────────────────┬─is_aggregate─┬─case_insensitive─┬─alias_to─┐ │ sumburConsistentHash │ 0 │ 0 │ │ -│ yandexConsistentHash │ 0 │ 0 │ │ +│ kostikConsistentHash │ 0 │ 0 │ │ │ demangle │ 0 │ 0 │ │ │ addressToLine │ 0 │ 0 │ │ │ JSONExtractRaw │ 0 │ 0 │ │ diff --git a/src/Functions/yandexConsistentHash.cpp b/src/Functions/kostikConsistentHash.cpp similarity index 64% rename from src/Functions/yandexConsistentHash.cpp rename to src/Functions/kostikConsistentHash.cpp index 58617e29af7..59215f87827 100644 --- a/src/Functions/yandexConsistentHash.cpp +++ b/src/Functions/kostikConsistentHash.cpp @@ -7,9 +7,9 @@ namespace DB { /// An O(1) time and space consistent hash algorithm by Konstantin Oblakov -struct YandexConsistentHashImpl +struct KostikConsistentHashImpl { - static constexpr auto name = "yandexConsistentHash"; + static constexpr auto name = "kostikConsistentHash"; using HashType = UInt64; /// Actually it supports UInt64, but it is efficient only if n <= 32768 @@ -23,12 +23,11 @@ struct YandexConsistentHashImpl } }; -using FunctionYandexConsistentHash = FunctionConsistentHashImpl; +using FunctionKostikConsistentHash = FunctionConsistentHashImpl; -void registerFunctionYandexConsistentHash(FunctionFactory & factory) +void registerFunctionKostikConsistentHash(FunctionFactory & factory) { - factory.registerFunction(); + factory.registerFunction(); } } - diff --git a/src/Functions/registerFunctionsConsistentHashing.cpp b/src/Functions/registerFunctionsConsistentHashing.cpp index d4d740bc92f..84a78cd6765 100644 --- a/src/Functions/registerFunctionsConsistentHashing.cpp +++ b/src/Functions/registerFunctionsConsistentHashing.cpp @@ -2,12 +2,12 @@ namespace DB { class FunctionFactory; -void registerFunctionYandexConsistentHash(FunctionFactory & factory); +void registerFunctionKostikConsistentHash(FunctionFactory & factory); void registerFunctionJumpConsistentHash(FunctionFactory & factory); void registerFunctionsConsistentHashing(FunctionFactory & factory) { - registerFunctionYandexConsistentHash(factory); + registerFunctionKostikConsistentHash(factory); registerFunctionJumpConsistentHash(factory); } diff --git a/tests/fuzz/all.dict b/tests/fuzz/all.dict index bf25f1fa484..1863cd20bdd 100644 --- a/tests/fuzz/all.dict +++ b/tests/fuzz/all.dict @@ -1459,7 +1459,7 @@ "xor" "xxHash32" "xxHash64" -"yandexConsistentHash" +"kostikConsistentHash" "YEAR" "yearweek" "yesterday" diff --git a/tests/fuzz/dictionaries/functions.dict b/tests/fuzz/dictionaries/functions.dict index 722e931dc09..3f393aa6846 100644 --- a/tests/fuzz/dictionaries/functions.dict +++ b/tests/fuzz/dictionaries/functions.dict @@ -26,7 +26,7 @@ "toUnixTimestamp64Nano" "toUnixTimestamp64Micro" "jumpConsistentHash" -"yandexConsistentHash" +"kostikConsistentHash" "addressToSymbol" "toJSONString" "JSON_VALUE" diff --git a/tests/integration/test_distributed_queries_stress/test.py b/tests/integration/test_distributed_queries_stress/test.py index a5df8562676..fce42b4e58b 100644 --- a/tests/integration/test_distributed_queries_stress/test.py +++ b/tests/integration/test_distributed_queries_stress/test.py @@ -67,10 +67,10 @@ def started_cluster(): insert into data (key) select * from numbers(10); create table if not exists dist_one as data engine=Distributed(one_shard, currentDatabase(), data, key); - create table if not exists dist_one_over_dist as data engine=Distributed(one_shard, currentDatabase(), dist_one, yandexConsistentHash(key, 2)); + create table if not exists dist_one_over_dist as data engine=Distributed(one_shard, currentDatabase(), dist_one, kostikConsistentHash(key, 2)); create table if not exists dist_two as data engine=Distributed(two_shards, currentDatabase(), data, key); - create table if not exists dist_two_over_dist as data engine=Distributed(two_shards, currentDatabase(), dist_two, yandexConsistentHash(key, 2)); + create table if not exists dist_two_over_dist as data engine=Distributed(two_shards, currentDatabase(), dist_two, kostikConsistentHash(key, 2)); """ ) yield cluster diff --git a/tests/performance/consistent_hashes.xml b/tests/performance/consistent_hashes.xml index 3610579f545..c65a1151536 100644 --- a/tests/performance/consistent_hashes.xml +++ b/tests/performance/consistent_hashes.xml @@ -3,7 +3,7 @@ hash_func - yandexConsistentHash + kostikConsistentHash jumpConsistentHash diff --git a/tests/queries/0_stateless/00580_consistent_hashing_functions.sql b/tests/queries/0_stateless/00580_consistent_hashing_functions.sql index 08e785929c7..f470642d391 100644 --- a/tests/queries/0_stateless/00580_consistent_hashing_functions.sql +++ b/tests/queries/0_stateless/00580_consistent_hashing_functions.sql @@ -1,6 +1,6 @@ -- Tags: no-fasttest SELECT jumpConsistentHash(1, 1), jumpConsistentHash(42, 57), jumpConsistentHash(256, 1024), jumpConsistentHash(3735883980, 1), jumpConsistentHash(3735883980, 666), jumpConsistentHash(16045690984833335023, 255); -SELECT yandexConsistentHash(16045690984833335023, 1), yandexConsistentHash(16045690984833335023, 2), yandexConsistentHash(16045690984833335023, 3), yandexConsistentHash(16045690984833335023, 4), yandexConsistentHash(16045690984833335023, 173), yandexConsistentHash(16045690984833335023, 255); +SELECT kostikConsistentHash(16045690984833335023, 1), kostikConsistentHash(16045690984833335023, 2), kostikConsistentHash(16045690984833335023, 3), kostikConsistentHash(16045690984833335023, 4), kostikConsistentHash(16045690984833335023, 173), kostikConsistentHash(16045690984833335023, 255); SELECT jumpConsistentHash(intHash64(number), 787) FROM system.numbers LIMIT 1000000, 2; -SELECT yandexConsistentHash(16045690984833335023+number-number, 120) FROM system.numbers LIMIT 1000000, 2; +SELECT kostikConsistentHash(16045690984833335023+number-number, 120) FROM system.numbers LIMIT 1000000, 2; diff --git a/tests/queries/0_stateless/00979_yandex_consistent_hash_fpe.sql b/tests/queries/0_stateless/00979_yandex_consistent_hash_fpe.sql index 79fabeae7ef..3da52f2cb96 100644 --- a/tests/queries/0_stateless/00979_yandex_consistent_hash_fpe.sql +++ b/tests/queries/0_stateless/00979_yandex_consistent_hash_fpe.sql @@ -1 +1 @@ -SELECT yandexConsistentHash(-1, 40000); -- { serverError 36 } +SELECT kostikConsistentHash(-1, 40000); -- { serverError 36 } From 31eeeeb42815cb68114f49196c591f7a443e0eca Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 24 Mar 2022 02:21:57 +0100 Subject: [PATCH 107/132] Add compatibility alias --- src/Functions/kostikConsistentHash.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Functions/kostikConsistentHash.cpp b/src/Functions/kostikConsistentHash.cpp index 59215f87827..a38c3c965d8 100644 --- a/src/Functions/kostikConsistentHash.cpp +++ b/src/Functions/kostikConsistentHash.cpp @@ -28,6 +28,7 @@ using FunctionKostikConsistentHash = FunctionConsistentHashImpl(); + factory.registerAlias("yandexConsistentHash", "kostikConsistentHash"); } } From d6f558382006de42282596d9e25b4272d7d3020e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 24 Mar 2022 05:20:12 +0300 Subject: [PATCH 108/132] Update adopters.md --- docs/en/introduction/adopters.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 9c7fab7424d..ad199ce452e 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -195,5 +195,6 @@ toc_title: Adopters | ООО «МПЗ Богородский» | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) | | ДомКлик | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) | | АС "Стрела" | Transportation | — | — | — | [Job posting, Jan 2022](https://vk.com/topic-111905078_35689124?post=3553) | +| Piwik PRO | Web Analytics | — | — | — | [Official website, Dec 2018](https://piwik.pro/blog/piwik-pro-clickhouse-faster-efficient-reports/) | [Original article](https://clickhouse.com/docs/en/introduction/adopters/) From 0597612eee257bb6013890beaf8ff59d007dd952 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 24 Mar 2022 03:37:24 +0100 Subject: [PATCH 109/132] Make test data less frightening for random strangers --- .../0_stateless/00153_transform.reference | 2 +- tests/queries/0_stateless/00153_transform.sql | 8 +- ...0165_transform_non_const_default.reference | 2 +- .../00165_transform_non_const_default.sql | 8 +- ...0381_first_significant_subdomain.reference | 2 +- .../00381_first_significant_subdomain.sql | 12 +- .../00909_ngram_distance.reference | 316 +++++++++--------- .../0_stateless/00909_ngram_distance.sql | 12 +- .../0_stateless/00951_ngram_search.reference | 306 ++++++++--------- .../0_stateless/00951_ngram_search.sql | 12 +- 10 files changed, 340 insertions(+), 340 deletions(-) diff --git a/tests/queries/0_stateless/00153_transform.reference b/tests/queries/0_stateless/00153_transform.reference index eea4fa0e1a8..8a38f4f8172 100644 --- a/tests/queries/0_stateless/00153_transform.reference +++ b/tests/queries/0_stateless/00153_transform.reference @@ -99,6 +99,6 @@ abc 1 1 Остальные -Яндекс +Bigmir)net Google Остальные diff --git a/tests/queries/0_stateless/00153_transform.sql b/tests/queries/0_stateless/00153_transform.sql index a5e531d36a4..78ec3cd4d1c 100644 --- a/tests/queries/0_stateless/00153_transform.sql +++ b/tests/queries/0_stateless/00153_transform.sql @@ -8,10 +8,10 @@ SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], 0) FROM sys SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], -1) FROM system.numbers LIMIT 10; SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], -1.1) FROM system.numbers LIMIT 10; SELECT transform(toString(number), ['3', '5', '7'], [111, 222.2, 333], 1) FROM system.numbers LIMIT 10; -SELECT transform(1, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title; -SELECT transform(2, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title; -SELECT transform(3, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title; -SELECT transform(4, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title; +SELECT transform(1, [2, 3], ['Bigmir)net', 'Google'], 'Остальные') AS title; +SELECT transform(2, [2, 3], ['Bigmir)net', 'Google'], 'Остальные') AS title; +SELECT transform(3, [2, 3], ['Bigmir)net', 'Google'], 'Остальные') AS title; +SELECT transform(4, [2, 3], ['Bigmir)net', 'Google'], 'Остальные') AS title; SELECT transform('hello', 'wrong', 1); -- { serverError 43 } SELECT transform('hello', ['wrong'], 1); -- { serverError 43 } SELECT transform('hello', ['wrong'], [1]); -- { serverError 43 } diff --git a/tests/queries/0_stateless/00165_transform_non_const_default.reference b/tests/queries/0_stateless/00165_transform_non_const_default.reference index d66471d9741..01890b91309 100644 --- a/tests/queries/0_stateless/00165_transform_non_const_default.reference +++ b/tests/queries/0_stateless/00165_transform_non_const_default.reference @@ -79,6 +79,6 @@ abc 1 1 Остальные -Яндекс +Meta.ua Google Остальные diff --git a/tests/queries/0_stateless/00165_transform_non_const_default.sql b/tests/queries/0_stateless/00165_transform_non_const_default.sql index f68327f7700..ef3b7c1f1c5 100644 --- a/tests/queries/0_stateless/00165_transform_non_const_default.sql +++ b/tests/queries/0_stateless/00165_transform_non_const_default.sql @@ -6,7 +6,7 @@ SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], materialize SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], materialize(-1)) FROM system.numbers LIMIT 10; SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], materialize(-1.1)) FROM system.numbers LIMIT 10; SELECT transform(toString(number), ['3', '5', '7'], [111, 222.2, 333], materialize(1)) FROM system.numbers LIMIT 10; -SELECT transform(1, [2, 3], ['Яндекс', 'Google'], materialize('Остальные')) AS title; -SELECT transform(2, [2, 3], ['Яндекс', 'Google'], materialize('Остальные')) AS title; -SELECT transform(3, [2, 3], ['Яндекс', 'Google'], materialize('Остальные')) AS title; -SELECT transform(4, [2, 3], ['Яндекс', 'Google'], materialize('Остальные')) AS title; +SELECT transform(1, [2, 3], ['Meta.ua', 'Google'], materialize('Остальные')) AS title; +SELECT transform(2, [2, 3], ['Meta.ua', 'Google'], materialize('Остальные')) AS title; +SELECT transform(3, [2, 3], ['Meta.ua', 'Google'], materialize('Остальные')) AS title; +SELECT transform(4, [2, 3], ['Meta.ua', 'Google'], materialize('Остальные')) AS title; diff --git a/tests/queries/0_stateless/00381_first_significant_subdomain.reference b/tests/queries/0_stateless/00381_first_significant_subdomain.reference index 1f1230a2104..086f3b0f9ce 100644 --- a/tests/queries/0_stateless/00381_first_significant_subdomain.reference +++ b/tests/queries/0_stateless/00381_first_significant_subdomain.reference @@ -1,3 +1,3 @@ canada congo net-domena -yandex yandex yandex яндекс yandex +meta bigmir yahoo гугл meta canada hello hello canada diff --git a/tests/queries/0_stateless/00381_first_significant_subdomain.sql b/tests/queries/0_stateless/00381_first_significant_subdomain.sql index 5badd14f200..5d8c53afc9c 100644 --- a/tests/queries/0_stateless/00381_first_significant_subdomain.sql +++ b/tests/queries/0_stateless/00381_first_significant_subdomain.sql @@ -4,12 +4,12 @@ SELECT firstSignificantSubdomain('http://pochemu.net-domena.ru') AS why; SELECT - firstSignificantSubdomain('ftp://www.yandex.com.tr/news.html'), - firstSignificantSubdomain('https://www.yandex.ua/news.html'), - firstSignificantSubdomain('magnet:yandex.abc'), - firstSignificantSubdomain('ftp://www.yandex.co.uk/news.html'), - firstSignificantSubdomain('https://api.www3.static.dev.ввв.яндекс.рф'), - firstSignificantSubdomain('//www.yandex.com.tr/news.html'); + firstSignificantSubdomain('ftp://www.meta.com.ua/news.html'), + firstSignificantSubdomain('https://www.bigmir.net/news.html'), + firstSignificantSubdomain('magnet:ukr.abc'), + firstSignificantSubdomain('ftp://www.yahoo.co.jp/news.html'), + firstSignificantSubdomain('https://api.www3.static.dev.ввв.гугл.ком'), + firstSignificantSubdomain('//www.meta.com.ua/news.html'); SELECT firstSignificantSubdomain('http://hello.canada.c'), diff --git a/tests/queries/0_stateless/00909_ngram_distance.reference b/tests/queries/0_stateless/00909_ngram_distance.reference index 290e24faac5..4323fa86151 100644 --- a/tests/queries/0_stateless/00909_ngram_distance.reference +++ b/tests/queries/0_stateless/00909_ngram_distance.reference @@ -113,112 +113,112 @@ 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 0 привет как дела?... Херсон 600 -пап привет как дела - Яндекс.Видео 684 -привет как дела клип - Яндекс.Видео 692 -привет братан как дела - Яндекс.Видео 707 +пап привет как дела - TUT.BY 625 +привет как дела клип - TUT.BY 636 +привет братан как дела - TUT.BY 657 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 привет 1000 0 http://metric.ru/ 765 http://metris.ru/ 765 http://metrika.ru/ 778 http://autometric.ru/ 810 -http://metrica.yandex.com/ 846 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 привет как дела?... Херсон 297 -пап привет как дела - Яндекс.Видео 422 -привет как дела клип - Яндекс.Видео 435 -привет братан как дела - Яндекс.Видео 500 +пап привет как дела - TUT.BY 333 +привет как дела клип - TUT.BY 350 +привет братан как дела - TUT.BY 429 привет 529 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 436 привет как дела?... Херсон 459 -пап привет как дела - Яндекс.Видео 511 +привет как дела клип - TUT.BY 500 +привет братан как дела - TUT.BY 524 привет 529 -привет как дела клип - Яндекс.Видео 565 -привет братан как дела - Яндекс.Видео 583 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 http://metrika.ru/ 524 http://metric.ru/ 700 http://metris.ru/ 700 http://autometric.ru/ 750 -http://metrica.yandex.com/ 793 +http://top.bigmir.net/ 920 1000 -пап привет как дела - Яндекс.Видео 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metric.ru/ 600 -http://metrica.yandex.com/ 655 http://autometric.ru/ 667 http://metris.ru/ 700 http://metrika.ru/ 714 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metrika.ru/ 619 http://metric.ru/ 700 http://metris.ru/ 700 http://autometric.ru/ 750 -http://metrica.yandex.com/ 793 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metric.ru/ 600 http://autometric.ru/ 667 http://metris.ru/ 700 http://metrika.ru/ 714 -http://metrica.yandex.com/ 724 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 -http://metrica.yandex.com/ 714 +http://top.bigmir.net/ 667 +http://metrika.ru/ 900 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 0 0 @@ -335,135 +335,135 @@ http://metris.ru/ 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 0 привет как дела?... Херсон 600 -пап привет как дела - Яндекс.Видео 684 -привет как дела клип - Яндекс.Видео 692 -привет братан как дела - Яндекс.Видео 707 +пап привет как дела - TUT.BY 625 +привет как дела клип - TUT.BY 636 +привет братан как дела - TUT.BY 657 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 привет 1000 0 http://metric.ru/ 765 http://metris.ru/ 765 http://metrika.ru/ 778 http://autometric.ru/ 810 -http://metrica.yandex.com/ 846 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 привет как дела?... Херсон 297 -пап привет как дела - Яндекс.Видео 422 -привет как дела клип - Яндекс.Видео 435 -привет братан как дела - Яндекс.Видео 500 +пап привет как дела - TUT.BY 333 +привет как дела клип - TUT.BY 350 +привет братан как дела - TUT.BY 429 привет 529 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 436 привет как дела?... Херсон 459 -пап привет как дела - Яндекс.Видео 511 +привет как дела клип - TUT.BY 500 +привет братан как дела - TUT.BY 524 привет 529 -привет как дела клип - Яндекс.Видео 565 -привет братан как дела - Яндекс.Видео 583 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 http://metrika.ru/ 524 http://metric.ru/ 700 http://metris.ru/ 700 http://autometric.ru/ 750 -http://metrica.yandex.com/ 793 +http://top.bigmir.net/ 920 1000 -пап привет как дела - Яндекс.Видео 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metrika.ru/ 524 http://metric.ru/ 700 http://metris.ru/ 700 http://autometric.ru/ 750 -http://metrica.yandex.com/ 793 +http://top.bigmir.net/ 920 1000 -пап привет как дела - Яндекс.Видео 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metric.ru/ 600 -http://metrica.yandex.com/ 655 http://autometric.ru/ 667 http://metris.ru/ 700 http://metrika.ru/ 714 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metrika.ru/ 619 http://metric.ru/ 700 http://metris.ru/ 700 http://autometric.ru/ 750 -http://metrica.yandex.com/ 793 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metric.ru/ 600 http://autometric.ru/ 667 http://metris.ru/ 700 http://metrika.ru/ 714 -http://metrica.yandex.com/ 724 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 -http://metrica.yandex.com/ 714 +http://top.bigmir.net/ 667 +http://metrika.ru/ 900 + 1000 +http://autometric.ru/ 1000 +http://metric.ru/ 1000 +http://metris.ru/ 1000 +пап привет как дела - TUT.BY 1000 +привет 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 +привет как дела?... Херсон 1000 +привет как дела клип - TUT.BY 0 +пап привет как дела - TUT.BY 208 +привет братан как дела - TUT.BY 286 +привет как дела?... Херсон 490 +привет 742 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 -привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 0 -пап привет как дела - Яндекс.Видео 169 -привет братан как дела - Яндекс.Видео 235 -привет как дела?... Херсон 544 -привет 784 - 1000 -http://autometric.ru/ 1000 -http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metrika.ru/ 1000 -http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 0 0 0 @@ -576,82 +576,82 @@ http://metris.ru/ 1000 111 429 1000 +пап привет как дела - TUT.BY 242 привет как дела?... Херсон 254 -пап привет как дела - Яндекс.Видео 398 -привет как дела клип - Яндекс.Видео 412 -привет братан как дела - Яндекс.Видео 461 +привет как дела клип - TUT.BY 265 +привет братан как дела - TUT.BY 333 привет 471 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 303 привет как дела?... Херсон 343 -пап привет как дела - Яндекс.Видео 446 +привет как дела клип - TUT.BY 353 +привет братан как дела - TUT.BY 389 привет 471 -привет как дела клип - Яндекс.Видео 482 -привет братан как дела - Яндекс.Видео 506 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 http://metrika.ru/ 579 http://metric.ru/ 778 http://metris.ru/ 778 http://autometric.ru/ 818 -http://metrica.yandex.com/ 852 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metric.ru/ 667 -http://metrica.yandex.com/ 704 http://autometric.ru/ 727 http://metris.ru/ 778 http://metrika.ru/ 789 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metrika.ru/ 684 http://metric.ru/ 778 http://metris.ru/ 778 http://autometric.ru/ 818 -http://metrica.yandex.com/ 852 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metric.ru/ 667 http://autometric.ru/ 727 -http://metrica.yandex.com/ 778 http://metris.ru/ 778 http://metrika.ru/ 789 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 -http://metrica.yandex.com/ 769 +http://top.bigmir.net/ 727 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 0 0 @@ -765,91 +765,91 @@ http://metris.ru/ 1000 111 600 1000 +пап привет как дела - TUT.BY 909 привет как дела?... Херсон 910 -пап привет как дела - Яндекс.Видео 928 -привет как дела клип - Яндекс.Видео 929 -привет братан как дела - Яндекс.Видео 955 +привет как дела клип - TUT.BY 912 +привет братан как дела - TUT.BY 944 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 привет 1000 +пап привет как дела - TUT.BY 667 привет как дела?... Херсон 672 -пап привет как дела - Яндекс.Видео 735 -привет как дела клип - Яндекс.Видео 741 -привет братан как дела - Яндекс.Видео 753 +привет как дела клип - TUT.BY 676 +привет братан как дела - TUT.BY 694 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 привет 1000 http://metrika.ru/ 579 http://metric.ru/ 778 http://metris.ru/ 778 http://autometric.ru/ 818 -http://metrica.yandex.com/ 852 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metrika.ru/ 579 http://metric.ru/ 778 http://metris.ru/ 778 http://autometric.ru/ 818 -http://metrica.yandex.com/ 852 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metric.ru/ 667 -http://metrica.yandex.com/ 704 http://autometric.ru/ 727 http://metris.ru/ 778 http://metrika.ru/ 789 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metrika.ru/ 684 http://metric.ru/ 778 http://metris.ru/ 778 http://autometric.ru/ 818 -http://metrica.yandex.com/ 852 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metric.ru/ 667 http://autometric.ru/ 727 -http://metrica.yandex.com/ 778 http://metris.ru/ 778 http://metrika.ru/ 789 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 -http://metrica.yandex.com/ 769 +http://top.bigmir.net/ 727 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 diff --git a/tests/queries/0_stateless/00909_ngram_distance.sql b/tests/queries/0_stateless/00909_ngram_distance.sql index b2f403c415a..28aff50d22e 100644 --- a/tests/queries/0_stateless/00909_ngram_distance.sql +++ b/tests/queries/0_stateless/00909_ngram_distance.sql @@ -32,7 +32,7 @@ select round(1000 * ngramDistanceUTF8('абвгдеёжз', 'ёёёёёёёё')) drop table if exists test_distance; create table test_distance (Title String) engine = Memory; -insert into test_distance values ('привет как дела?... Херсон'), ('привет как дела клип - Яндекс.Видео'), ('привет'), ('пап привет как дела - Яндекс.Видео'), ('привет братан как дела - Яндекс.Видео'), ('http://metric.ru/'), ('http://autometric.ru/'), ('http://metrica.yandex.com/'), ('http://metris.ru/'), ('http://metrika.ru/'), (''); +insert into test_distance values ('привет как дела?... Херсон'), ('привет как дела клип - TUT.BY'), ('привет'), ('пап привет как дела - TUT.BY'), ('привет братан как дела - TUT.BY'), ('http://metric.ru/'), ('http://autometric.ru/'), ('http://top.bigmir.net/'), ('http://metris.ru/'), ('http://metrika.ru/'), (''); SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, Title) as distance, Title; SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, extract(Title, 'как дела')) as distance, Title; @@ -44,7 +44,7 @@ SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUT SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metriks') as distance, Title; SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'yandex') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'bigmir') as distance, Title; select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5; @@ -91,8 +91,8 @@ SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCa SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'mEtrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'metriKS') as distance, Title; SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'YanDEX') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - яндеКс.видео') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'BigMIR') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - TuT.by') as distance, Title; select round(1000 * ngramDistance(materialize(''), '')) from system.numbers limit 5; @@ -134,7 +134,7 @@ SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(T SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'metrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'metriks') as distance, Title; SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'yandex') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'bigmir') as distance, Title; select round(1000 * ngramDistanceCaseInsensitive(materialize(''), '')) from system.numbers limit 5; select round(1000 * ngramDistanceCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5; @@ -175,6 +175,6 @@ SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCa SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'mEtrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'metriKS') as distance, Title; SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'YanDEX') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'BigMIR') as distance, Title; drop table if exists test_distance; diff --git a/tests/queries/0_stateless/00951_ngram_search.reference b/tests/queries/0_stateless/00951_ngram_search.reference index a98f63a198a..ece03fc649e 100644 --- a/tests/queries/0_stateless/00951_ngram_search.reference +++ b/tests/queries/0_stateless/00951_ngram_search.reference @@ -113,113 +113,113 @@ 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 308 -привет братан как дела - Яндекс.Видео 923 -пап привет как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 923 +пап привет как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 308 -привет братан как дела - Яндекс.Видео 769 -привет как дела клип - Яндекс.Видео 769 +привет братан как дела - TUT.BY 769 +привет как дела клип - TUT.BY 769 привет как дела?... Херсон 769 -пап привет как дела - Яндекс.Видео 846 +пап привет как дела - TUT.BY 846 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 +http://top.bigmir.net/ 200 http://autometric.ru/ 600 http://metric.ru/ 600 -http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 600 http://metris.ru/ 600 http://autometric.ru/ 800 http://metric.ru/ 800 -http://metrica.yandex.com/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 600 http://metric.ru/ 600 -http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 800 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 600 http://metris.ru/ 600 http://autometric.ru/ 800 http://metric.ru/ 800 -http://metrica.yandex.com/ 800 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrika.ru/ 0 http://metris.ru/ 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 -http://metrica.yandex.com/ 1000 +http://metrika.ru/ 250 +http://top.bigmir.net/ 1000 1000 1000 1000 @@ -335,135 +335,135 @@ http://metrica.yandex.com/ 1000 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 308 -привет братан как дела - Яндекс.Видео 923 -пап привет как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 923 +пап привет как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 308 -привет братан как дела - Яндекс.Видео 769 -привет как дела клип - Яндекс.Видео 769 +привет братан как дела - TUT.BY 769 +привет как дела клип - TUT.BY 769 привет как дела?... Херсон 769 -пап привет как дела - Яндекс.Видео 846 +пап привет как дела - TUT.BY 846 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 +http://top.bigmir.net/ 200 http://autometric.ru/ 600 http://metric.ru/ 600 -http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 +http://top.bigmir.net/ 200 http://autometric.ru/ 600 http://metric.ru/ 600 -http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 600 http://metris.ru/ 600 http://autometric.ru/ 800 http://metric.ru/ 800 -http://metrica.yandex.com/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 600 http://metric.ru/ 600 -http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 800 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 600 http://metris.ru/ 600 http://autometric.ru/ 800 http://metric.ru/ 800 -http://metrica.yandex.com/ 800 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrika.ru/ 0 http://metris.ru/ 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 -http://metrica.yandex.com/ 1000 +http://metrika.ru/ 250 +http://top.bigmir.net/ 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 -http://metrika.ru/ 0 http://metris.ru/ 0 -привет 121 -привет как дела?... Херсон 394 -привет братан как дела - Яндекс.Видео 788 -пап привет как дела - Яндекс.Видео 818 -привет как дела клип - Яндекс.Видео 1000 +http://metrika.ru/ 32 +привет 129 +http://top.bigmir.net/ 258 +привет как дела?... Херсон 419 +привет братан как дела - TUT.BY 452 +пап привет как дела - TUT.BY 484 +привет как дела клип - TUT.BY 677 1000 1000 1000 @@ -579,80 +579,80 @@ http://metris.ru/ 0 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 360 -привет братан как дела - Яндекс.Видео 960 -пап привет как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 960 +пап привет как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 360 -привет братан как дела - Яндекс.Видео 880 -привет как дела клип - Яндекс.Видео 880 +привет братан как дела - TUT.BY 880 +привет как дела клип - TUT.BY 880 привет как дела?... Херсон 880 -пап привет как дела - Яндекс.Видео 920 +пап привет как дела - TUT.BY 920 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 500 http://metric.ru/ 500 -http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 500 http://metris.ru/ 500 http://autometric.ru/ 750 http://metric.ru/ 750 -http://metrica.yandex.com/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 500 http://metric.ru/ 500 -http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 750 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 500 http://metris.ru/ 500 http://autometric.ru/ 750 http://metric.ru/ 750 -http://metrica.yandex.com/ 750 0 http://autometric.ru/ 0 http://metric.ru/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 -http://metrica.yandex.com/ 1000 +http://top.bigmir.net/ 1000 1000 1000 1000 @@ -768,88 +768,88 @@ http://metrica.yandex.com/ 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 0 -привет братан как дела - Яндекс.Видео 80 -пап привет как дела - Яндекс.Видео 120 -привет как дела клип - Яндекс.Видео 120 +привет братан как дела - TUT.BY 80 +пап привет как дела - TUT.BY 120 +привет как дела клип - TUT.BY 120 привет как дела?... Херсон 120 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 0 -пап привет как дела - Яндекс.Видео 440 -привет братан как дела - Яндекс.Видео 440 -привет как дела клип - Яндекс.Видео 440 +пап привет как дела - TUT.BY 440 +привет братан как дела - TUT.BY 440 +привет как дела клип - TUT.BY 440 привет как дела?... Херсон 440 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 500 http://metric.ru/ 500 -http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 500 http://metric.ru/ 500 -http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 500 http://metris.ru/ 500 http://autometric.ru/ 750 http://metric.ru/ 750 -http://metrica.yandex.com/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 500 http://metric.ru/ 500 -http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 750 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 500 http://metris.ru/ 500 http://autometric.ru/ 750 http://metric.ru/ 750 -http://metrica.yandex.com/ 750 0 http://autometric.ru/ 0 http://metric.ru/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 -http://metrica.yandex.com/ 1000 +http://top.bigmir.net/ 1000 diff --git a/tests/queries/0_stateless/00951_ngram_search.sql b/tests/queries/0_stateless/00951_ngram_search.sql index f1a37605ebc..77525d86013 100644 --- a/tests/queries/0_stateless/00951_ngram_search.sql +++ b/tests/queries/0_stateless/00951_ngram_search.sql @@ -32,7 +32,7 @@ select round(1000 * ngramSearchUTF8('абвгдеёжз', 'ёёёёёёёё')); drop table if exists test_entry_distance; create table test_entry_distance (Title String) engine = Memory; -insert into test_entry_distance values ('привет как дела?... Херсон'), ('привет как дела клип - Яндекс.Видео'), ('привет'), ('пап привет как дела - Яндекс.Видео'), ('привет братан как дела - Яндекс.Видео'), ('http://metric.ru/'), ('http://autometric.ru/'), ('http://metrica.yandex.com/'), ('http://metris.ru/'), ('http://metrika.ru/'), (''); +insert into test_entry_distance values ('привет как дела?... Херсон'), ('привет как дела клип - TUT.BY'), ('привет'), ('пап привет как дела - TUT.BY'), ('привет братан как дела - TUT.BY'), ('http://metric.ru/'), ('http://autometric.ru/'), ('http://top.bigmir.net/'), ('http://metris.ru/'), ('http://metrika.ru/'), (''); SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, Title) as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, extract(Title, 'как дела')) as distance, Title; @@ -44,7 +44,7 @@ SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSear SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metriks') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'yandex') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'bigmir') as distance, Title; select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5; @@ -91,8 +91,8 @@ SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSear SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'mEtrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'metriKS') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'YanDEX') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - яндеКс.видео') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'BigMIR') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - bigMir.Net') as distance, Title; select round(1000 * ngramSearch(materialize(''), '')) from system.numbers limit 5; @@ -134,7 +134,7 @@ SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSear SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metriks') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'yandex') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'bigmir') as distance, Title; select round(1000 * ngramSearchCaseInsensitive(materialize(''), '')) from system.numbers limit 5; select round(1000 * ngramSearchCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5; @@ -175,6 +175,6 @@ SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSear SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'mEtrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'metriKS') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'YanDEX') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'BigMIR') as distance, Title; drop table if exists test_entry_distance; From 36e094692614d30e2fb0efcc2172fada1db06679 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 24 Mar 2022 03:55:43 +0100 Subject: [PATCH 110/132] Make test data less frightening for random strangers --- .../00255_array_concat_string.reference | 38 +- .../0_stateless/00255_array_concat_string.sql | 2 +- .../00296_url_parameters.reference | 4 +- .../0_stateless/00296_url_parameters.sql | 368 +++++++++--------- ...query_aggregation_column_removal.reference | 12 +- ...ll_subquery_aggregation_column_removal.sql | 4 +- .../00653_verification_monotonic_data_load.sh | 4 +- .../0_stateless/00926_multimatch.reference | 20 +- .../queries/0_stateless/00926_multimatch.sql | 12 +- .../00998_constraints_all_tables.reference | 4 +- .../00998_constraints_all_tables.sql | 46 +-- ...here_max_parallel_replicas_distributed.sql | 2 +- ...324_if_transform_strings_to_enum.reference | 28 +- .../01324_if_transform_strings_to_enum.sql | 16 +- .../01358_constexpr_constraint.sql | 2 +- .../0_stateless/01407_lambda_arrayJoin.sql | 1 - ...456_ast_optimizations_over_distributed.sql | 8 +- .../01622_constraints_simple_optimization.sql | 18 +- .../0_stateless/01661_referer.reference | 2 +- tests/queries/0_stateless/01661_referer.sh | 4 +- .../0_stateless/01733_transform_ubsan.sql | 2 +- .../0_stateless/01889_tokenize.reference | 4 +- tests/queries/0_stateless/01889_tokenize.sql | 4 +- ...55_clickhouse_benchmark_connection_hang.sh | 2 - 24 files changed, 302 insertions(+), 305 deletions(-) diff --git a/tests/queries/0_stateless/00255_array_concat_string.reference b/tests/queries/0_stateless/00255_array_concat_string.reference index 4ffac8e5de0..edd1101beb6 100644 --- a/tests/queries/0_stateless/00255_array_concat_string.reference +++ b/tests/queries/0_stateless/00255_array_concat_string.reference @@ -34,25 +34,25 @@ Hello, World 0,1,2,3,4,5,6,7 0,1,2,3,4,5,6,7,8 -yandex -yandex google -yandex google test -yandex google test 123 -yandex google test 123 -yandex google test 123 hello -yandex google test 123 hello world -yandex google test 123 hello world goodbye -yandex google test 123 hello world goodbye xyz -yandex google test 123 hello world goodbye xyz yandex -yandex google test 123 hello world goodbye xyz yandex google -yandex google test 123 hello world goodbye xyz yandex google test -yandex google test 123 hello world goodbye xyz yandex google test 123 -yandex google test 123 hello world goodbye xyz yandex google test 123 -yandex google test 123 hello world goodbye xyz yandex google test 123 hello -yandex google test 123 hello world goodbye xyz yandex google test 123 hello world -yandex google test 123 hello world goodbye xyz yandex google test 123 hello world goodbye -yandex google test 123 hello world goodbye xyz yandex google test 123 hello world goodbye xyz -yandex google test 123 hello world goodbye xyz yandex google test 123 hello world goodbye xyz yandex +meta.ua +meta.ua google +meta.ua google test +meta.ua google test 123 +meta.ua google test 123 +meta.ua google test 123 hello +meta.ua google test 123 hello world +meta.ua google test 123 hello world goodbye +meta.ua google test 123 hello world goodbye xyz +meta.ua google test 123 hello world goodbye xyz meta.ua +meta.ua google test 123 hello world goodbye xyz meta.ua google +meta.ua google test 123 hello world goodbye xyz meta.ua google test +meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 +meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 +meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello +meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello world +meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello world goodbye +meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello world goodbye xyz +meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello world goodbye xyz meta.ua 0 01 diff --git a/tests/queries/0_stateless/00255_array_concat_string.sql b/tests/queries/0_stateless/00255_array_concat_string.sql index f4f95956a16..a18d349bac8 100644 --- a/tests/queries/0_stateless/00255_array_concat_string.sql +++ b/tests/queries/0_stateless/00255_array_concat_string.sql @@ -6,7 +6,7 @@ SELECT arrayStringConcat(emptyArrayString()); SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number)), '') FROM system.numbers LIMIT 10; SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number)), ',') FROM system.numbers LIMIT 10; -SELECT arrayStringConcat(arrayMap(x -> transform(x, [0, 1, 2, 3, 4, 5, 6, 7, 8], ['yandex', 'google', 'test', '123', '', 'hello', 'world', 'goodbye', 'xyz'], ''), arrayMap(x -> x % 9, range(number))), ' ') FROM system.numbers LIMIT 20; +SELECT arrayStringConcat(arrayMap(x -> transform(x, [0, 1, 2, 3, 4, 5, 6, 7, 8], ['meta.ua', 'google', 'test', '123', '', 'hello', 'world', 'goodbye', 'xyz'], ''), arrayMap(x -> x % 9, range(number))), ' ') FROM system.numbers LIMIT 20; SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number % 4))) FROM system.numbers LIMIT 10; SELECT arrayStringConcat([Null, 'hello', Null, 'world', Null, 'xyz', 'def', Null], ';'); SELECT arrayStringConcat([Null::Nullable(String), Null::Nullable(String)], ';'); diff --git a/tests/queries/0_stateless/00296_url_parameters.reference b/tests/queries/0_stateless/00296_url_parameters.reference index 91a7fe8d488..348651d3f7e 100644 --- a/tests/queries/0_stateless/00296_url_parameters.reference +++ b/tests/queries/0_stateless/00296_url_parameters.reference @@ -1,8 +1,8 @@ ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] b d f d f h b d d h f h b d f d f h b d d h f h -http://yandex.ru/?c=d http://yandex.ru/?a=b http://yandex.ru/?a=b&c=d# http://yandex.ru/?a&c=d#e=f http://yandex.ru/?a#e=f http://yandex.ru/?a&c=d# http://yandex.ru/?a=b&c=d#e=f http://yandex.ru/?c=d#e http://yandex.ru/?a=b#e http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b#e&g=h http://yandex.ru/?a=b&c=d#e&g=h http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b&c=d#test?e=f&g=h http://yandex.ru/?a=b&c=d#test?g=h http://yandex.ru/?a=b&c=d#test?e=f //yandex.ru/?c=d //yandex.ru/?a=b //yandex.ru/?a=b&c=d# //yandex.ru/?a&c=d#e=f //yandex.ru/?a#e=f //yandex.ru/?a&c=d# //yandex.ru/?a=b&c=d#e=f //yandex.ru/?c=d#e //yandex.ru/?a=b#e //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b#e&g=h //yandex.ru/?a=b&c=d#e&g=h //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b&c=d#test?e=f&g=h //yandex.ru/?a=b&c=d#test?g=h //yandex.ru/?a=b&c=d#test?e=f +http://bigmir.net/?c=d http://bigmir.net/?a=b http://bigmir.net/?a=b&c=d# http://bigmir.net/?a&c=d#e=f http://bigmir.net/?a#e=f http://bigmir.net/?a&c=d# http://bigmir.net/?a=b&c=d#e=f http://bigmir.net/?c=d#e http://bigmir.net/?a=b#e http://bigmir.net/?a=b&c=d#e http://bigmir.net/?a=b#e&g=h http://bigmir.net/?a=b&c=d#e&g=h http://bigmir.net/?a=b&c=d#e http://bigmir.net/?a=b&c=d#test?e=f&g=h http://bigmir.net/?a=b&c=d#test?g=h http://bigmir.net/?a=b&c=d#test?e=f //bigmir.net/?c=d //bigmir.net/?a=b //bigmir.net/?a=b&c=d# //bigmir.net/?a&c=d#e=f //bigmir.net/?a#e=f //bigmir.net/?a&c=d# //bigmir.net/?a=b&c=d#e=f //bigmir.net/?c=d#e //bigmir.net/?a=b#e //bigmir.net/?a=b&c=d#e //bigmir.net/?a=b#e&g=h //bigmir.net/?a=b&c=d#e&g=h //bigmir.net/?a=b&c=d#e //bigmir.net/?a=b&c=d#test?e=f&g=h //bigmir.net/?a=b&c=d#test?g=h //bigmir.net/?a=b&c=d#test?e=f ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] b d f d f h b d d h f h b d f d f h b d d h f h -http://yandex.ru/?c=d http://yandex.ru/?a=b http://yandex.ru/?a=b&c=d# http://yandex.ru/?a&c=d#e=f http://yandex.ru/?a#e=f http://yandex.ru/?a&c=d# http://yandex.ru/?a=b&c=d#e=f http://yandex.ru/?c=d#e http://yandex.ru/?a=b#e http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b#e&g=h http://yandex.ru/?a=b&c=d#e&g=h http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b&c=d#test?e=f&g=h http://yandex.ru/?a=b&c=d#test?g=h http://yandex.ru/?a=b&c=d#test?e=f //yandex.ru/?c=d //yandex.ru/?a=b //yandex.ru/?a=b&c=d# //yandex.ru/?a&c=d#e=f //yandex.ru/?a#e=f //yandex.ru/?a&c=d# //yandex.ru/?a=b&c=d#e=f //yandex.ru/?c=d#e //yandex.ru/?a=b#e //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b#e&g=h //yandex.ru/?a=b&c=d#e&g=h //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b&c=d#test?e=f&g=h //yandex.ru/?a=b&c=d#test?g=h //yandex.ru/?a=b&c=d#test?e=f +http://bigmir.net/?c=d http://bigmir.net/?a=b http://bigmir.net/?a=b&c=d# http://bigmir.net/?a&c=d#e=f http://bigmir.net/?a#e=f http://bigmir.net/?a&c=d# http://bigmir.net/?a=b&c=d#e=f http://bigmir.net/?c=d#e http://bigmir.net/?a=b#e http://bigmir.net/?a=b&c=d#e http://bigmir.net/?a=b#e&g=h http://bigmir.net/?a=b&c=d#e&g=h http://bigmir.net/?a=b&c=d#e http://bigmir.net/?a=b&c=d#test?e=f&g=h http://bigmir.net/?a=b&c=d#test?g=h http://bigmir.net/?a=b&c=d#test?e=f //bigmir.net/?c=d //bigmir.net/?a=b //bigmir.net/?a=b&c=d# //bigmir.net/?a&c=d#e=f //bigmir.net/?a#e=f //bigmir.net/?a&c=d# //bigmir.net/?a=b&c=d#e=f //bigmir.net/?c=d#e //bigmir.net/?a=b#e //bigmir.net/?a=b&c=d#e //bigmir.net/?a=b#e&g=h //bigmir.net/?a=b&c=d#e&g=h //bigmir.net/?a=b&c=d#e //bigmir.net/?a=b&c=d#test?e=f&g=h //bigmir.net/?a=b&c=d#test?g=h //bigmir.net/?a=b&c=d#test?e=f diff --git a/tests/queries/0_stateless/00296_url_parameters.sql b/tests/queries/0_stateless/00296_url_parameters.sql index f6dad306319..8a96e3888fe 100644 --- a/tests/queries/0_stateless/00296_url_parameters.sql +++ b/tests/queries/0_stateless/00296_url_parameters.sql @@ -1,200 +1,200 @@ SELECT - extractURLParameters('http://yandex.ru/?a=b&c=d'), - extractURLParameters('http://yandex.ru/?a=b&c=d#e=f'), - extractURLParameters('http://yandex.ru/?a&c=d#e=f'), - extractURLParameters('http://yandex.ru/?a=b&c=d#e=f&g=h'), - extractURLParameters('http://yandex.ru/?a=b&c=d#e'), - extractURLParameters('http://yandex.ru/?a=b&c=d#e&g=h'), - extractURLParameters('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), - extractURLParameters('//yandex.ru/?a=b&c=d'), - extractURLParameters('//yandex.ru/?a=b&c=d#e=f'), - extractURLParameters('//yandex.ru/?a&c=d#e=f'), - extractURLParameters('//yandex.ru/?a=b&c=d#e=f&g=h'), - extractURLParameters('//yandex.ru/?a=b&c=d#e'), - extractURLParameters('//yandex.ru/?a=b&c=d#e&g=h'), - extractURLParameters('//yandex.ru/?a=b&c=d#test?e=f&g=h'); + extractURLParameters('http://bigmir.net/?a=b&c=d'), + extractURLParameters('http://bigmir.net/?a=b&c=d#e=f'), + extractURLParameters('http://bigmir.net/?a&c=d#e=f'), + extractURLParameters('http://bigmir.net/?a=b&c=d#e=f&g=h'), + extractURLParameters('http://bigmir.net/?a=b&c=d#e'), + extractURLParameters('http://bigmir.net/?a=b&c=d#e&g=h'), + extractURLParameters('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), + extractURLParameters('//bigmir.net/?a=b&c=d'), + extractURLParameters('//bigmir.net/?a=b&c=d#e=f'), + extractURLParameters('//bigmir.net/?a&c=d#e=f'), + extractURLParameters('//bigmir.net/?a=b&c=d#e=f&g=h'), + extractURLParameters('//bigmir.net/?a=b&c=d#e'), + extractURLParameters('//bigmir.net/?a=b&c=d#e&g=h'), + extractURLParameters('//bigmir.net/?a=b&c=d#test?e=f&g=h'); SELECT - extractURLParameterNames('http://yandex.ru/?a=b&c=d'), - extractURLParameterNames('http://yandex.ru/?a=b&c=d#e=f'), - extractURLParameterNames('http://yandex.ru/?a&c=d#e=f'), - extractURLParameterNames('http://yandex.ru/?a=b&c=d#e=f&g=h'), - extractURLParameterNames('http://yandex.ru/?a=b&c=d#e'), - extractURLParameterNames('http://yandex.ru/?a=b&c=d#e&g=h'), - extractURLParameterNames('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), - extractURLParameterNames('//yandex.ru/?a=b&c=d'), - extractURLParameterNames('//yandex.ru/?a=b&c=d#e=f'), - extractURLParameterNames('//yandex.ru/?a&c=d#e=f'), - extractURLParameterNames('//yandex.ru/?a=b&c=d#e=f&g=h'), - extractURLParameterNames('//yandex.ru/?a=b&c=d#e'), - extractURLParameterNames('//yandex.ru/?a=b&c=d#e&g=h'), - extractURLParameterNames('//yandex.ru/?a=b&c=d#test?e=f&g=h'); + extractURLParameterNames('http://bigmir.net/?a=b&c=d'), + extractURLParameterNames('http://bigmir.net/?a=b&c=d#e=f'), + extractURLParameterNames('http://bigmir.net/?a&c=d#e=f'), + extractURLParameterNames('http://bigmir.net/?a=b&c=d#e=f&g=h'), + extractURLParameterNames('http://bigmir.net/?a=b&c=d#e'), + extractURLParameterNames('http://bigmir.net/?a=b&c=d#e&g=h'), + extractURLParameterNames('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), + extractURLParameterNames('//bigmir.net/?a=b&c=d'), + extractURLParameterNames('//bigmir.net/?a=b&c=d#e=f'), + extractURLParameterNames('//bigmir.net/?a&c=d#e=f'), + extractURLParameterNames('//bigmir.net/?a=b&c=d#e=f&g=h'), + extractURLParameterNames('//bigmir.net/?a=b&c=d#e'), + extractURLParameterNames('//bigmir.net/?a=b&c=d#e&g=h'), + extractURLParameterNames('//bigmir.net/?a=b&c=d#test?e=f&g=h'); SELECT - extractURLParameter('http://yandex.ru/?a=b&c=d', 'a'), - extractURLParameter('http://yandex.ru/?a=b&c=d', 'c'), - extractURLParameter('http://yandex.ru/?a=b&c=d#e=f', 'e'), - extractURLParameter('http://yandex.ru/?a&c=d#e=f', 'a'), - extractURLParameter('http://yandex.ru/?a&c=d#e=f', 'c'), - extractURLParameter('http://yandex.ru/?a&c=d#e=f', 'e'), - extractURLParameter('http://yandex.ru/?a=b&c=d#e=f&g=h', 'g'), - extractURLParameter('http://yandex.ru/?a=b&c=d#e', 'a'), - extractURLParameter('http://yandex.ru/?a=b&c=d#e', 'c'), - extractURLParameter('http://yandex.ru/?a=b&c=d#e', 'e'), - extractURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'c'), - extractURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'e'), - extractURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'g'), - extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'), - extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'), - extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'), - extractURLParameter('//yandex.ru/?a=b&c=d', 'a'), - extractURLParameter('//yandex.ru/?a=b&c=d', 'c'), - extractURLParameter('//yandex.ru/?a=b&c=d#e=f', 'e'), - extractURLParameter('//yandex.ru/?a&c=d#e=f', 'a'), - extractURLParameter('//yandex.ru/?a&c=d#e=f', 'c'), - extractURLParameter('//yandex.ru/?a&c=d#e=f', 'e'), - extractURLParameter('//yandex.ru/?a=b&c=d#e=f&g=h', 'g'), - extractURLParameter('//yandex.ru/?a=b&c=d#e', 'a'), - extractURLParameter('//yandex.ru/?a=b&c=d#e', 'c'), - extractURLParameter('//yandex.ru/?a=b&c=d#e', 'e'), - extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'c'), - extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'e'), - extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'g'), - extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'), - extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'), - extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'); + extractURLParameter('http://bigmir.net/?a=b&c=d', 'a'), + extractURLParameter('http://bigmir.net/?a=b&c=d', 'c'), + extractURLParameter('http://bigmir.net/?a=b&c=d#e=f', 'e'), + extractURLParameter('http://bigmir.net/?a&c=d#e=f', 'a'), + extractURLParameter('http://bigmir.net/?a&c=d#e=f', 'c'), + extractURLParameter('http://bigmir.net/?a&c=d#e=f', 'e'), + extractURLParameter('http://bigmir.net/?a=b&c=d#e=f&g=h', 'g'), + extractURLParameter('http://bigmir.net/?a=b&c=d#e', 'a'), + extractURLParameter('http://bigmir.net/?a=b&c=d#e', 'c'), + extractURLParameter('http://bigmir.net/?a=b&c=d#e', 'e'), + extractURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'c'), + extractURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'e'), + extractURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'g'), + extractURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'test'), + extractURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'e'), + extractURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'g'), + extractURLParameter('//bigmir.net/?a=b&c=d', 'a'), + extractURLParameter('//bigmir.net/?a=b&c=d', 'c'), + extractURLParameter('//bigmir.net/?a=b&c=d#e=f', 'e'), + extractURLParameter('//bigmir.net/?a&c=d#e=f', 'a'), + extractURLParameter('//bigmir.net/?a&c=d#e=f', 'c'), + extractURLParameter('//bigmir.net/?a&c=d#e=f', 'e'), + extractURLParameter('//bigmir.net/?a=b&c=d#e=f&g=h', 'g'), + extractURLParameter('//bigmir.net/?a=b&c=d#e', 'a'), + extractURLParameter('//bigmir.net/?a=b&c=d#e', 'c'), + extractURLParameter('//bigmir.net/?a=b&c=d#e', 'e'), + extractURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'c'), + extractURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'e'), + extractURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'g'), + extractURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'test'), + extractURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'e'), + extractURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'g'); SELECT - cutURLParameter('http://yandex.ru/?a=b&c=d', 'a'), - cutURLParameter('http://yandex.ru/?a=b&c=d', 'c'), - cutURLParameter('http://yandex.ru/?a=b&c=d#e=f', 'e'), - cutURLParameter('http://yandex.ru/?a&c=d#e=f', 'a'), - cutURLParameter('http://yandex.ru/?a&c=d#e=f', 'c'), - cutURLParameter('http://yandex.ru/?a&c=d#e=f', 'e'), - cutURLParameter('http://yandex.ru/?a=b&c=d#e=f&g=h', 'g'), - cutURLParameter('http://yandex.ru/?a=b&c=d#e', 'a'), - cutURLParameter('http://yandex.ru/?a=b&c=d#e', 'c'), - cutURLParameter('http://yandex.ru/?a=b&c=d#e', 'e'), - cutURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'c'), - cutURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'e'), - cutURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'g'), - cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'), - cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'), - cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'), - cutURLParameter('//yandex.ru/?a=b&c=d', 'a'), - cutURLParameter('//yandex.ru/?a=b&c=d', 'c'), - cutURLParameter('//yandex.ru/?a=b&c=d#e=f', 'e'), - cutURLParameter('//yandex.ru/?a&c=d#e=f', 'a'), - cutURLParameter('//yandex.ru/?a&c=d#e=f', 'c'), - cutURLParameter('//yandex.ru/?a&c=d#e=f', 'e'), - cutURLParameter('//yandex.ru/?a=b&c=d#e=f&g=h', 'g'), - cutURLParameter('//yandex.ru/?a=b&c=d#e', 'a'), - cutURLParameter('//yandex.ru/?a=b&c=d#e', 'c'), - cutURLParameter('//yandex.ru/?a=b&c=d#e', 'e'), - cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'c'), - cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'e'), - cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'g'), - cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'), - cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'), - cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'); + cutURLParameter('http://bigmir.net/?a=b&c=d', 'a'), + cutURLParameter('http://bigmir.net/?a=b&c=d', 'c'), + cutURLParameter('http://bigmir.net/?a=b&c=d#e=f', 'e'), + cutURLParameter('http://bigmir.net/?a&c=d#e=f', 'a'), + cutURLParameter('http://bigmir.net/?a&c=d#e=f', 'c'), + cutURLParameter('http://bigmir.net/?a&c=d#e=f', 'e'), + cutURLParameter('http://bigmir.net/?a=b&c=d#e=f&g=h', 'g'), + cutURLParameter('http://bigmir.net/?a=b&c=d#e', 'a'), + cutURLParameter('http://bigmir.net/?a=b&c=d#e', 'c'), + cutURLParameter('http://bigmir.net/?a=b&c=d#e', 'e'), + cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'c'), + cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'e'), + cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'g'), + cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'test'), + cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'e'), + cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'g'), + cutURLParameter('//bigmir.net/?a=b&c=d', 'a'), + cutURLParameter('//bigmir.net/?a=b&c=d', 'c'), + cutURLParameter('//bigmir.net/?a=b&c=d#e=f', 'e'), + cutURLParameter('//bigmir.net/?a&c=d#e=f', 'a'), + cutURLParameter('//bigmir.net/?a&c=d#e=f', 'c'), + cutURLParameter('//bigmir.net/?a&c=d#e=f', 'e'), + cutURLParameter('//bigmir.net/?a=b&c=d#e=f&g=h', 'g'), + cutURLParameter('//bigmir.net/?a=b&c=d#e', 'a'), + cutURLParameter('//bigmir.net/?a=b&c=d#e', 'c'), + cutURLParameter('//bigmir.net/?a=b&c=d#e', 'e'), + cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'c'), + cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'e'), + cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'g'), + cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'test'), + cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'e'), + cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'g'); SELECT - extractURLParameters(materialize('http://yandex.ru/?a=b&c=d')), - extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e=f')), - extractURLParameters(materialize('http://yandex.ru/?a&c=d#e=f')), - extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h')), - extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e')), - extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e&g=h')), - extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h')), - extractURLParameters(materialize('//yandex.ru/?a=b&c=d')), - extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e=f')), - extractURLParameters(materialize('//yandex.ru/?a&c=d#e=f')), - extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e=f&g=h')), - extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e')), - extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e&g=h')), - extractURLParameters(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h')); + extractURLParameters(materialize('http://bigmir.net/?a=b&c=d')), + extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#e=f')), + extractURLParameters(materialize('http://bigmir.net/?a&c=d#e=f')), + extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h')), + extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#e')), + extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#e&g=h')), + extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h')), + extractURLParameters(materialize('//bigmir.net/?a=b&c=d')), + extractURLParameters(materialize('//bigmir.net/?a=b&c=d#e=f')), + extractURLParameters(materialize('//bigmir.net/?a&c=d#e=f')), + extractURLParameters(materialize('//bigmir.net/?a=b&c=d#e=f&g=h')), + extractURLParameters(materialize('//bigmir.net/?a=b&c=d#e')), + extractURLParameters(materialize('//bigmir.net/?a=b&c=d#e&g=h')), + extractURLParameters(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h')); SELECT - extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d')), - extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e=f')), - extractURLParameterNames(materialize('http://yandex.ru/?a&c=d#e=f')), - extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h')), - extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e')), - extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e&g=h')), - extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h')), - extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d')), - extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e=f')), - extractURLParameterNames(materialize('//yandex.ru/?a&c=d#e=f')), - extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e=f&g=h')), - extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e')), - extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e&g=h')), - extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h')); + extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d')), + extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#e=f')), + extractURLParameterNames(materialize('http://bigmir.net/?a&c=d#e=f')), + extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h')), + extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#e')), + extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#e&g=h')), + extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h')), + extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d')), + extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#e=f')), + extractURLParameterNames(materialize('//bigmir.net/?a&c=d#e=f')), + extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#e=f&g=h')), + extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#e')), + extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#e&g=h')), + extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h')); SELECT - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'a'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'c'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e=f'), 'e'), - extractURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'a'), - extractURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'c'), - extractURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'e'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h'), 'g'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'a'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'c'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'e'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'c'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'e'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'g'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'a'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'c'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f'), 'e'), - extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'a'), - extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'c'), - extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'e'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f&g=h'), 'g'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'a'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'c'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'e'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'c'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'e'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'g'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'); + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d'), 'a'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d'), 'c'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f'), 'e'), + extractURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'a'), + extractURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'c'), + extractURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'e'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h'), 'g'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'a'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'c'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'e'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'c'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'e'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'g'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'test'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'e'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'g'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d'), 'a'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d'), 'c'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f'), 'e'), + extractURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'a'), + extractURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'c'), + extractURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'e'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f&g=h'), 'g'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'a'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'c'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'e'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'c'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'e'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'g'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'test'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'e'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'g'); SELECT - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'a'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'c'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e=f'), 'e'), - cutURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'a'), - cutURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'c'), - cutURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'e'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h'), 'g'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'a'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'c'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'e'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'c'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'e'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'g'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'a'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'c'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f'), 'e'), - cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'a'), - cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'c'), - cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'e'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f&g=h'), 'g'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'a'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'c'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'e'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'c'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'e'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'g'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'); + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), 'a'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), 'c'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f'), 'e'), + cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'a'), + cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'c'), + cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'e'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h'), 'g'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'a'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'c'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'e'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'c'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'e'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'g'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'test'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'e'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'g'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), 'a'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), 'c'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f'), 'e'), + cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'a'), + cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'c'), + cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'e'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f&g=h'), 'g'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'a'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'c'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'e'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'c'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'e'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'g'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'test'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'e'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'g'); diff --git a/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference b/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference index a0265bdb7ed..2b1089c6840 100644 --- a/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference +++ b/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference @@ -1,23 +1,23 @@ {"total":"1","domain":"baidu.com"} {"total":"2","domain":"facebook.com"} {"total":"1","domain":"google.com"} -{"total":"2","domain":"yandex.ru"} +{"total":"2","domain":"meta.ua"} {"total":"1","domain":"baidu.com"} {"total":"2","domain":"facebook.com"} {"total":"1","domain":"google.com"} -{"total":"2","domain":"yandex.ru"} +{"total":"2","domain":"meta.ua"} 1 baidu.com 2 facebook.com 1 google.com -2 yandex.ru +2 meta.ua 1 baidu.com 2 facebook.com 1 google.com -2 yandex.ru +2 meta.ua 1 baidu.com 1 google.com 2 facebook.com -2 yandex.ru +2 meta.ua 1 1 2 @@ -25,4 +25,4 @@ baidu.com google.com facebook.com -yandex.ru +meta.ua diff --git a/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.sql b/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.sql index 49975daaa7e..07d39e8d54a 100644 --- a/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.sql +++ b/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.sql @@ -4,8 +4,8 @@ DROP TABLE IF EXISTS transactions; CREATE TABLE clicks (domain String) ENGINE = Memory; CREATE TABLE transactions (domain String) ENGINE = Memory; -INSERT INTO clicks VALUES ('facebook.com'), ('yandex.ru'), ('google.com'); -INSERT INTO transactions VALUES ('facebook.com'), ('yandex.ru'), ('baidu.com'); +INSERT INTO clicks VALUES ('facebook.com'), ('meta.ua'), ('google.com'); +INSERT INTO transactions VALUES ('facebook.com'), ('meta.ua'), ('baidu.com'); SELECT diff --git a/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh b/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh index f49aeb93184..00a7e3c5232 100755 --- a/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh +++ b/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh @@ -25,7 +25,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE string_test_table (val String) ENGINE ${CLICKHOUSE_CLIENT} --query="CREATE TABLE fixed_string_test_table (val FixedString(1)) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE signed_integer_test_table (val Int32) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE unsigned_integer_test_table (val UInt32) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE enum_test_table (val Enum16('hello' = 1, 'world' = 2, 'yandex' = 256, 'clickhouse' = 257)) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE enum_test_table (val Enum16('hello' = 1, 'world' = 2, 'youtube' = 256, 'clickhouse' = 257)) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE date_test_table (val Date) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;" ${CLICKHOUSE_CLIENT} --query="SYSTEM STOP MERGES string_test_table;" @@ -40,7 +40,7 @@ ${CLICKHOUSE_CLIENT} --query="INSERT INTO fixed_string_test_table VALUES ('0'), # 131072 -> 17 bit is 1 ${CLICKHOUSE_CLIENT} --query="INSERT INTO signed_integer_test_table VALUES (-2), (0), (2), (2), (131072), (131073), (131073);" ${CLICKHOUSE_CLIENT} --query="INSERT INTO unsigned_integer_test_table VALUES (0), (2), (2), (131072), (131073), (131073);" -${CLICKHOUSE_CLIENT} --query="INSERT INTO enum_test_table VALUES ('hello'), ('world'), ('world'), ('yandex'), ('clickhouse'), ('clickhouse');" +${CLICKHOUSE_CLIENT} --query="INSERT INTO enum_test_table VALUES ('hello'), ('world'), ('world'), ('youtube'), ('clickhouse'), ('clickhouse');" ${CLICKHOUSE_CLIENT} --query="INSERT INTO date_test_table VALUES (1), (2), (2), (256), (257), (257);" CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=debug/g') diff --git a/tests/queries/0_stateless/00926_multimatch.reference b/tests/queries/0_stateless/00926_multimatch.reference index 4a2320de57b..7ff32ecd73b 100644 --- a/tests/queries/0_stateless/00926_multimatch.reference +++ b/tests/queries/0_stateless/00926_multimatch.reference @@ -600,16 +600,16 @@ 1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 1 1 diff --git a/tests/queries/0_stateless/00926_multimatch.sql b/tests/queries/0_stateless/00926_multimatch.sql index 90cc289b5a5..b9843a1b4ba 100644 --- a/tests/queries/0_stateless/00926_multimatch.sql +++ b/tests/queries/0_stateless/00926_multimatch.sql @@ -75,18 +75,18 @@ select 1 = multiMatchAny(materialize('abcdef'), ['a......', 'a.....']) from syst select 0 = multiMatchAny(materialize('aaaa'), ['.*aa.*aaa.*', 'aaaaaa{2}', '\(aa\){3}']) from system.numbers limit 10; select 1 = multiMatchAny(materialize('abc'), ['a\0d']) from system.numbers limit 10; -select 1 = multiMatchAnyIndex(materialize('gogleuedeyandexgoogle'), ['google', 'yandex1']) from system.numbers limit 10; -select 2 = multiMatchAnyIndex(materialize('gogleuedeyandexgoogle'), ['google1', 'yandex']) from system.numbers limit 10; -select 0 != multiMatchAnyIndex(materialize('gogleuedeyandexgoogle'), ['.*goo.*', '.*yan.*']) from system.numbers limit 10; +select 1 = multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), ['google', 'unian1']) from system.numbers limit 10; +select 2 = multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), ['google1', 'unian']) from system.numbers limit 10; +select 0 != multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), ['.*goo.*', '.*yan.*']) from system.numbers limit 10; select 5 = multiMatchAnyIndex(materialize('vladizlvav dabe don\'t heart me no more'), ['what', 'is', 'love', 'baby', 'no mo??', 'dont', 'h.rt me']) from system.numbers limit 10; SELECT multiMatchAny(materialize('/odezhda-dlya-bega/'), ['/odezhda-dlya-bega/', 'kurtki-i-vetrovki-dlya-bega', 'futbolki-i-mayki-dlya-bega']); SELECT 1 = multiMatchAny('фабрикант', ['f[ae]b[ei]rl', 'ф[иаэе]б[еэи][рпл]', 'афиукд', 'a[ft],th', '^ф[аиеэ]?б?[еэи]?$', 'берлик', 'fab', 'фа[беьв]+е?[рлко]']); -- All indices tests -SELECT [1, 2] = arraySort(multiMatchAllIndices(materialize('gogleuedeyandexgoogle'), ['.*goo.*', '.*yan.*'])) from system.numbers limit 5; -SELECT [1, 3] = arraySort(multiMatchAllIndices(materialize('gogleuedeyandexgoogle'), ['.*goo.*', 'neverexisted', '.*yan.*'])) from system.numbers limit 5; -SELECT [] = multiMatchAllIndices(materialize('gogleuedeyandexgoogle'), ['neverexisted', 'anotherone', 'andanotherone']) from system.numbers limit 5; +SELECT [1, 2] = arraySort(multiMatchAllIndices(materialize('gogleuedeuniangoogle'), ['.*goo.*', '.*yan.*'])) from system.numbers limit 5; +SELECT [1, 3] = arraySort(multiMatchAllIndices(materialize('gogleuedeuniangoogle'), ['.*goo.*', 'neverexisted', '.*yan.*'])) from system.numbers limit 5; +SELECT [] = multiMatchAllIndices(materialize('gogleuedeuniangoogle'), ['neverexisted', 'anotherone', 'andanotherone']) from system.numbers limit 5; SELECT [1, 2, 3, 11] = arraySort(multiMatchAllIndices('фабрикант', ['', 'рикан', 'а', 'f[ae]b[ei]rl', 'ф[иаэе]б[еэи][рпл]', 'афиукд', 'a[ft],th', '^ф[аиеэ]?б?[еэи]?$', 'берлик', 'fab', 'фа[беьв]+е?[рлко]'])); SELECT [1] = multiMatchAllIndices(materialize('/odezhda-dlya-bega/'), ['/odezhda-dlya-bega/', 'kurtki-i-vetrovki-dlya-bega', 'futbolki-i-mayki-dlya-bega']); SELECT [] = multiMatchAllIndices(materialize('aaaa'), ['.*aa.*aaa.*', 'aaaaaa{2}', '\(aa\){3}']); diff --git a/tests/queries/0_stateless/00998_constraints_all_tables.reference b/tests/queries/0_stateless/00998_constraints_all_tables.reference index 0ec8b004ae4..90a2380df1e 100644 --- a/tests/queries/0_stateless/00998_constraints_all_tables.reference +++ b/tests/queries/0_stateless/00998_constraints_all_tables.reference @@ -10,5 +10,5 @@ 0 0 3 -CREATE TABLE default.constrained\n(\n `URL` String,\n CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = \'yandex.ru\',\n CONSTRAINT is_utf8 CHECK isValidUTF8(URL)\n)\nENGINE = Log -CREATE TABLE default.constrained2\n(\n `URL` String,\n CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = \'yandex.ru\',\n CONSTRAINT is_utf8 CHECK isValidUTF8(URL)\n)\nENGINE = Log +CREATE TABLE default.constrained\n(\n `URL` String,\n CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = \'censor.net\',\n CONSTRAINT is_utf8 CHECK isValidUTF8(URL)\n)\nENGINE = Log +CREATE TABLE default.constrained2\n(\n `URL` String,\n CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = \'censor.net\',\n CONSTRAINT is_utf8 CHECK isValidUTF8(URL)\n)\nENGINE = Log diff --git a/tests/queries/0_stateless/00998_constraints_all_tables.sql b/tests/queries/0_stateless/00998_constraints_all_tables.sql index e47b7eaf83c..bb0d6933a01 100644 --- a/tests/queries/0_stateless/00998_constraints_all_tables.sql +++ b/tests/queries/0_stateless/00998_constraints_all_tables.sql @@ -1,53 +1,53 @@ DROP TABLE IF EXISTS constrained; -CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Null; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), ('https://yandex.ru/te\xFFst'); -- { serverError 469 } -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), (toValidUTF8('https://yandex.ru/te\xFFst')); +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Null; +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), ('https://censor.net/te\xFFst'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), (toValidUTF8('https://censor.net/te\xFFst')); DROP TABLE constrained; -CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Memory; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Memory; +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), ('https://yandex.ru/te\xFFst'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), ('https://censor.net/te\xFFst'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), (toValidUTF8('https://yandex.ru/te\xFFst')); +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), (toValidUTF8('https://censor.net/te\xFFst')); SELECT count() FROM constrained; DROP TABLE constrained; -CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = StripeLog; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = StripeLog; +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), ('https://yandex.ru/te\xFFst'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), ('https://censor.net/te\xFFst'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), (toValidUTF8('https://yandex.ru/te\xFFst')); +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), (toValidUTF8('https://censor.net/te\xFFst')); SELECT count() FROM constrained; DROP TABLE constrained; -CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = TinyLog; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = TinyLog; +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), ('https://yandex.ru/te\xFFst'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), ('https://censor.net/te\xFFst'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), (toValidUTF8('https://yandex.ru/te\xFFst')); +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), (toValidUTF8('https://censor.net/te\xFFst')); SELECT count() FROM constrained; DROP TABLE constrained; -CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Log; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Log; +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), ('https://yandex.ru/te\xFFst'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), ('https://censor.net/te\xFFst'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), (toValidUTF8('https://yandex.ru/te\xFFst')); +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), (toValidUTF8('https://censor.net/te\xFFst')); SELECT count() FROM constrained; DROP TABLE constrained; DROP TABLE IF EXISTS constrained2; -CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Log; +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Log; CREATE TABLE constrained2 AS constrained; SHOW CREATE TABLE constrained; SHOW CREATE TABLE constrained2; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } -INSERT INTO constrained2 VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +INSERT INTO constrained2 VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } DROP TABLE constrained; DROP TABLE constrained2; diff --git a/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql b/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql index 6d1c7fd5ef6..86c84427297 100644 --- a/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql +++ b/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql @@ -5,7 +5,7 @@ set allow_experimental_parallel_reading_from_replicas=0; drop table if exists test_max_parallel_replicas_lr; -- If you wonder why the table is named with "_lr" suffix in this test. --- No reason. Actually it is the name of the table in Yandex.Market and they provided this test case for us. +-- No reason. Actually it is the name of the table in our customer and they provided this test case for us. CREATE TABLE test_max_parallel_replicas_lr (timestamp UInt64) ENGINE = MergeTree ORDER BY (intHash32(timestamp)) SAMPLE BY intHash32(timestamp); INSERT INTO test_max_parallel_replicas_lr select number as timestamp from system.numbers limit 100; diff --git a/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference b/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference index 994e3f24aaf..7cf545176e3 100644 --- a/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference +++ b/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference @@ -2,13 +2,13 @@ other other google other -yandex +censor.net other yahoo other other other -SELECT transform(number, [2, 4, 6], _CAST([\'google\', \'yandex\', \'yahoo\'], \'Array(Enum8(\\\'google\\\' = 1, \\\'other\\\' = 2, \\\'yahoo\\\' = 3, \\\'yandex\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'google\\\' = 1, \\\'other\\\' = 2, \\\'yahoo\\\' = 3, \\\'yandex\\\' = 4)\')) +SELECT transform(number, [2, 4, 6], _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\')) FROM system.numbers LIMIT 10 google @@ -17,24 +17,24 @@ google google google google -yandex -yandex -yandex -yandex -SELECT if(number > 5, \'yandex\', \'google\') +censor.net +censor.net +censor.net +censor.net +SELECT if(number > 5, \'censor.net\', \'google\') FROM system.numbers LIMIT 10 other other google other -yandex +censor.net other yahoo other other other -SELECT transform(number, [2, 4, 6], [\'google\', \'yandex\', \'yahoo\'], \'other\') +SELECT transform(number, [2, 4, 6], [\'google\', \'censor.net\', \'yahoo\'], \'other\') FROM system.numbers LIMIT 10 google @@ -43,10 +43,10 @@ google google google google -yandex -yandex -yandex -yandex -SELECT if(number > 5, \'yandex\', \'google\') +censor.net +censor.net +censor.net +censor.net +SELECT if(number > 5, \'censor.net\', \'google\') FROM system.numbers LIMIT 10 diff --git a/tests/queries/0_stateless/01324_if_transform_strings_to_enum.sql b/tests/queries/0_stateless/01324_if_transform_strings_to_enum.sql index dcb082c650a..ee2f48a53da 100644 --- a/tests/queries/0_stateless/01324_if_transform_strings_to_enum.sql +++ b/tests/queries/0_stateless/01324_if_transform_strings_to_enum.sql @@ -1,13 +1,13 @@ set optimize_if_transform_strings_to_enum = 1; -SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -SELECT number > 5 ? 'yandex' : 'google' FROM system.numbers LIMIT 10; -EXPLAIN SYNTAX SELECT number > 5 ? 'yandex' : 'google' FROM system.numbers LIMIT 10; +SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; set optimize_if_transform_strings_to_enum = 0; -SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -SELECT number > 5 ? 'yandex' : 'google' FROM system.numbers LIMIT 10; -EXPLAIN SYNTAX SELECT number > 5 ? 'yandex' : 'google' FROM system.numbers LIMIT 10; +SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; diff --git a/tests/queries/0_stateless/01358_constexpr_constraint.sql b/tests/queries/0_stateless/01358_constexpr_constraint.sql index 799f6f32259..4560ac47c42 100644 --- a/tests/queries/0_stateless/01358_constexpr_constraint.sql +++ b/tests/queries/0_stateless/01358_constexpr_constraint.sql @@ -1,7 +1,7 @@ CREATE TEMPORARY TABLE constrained ( `URL` String, - CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = domainWithoutWWW(URL), + CONSTRAINT identity CHECK domainWithoutWWW(URL) = domainWithoutWWW(URL), CONSTRAINT is_utf8 CHECK isValidUTF8(URL) ); diff --git a/tests/queries/0_stateless/01407_lambda_arrayJoin.sql b/tests/queries/0_stateless/01407_lambda_arrayJoin.sql index 4f34bb59527..363b1d92dbb 100644 --- a/tests/queries/0_stateless/01407_lambda_arrayJoin.sql +++ b/tests/queries/0_stateless/01407_lambda_arrayJoin.sql @@ -1,6 +1,5 @@ SELECT arrayFilter((a) -> ((a, arrayJoin([])) IN (Null, [Null])), []); SELECT arrayFilter((a) -> ((a, arrayJoin([[]])) IN (Null, [Null])), []); --- simplified from the https://clickhouse-test-reports.s3.yandex.net/10373/6c4748a63e7acde2cc3283d96ffec590aae1e724/fuzzer/fuzzer.log#fail1 SELECT * FROM system.one ARRAY JOIN arrayFilter((a) -> ((a, arrayJoin([])) IN (NULL)), []) AS arr_x; -- { serverError 43; } SELECT * FROM numbers(1) LEFT ARRAY JOIN arrayFilter((x_0, x_1) -> (arrayJoin([]) IN (NULL)), [], []) AS arr_x; diff --git a/tests/queries/0_stateless/01456_ast_optimizations_over_distributed.sql b/tests/queries/0_stateless/01456_ast_optimizations_over_distributed.sql index ee144264193..1e1d87a5ad5 100644 --- a/tests/queries/0_stateless/01456_ast_optimizations_over_distributed.sql +++ b/tests/queries/0_stateless/01456_ast_optimizations_over_distributed.sql @@ -8,8 +8,8 @@ SET optimize_if_transform_strings_to_enum = 1; SELECT any(number + 1) FROM numbers(1); SELECT uniq(bitNot(number)) FROM numbers(1); SELECT sum(number + 1) FROM numbers(1); -SELECT transform(number, [1, 2], ['google', 'yandex'], 'other') FROM numbers(1); -SELECT number > 0 ? 'yandex' : 'google' FROM numbers(1); +SELECT transform(number, [1, 2], ['google', 'censor.net'], 'other') FROM numbers(1); +SELECT number > 0 ? 'censor.net' : 'google' FROM numbers(1); DROP TABLE IF EXISTS local_table; @@ -23,8 +23,8 @@ INSERT INTO local_table SELECT number FROM numbers(1); SELECT any(number + 1) FROM dist; SELECT uniq(bitNot(number)) FROM dist; SELECT sum(number + 1) FROM dist; -SELECT transform(number, [1, 2], ['google', 'yandex'], 'other') FROM dist; -SELECT number > 0 ? 'yandex' : 'google' FROM dist; +SELECT transform(number, [1, 2], ['google', 'censor.net'], 'other') FROM dist; +SELECT number > 0 ? 'censor.net' : 'google' FROM dist; DROP TABLE local_table; DROP TABLE dist; diff --git a/tests/queries/0_stateless/01622_constraints_simple_optimization.sql b/tests/queries/0_stateless/01622_constraints_simple_optimization.sql index e1922975a2a..7ec9e1a3158 100644 --- a/tests/queries/0_stateless/01622_constraints_simple_optimization.sql +++ b/tests/queries/0_stateless/01622_constraints_simple_optimization.sql @@ -8,23 +8,23 @@ SET optimize_move_to_prewhere = 1; SET optimize_substitute_columns = 1; SET optimize_append_index = 1; -CREATE TABLE constraint_test_assumption (URL String, a Int32, CONSTRAINT c1 ASSUME domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT c2 ASSUME URL > 'zzz' AND startsWith(URL, 'test') = True) ENGINE = TinyLog; +CREATE TABLE constraint_test_assumption (URL String, a Int32, CONSTRAINT c1 ASSUME domainWithoutWWW(URL) = 'bigmir.net', CONSTRAINT c2 ASSUME URL > 'zzz' AND startsWith(URL, 'test') = True) ENGINE = TinyLog; --- Add wrong rows in order to check optimization INSERT INTO constraint_test_assumption (URL, a) VALUES ('1', 1); INSERT INTO constraint_test_assumption (URL, a) VALUES ('2', 2); -INSERT INTO constraint_test_assumption (URL, a) VALUES ('yandex.ru', 3); +INSERT INTO constraint_test_assumption (URL, a) VALUES ('bigmir.net', 3); INSERT INTO constraint_test_assumption (URL, a) VALUES ('3', 4); -SELECT count() FROM constraint_test_assumption WHERE domainWithoutWWW(URL) = 'yandex.ru'; --- assumption -> 4 -SELECT count() FROM constraint_test_assumption WHERE NOT (domainWithoutWWW(URL) = 'yandex.ru'); --- assumption -> 0 -SELECT count() FROM constraint_test_assumption WHERE domainWithoutWWW(URL) != 'yandex.ru'; --- assumption -> 0 +SELECT count() FROM constraint_test_assumption WHERE domainWithoutWWW(URL) = 'bigmir.net'; --- assumption -> 4 +SELECT count() FROM constraint_test_assumption WHERE NOT (domainWithoutWWW(URL) = 'bigmir.net'); --- assumption -> 0 +SELECT count() FROM constraint_test_assumption WHERE domainWithoutWWW(URL) != 'bigmir.net'; --- assumption -> 0 SELECT count() FROM constraint_test_assumption WHERE domainWithoutWWW(URL) = 'nothing'; --- not optimized -> 0 -SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'yandex.ru' AND URL > 'zzz'); ---> assumption -> 4 -SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'yandex.ru' AND NOT URL <= 'zzz'); ---> assumption -> 4 -SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'yandex.ru' AND URL > 'zzz') OR (a = 10 AND a + 5 < 100); ---> assumption -> 4 -SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'yandex.ru' AND URL = '111'); ---> assumption & no assumption -> 0 +SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'bigmir.net' AND URL > 'zzz'); ---> assumption -> 4 +SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'bigmir.net' AND NOT URL <= 'zzz'); ---> assumption -> 4 +SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'bigmir.net' AND URL > 'zzz') OR (a = 10 AND a + 5 < 100); ---> assumption -> 4 +SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'bigmir.net' AND URL = '111'); ---> assumption & no assumption -> 0 SELECT count() FROM constraint_test_assumption WHERE (startsWith(URL, 'test') = True); ---> assumption -> 4 DROP TABLE constraint_test_assumption; diff --git a/tests/queries/0_stateless/01661_referer.reference b/tests/queries/0_stateless/01661_referer.reference index 49d29c80f17..d247c53413e 100644 --- a/tests/queries/0_stateless/01661_referer.reference +++ b/tests/queries/0_stateless/01661_referer.reference @@ -1,2 +1,2 @@ 1 -https://yandex.ru/ +https://meta.ua/ diff --git a/tests/queries/0_stateless/01661_referer.sh b/tests/queries/0_stateless/01661_referer.sh index 8123c925454..0299ee063ea 100755 --- a/tests/queries/0_stateless/01661_referer.sh +++ b/tests/queries/0_stateless/01661_referer.sh @@ -4,6 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT 1' --referer 'https://yandex.ru/' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT 1' --referer 'https://meta.ua/' ${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" -${CLICKHOUSE_CLIENT} --query "SELECT http_referer FROM system.query_log WHERE current_database = currentDatabase() AND http_referer LIKE '%yandex%' LIMIT 1" +${CLICKHOUSE_CLIENT} --query "SELECT http_referer FROM system.query_log WHERE current_database = currentDatabase() AND http_referer LIKE '%meta%' LIMIT 1" diff --git a/tests/queries/0_stateless/01733_transform_ubsan.sql b/tests/queries/0_stateless/01733_transform_ubsan.sql index 256603e9087..7c3d8ef653a 100644 --- a/tests/queries/0_stateless/01733_transform_ubsan.sql +++ b/tests/queries/0_stateless/01733_transform_ubsan.sql @@ -1,4 +1,4 @@ -SELECT arrayStringConcat(arrayMap(x -> transform(x, [1025, -9223372036854775808, 65537, 257, 1048576, 10, 7, 1048575, 65536], ['yandex', 'googlegooglegooglegoogle', 'test', '', '', 'hello', 'world', '', 'xyz'], ''), arrayMap(x -> (x % -inf), range(number))), '') +SELECT arrayStringConcat(arrayMap(x -> transform(x, [1025, -9223372036854775808, 65537, 257, 1048576, 10, 7, 1048575, 65536], ['censor.net', 'googlegooglegooglegoogle', 'test', '', '', 'hello', 'world', '', 'xyz'], ''), arrayMap(x -> (x % -inf), range(number))), '') FROM system.numbers LIMIT 1025 FORMAT Null; diff --git a/tests/queries/0_stateless/01889_tokenize.reference b/tests/queries/0_stateless/01889_tokenize.reference index 4dd6f323929..2861a183c33 100644 --- a/tests/queries/0_stateless/01889_tokenize.reference +++ b/tests/queries/0_stateless/01889_tokenize.reference @@ -1,8 +1,8 @@ ['It','is','quite','a','wonderful','day','isn','t','it'] ['There','is','so','much','to','learn'] -['22','00','email','yandex','ru'] +['22','00','email','tut','by'] ['Токенизация','каких','либо','других','языков'] ['It','is','quite','a','wonderful','day,','isn\'t','it?'] ['There','is....','so','much','to','learn!'] -['22:00','email@yandex.ru'] +['22:00','email@tut.by'] ['Токенизация','каких-либо','других','языков?'] diff --git a/tests/queries/0_stateless/01889_tokenize.sql b/tests/queries/0_stateless/01889_tokenize.sql index c9d29a8632b..287e439d2ce 100644 --- a/tests/queries/0_stateless/01889_tokenize.sql +++ b/tests/queries/0_stateless/01889_tokenize.sql @@ -2,10 +2,10 @@ SET allow_experimental_nlp_functions = 1; SELECT splitByNonAlpha('It is quite a wonderful day, isn\'t it?'); SELECT splitByNonAlpha('There is.... so much to learn!'); -SELECT splitByNonAlpha('22:00 email@yandex.ru'); +SELECT splitByNonAlpha('22:00 email@tut.by'); SELECT splitByNonAlpha('Токенизация каких-либо других языков?'); SELECT splitByWhitespace('It is quite a wonderful day, isn\'t it?'); SELECT splitByWhitespace('There is.... so much to learn!'); -SELECT splitByWhitespace('22:00 email@yandex.ru'); +SELECT splitByWhitespace('22:00 email@tut.by'); SELECT splitByWhitespace('Токенизация каких-либо других языков?'); diff --git a/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh b/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh index e7871454192..7740ffcce7b 100755 --- a/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh +++ b/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh @@ -66,8 +66,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # frame #11: 0x000000000fffdfc4 clickhouse`main(argc_=, argv_=) + 356 at main.cpp:366 # frame #12: 0x00007ffff7de6d0a libc.so.6`__libc_start_main(main=(clickhouse`main at main.cpp:339), argc=7, argv=0x00007fffffffe1e8, init=, fini=, rtld_fini=, stack_end=0x00007fffffffe1d8) + 234 at libc-start.c:308 # frame #13: 0x000000000ffdc0aa clickhouse`_start + 42 -# -# [1]: https://clickhouse-test-reports.s3.yandex.net/26656/f17ca450ac991603e6400c7caef49c493ac69739/functional_stateless_tests_(ubsan).html#fail1 # Limit number of files to 50, and we will get EMFILE for some of socket() prlimit --nofile=50 $CLICKHOUSE_BENCHMARK --iterations 1 --concurrency 50 --query 'select 1' 2>&1 From 8bf8632cd8c3715a8a392296fd335f86deee744e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 24 Mar 2022 07:01:20 +0300 Subject: [PATCH 111/132] Revert "Decrease data_type_max_parse_depth a little to avoid stack overflow in coroutines" --- src/DataTypes/DataTypeFactory.cpp | 2 +- .../0_stateless/01675_data_type_coroutine.reference | 1 - .../queries/0_stateless/01675_data_type_coroutine.sh | 12 ------------ 3 files changed, 1 insertion(+), 14 deletions(-) diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index e1567d3a1b0..ce501f4333d 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -33,7 +33,7 @@ DataTypePtr DataTypeFactory::get(const String & full_name) const /// Value 315 is known to cause stack overflow in some test configurations (debug build, sanitizers) /// let's make the threshold significantly lower. /// It is impractical for user to have complex data types with this depth. - static constexpr size_t data_type_max_parse_depth = 150; + static constexpr size_t data_type_max_parse_depth = 200; ParserDataType parser; ASTPtr ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", 0, data_type_max_parse_depth); diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.reference b/tests/queries/0_stateless/01675_data_type_coroutine.reference index 541dab48def..7326d960397 100644 --- a/tests/queries/0_stateless/01675_data_type_coroutine.reference +++ b/tests/queries/0_stateless/01675_data_type_coroutine.reference @@ -1,2 +1 @@ Ok -Ok diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.sh b/tests/queries/0_stateless/01675_data_type_coroutine.sh index 9ae6dadd1dc..8e80d722a4c 100755 --- a/tests/queries/0_stateless/01675_data_type_coroutine.sh +++ b/tests/queries/0_stateless/01675_data_type_coroutine.sh @@ -16,15 +16,3 @@ done #echo "I = ${I}" echo 'Ok' - -counter=0 -I=0 -while [[ $counter -lt $retries ]]; do - I=$((I + 1)) - TYPE=$(perl -e "print 'Array(' x $I; print 'UInt8'; print ')' x $I") - ${CLICKHOUSE_CLIENT} --prefer_localhost_replica=0 --max_parser_depth 1000000 --query "SELECT * FROM remote('127.0.0.{1,2}', generateRandom('x $TYPE', 1, 1, 1)) LIMIT 1 FORMAT Null" 2>&1 | grep -q -F 'Maximum parse depth' && break; - ((++counter)) -done - -#echo "I = ${I}" -echo 'Ok' From 6a8bb34b41fbf92ed09b146c1c857425102cbcf6 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 24 Mar 2022 17:17:25 +0800 Subject: [PATCH 112/132] update doc of hive --- docs/en/engines/table-engines/integrations/hive.md | 2 +- docs/zh/engines/table-engines/integrations/hive.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/hive.md b/docs/en/engines/table-engines/integrations/hive.md index b804b9c2279..61147467690 100644 --- a/docs/en/engines/table-engines/integrations/hive.md +++ b/docs/en/engines/table-engines/integrations/hive.md @@ -137,7 +137,7 @@ CREATE TABLE test.test_orc `f_array_array_float` Array(Array(Float32)), `day` String ) -ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc') +ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc') PARTITION BY day ``` diff --git a/docs/zh/engines/table-engines/integrations/hive.md b/docs/zh/engines/table-engines/integrations/hive.md index aa2c82d902a..24e0834d2fc 100644 --- a/docs/zh/engines/table-engines/integrations/hive.md +++ b/docs/zh/engines/table-engines/integrations/hive.md @@ -140,7 +140,7 @@ CREATE TABLE test.test_orc `f_array_array_float` Array(Array(Float32)), `day` String ) -ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc') +ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc') PARTITION BY day ``` From b3c021ec97d3fa77252bf81fecab3b055fddd177 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Thu, 24 Mar 2022 13:53:06 +0300 Subject: [PATCH 113/132] Update test.py --- tests/integration/test_s3_zero_copy_replication/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_s3_zero_copy_replication/test.py b/tests/integration/test_s3_zero_copy_replication/test.py index d7aa4feb1d2..22334b0803e 100644 --- a/tests/integration/test_s3_zero_copy_replication/test.py +++ b/tests/integration/test_s3_zero_copy_replication/test.py @@ -361,6 +361,7 @@ def test_s3_zero_copy_with_ttl_delete(cluster, large_data, iterations): ) node1.query("OPTIMIZE TABLE ttl_delete_test FINAL") + node1.query("SYSTEM SYNC REPLICA ttl_delete_test") node2.query("SYSTEM SYNC REPLICA ttl_delete_test") if large_data: From 4ad4d4b38c1b1aed0661e5c776606ad2af0005e1 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 24 Mar 2022 12:23:14 +0100 Subject: [PATCH 114/132] apply formatter --- .../test.py | 417 ++++++++++++++++-- 1 file changed, 372 insertions(+), 45 deletions(-) diff --git a/tests/integration/test_zookeeper_config_load_balancing/test.py b/tests/integration/test_zookeeper_config_load_balancing/test.py index 6a49e4a4dde..56af7513389 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test.py @@ -2,26 +2,35 @@ import pytest from helpers.cluster import ClickHouseCluster from helpers.network import PartitionManager -cluster = ClickHouseCluster(__file__, zookeeper_config_path='configs/zookeeper_load_balancing.xml') +cluster = ClickHouseCluster( + __file__, zookeeper_config_path="configs/zookeeper_load_balancing.xml" +) # use 3-letter hostnames, so getHostNameDifference("nod1", "zoo1") will work as expected -node1 = cluster.add_instance('nod1', with_zookeeper=True, - main_configs=["configs/zookeeper_load_balancing.xml"]) -node2 = cluster.add_instance('nod2', with_zookeeper=True, - main_configs=["configs/zookeeper_load_balancing.xml"]) -node3 = cluster.add_instance('nod3', with_zookeeper=True, - main_configs=["configs/zookeeper_load_balancing.xml"]) +node1 = cluster.add_instance( + "nod1", with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"] +) +node2 = cluster.add_instance( + "nod2", with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"] +) +node3 = cluster.add_instance( + "nod3", with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"] +) def change_balancing(old, new, reload=True): - line = '{}<' + line = "{}<" old_line = line.format(old) new_line = line.format(new) for node in [node1, node2, node3]: - node.replace_in_config('/etc/clickhouse-server/config.d/zookeeper_load_balancing.xml', old_line, new_line) + node.replace_in_config( + "/etc/clickhouse-server/config.d/zookeeper_load_balancing.xml", + old_line, + new_line, + ) if reload: node.query("select '{}', '{}'".format(old, new)) - node.query('system reload config') + node.query("system reload config") @pytest.fixture(scope="module") @@ -35,66 +44,384 @@ def started_cluster(): def test_first_or_random(started_cluster): try: - change_balancing('random', 'first_or_random') - print(str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + change_balancing("random", "first_or_random") + print( + str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) - print(str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + print( + str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) - print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + print( + str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) finally: - change_balancing('first_or_random', 'random', reload=False) + change_balancing("first_or_random", "random", reload=False) def test_in_order(started_cluster): try: - change_balancing('random', 'in_order') - print(str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + change_balancing("random", "in_order") + print( + str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) - print(str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + print( + str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) - print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + print( + str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) finally: - change_balancing('in_order', 'random', reload=False) + change_balancing("in_order", "random", reload=False) def test_nearest_hostname(started_cluster): try: - change_balancing('random', 'nearest_hostname') - print(str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + change_balancing("random", "nearest_hostname") + print( + str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) - print(str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + print( + str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) - print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo3_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + print( + str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo3_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) finally: - change_balancing('nearest_hostname', 'random', reload=False) + change_balancing("nearest_hostname", "random", reload=False) def test_round_robin(started_cluster): pm = PartitionManager() try: - pm._add_rule({"source": node1.ip_address, "destination": cluster.get_instance_ip('zoo1'), "action": 'REJECT --reject-with tcp-reset'}) - pm._add_rule({"source": node2.ip_address, "destination": cluster.get_instance_ip('zoo1'), "action": 'REJECT --reject-with tcp-reset'}) - pm._add_rule({"source": node3.ip_address, "destination": cluster.get_instance_ip('zoo1'), "action": 'REJECT --reject-with tcp-reset'}) - change_balancing('random', 'round_robin') + pm._add_rule( + { + "source": node1.ip_address, + "destination": cluster.get_instance_ip("zoo1"), + "action": "REJECT --reject-with tcp-reset", + } + ) + pm._add_rule( + { + "source": node2.ip_address, + "destination": cluster.get_instance_ip("zoo1"), + "action": "REJECT --reject-with tcp-reset", + } + ) + pm._add_rule( + { + "source": node3.ip_address, + "destination": cluster.get_instance_ip("zoo1"), + "action": "REJECT --reject-with tcp-reset", + } + ) + change_balancing("random", "round_robin") - print(str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node1.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + print( + str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) - print(str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node2.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + print( + str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) - print(str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED"], privileged=True, user='root'))) - assert '1' == str(node3.exec_in_container(['bash', '-c', "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l"], privileged=True, user='root')).strip() + print( + str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) finally: pm.heal_all() - change_balancing('round_robin', 'random', reload=False) + change_balancing("round_robin", "random", reload=False) From 37286c6141aba20e62d3852b5e2bf5edf1533f67 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 24 Mar 2022 12:55:56 +0100 Subject: [PATCH 115/132] Increase fiber stack size a bit in attempt to fix stack overflow in tests with address sanitizer --- src/Common/FiberStack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/FiberStack.h b/src/Common/FiberStack.h index 29f84ee6d85..2cc301dcc62 100644 --- a/src/Common/FiberStack.h +++ b/src/Common/FiberStack.h @@ -32,7 +32,7 @@ public: /// /// Current value is just enough for all tests in our CI. It's not selected in some special /// way. We will have 40 pages with 4KB page size. - static constexpr size_t default_stack_size = 192 * 1024; /// 64KB was not enough for tests + static constexpr size_t default_stack_size = 256 * 1024; /// 64KB was not enough for tests explicit FiberStack(size_t stack_size_ = default_stack_size) : stack_size(stack_size_) { From 65598f40b26da6a066a97143648267bad4d4da60 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 24 Mar 2022 13:39:52 +0100 Subject: [PATCH 116/132] Don't run 01318_long_unsuccessful_mutation_zookeeper test in backward compatibility check --- .../0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh b/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh index 9a4df37d206..34fa822b6ea 100755 --- a/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh +++ b/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel +# Tags: long, zookeeper, no-parallel, no-backward-compatibility-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From a4e8e940bc166744a8026d517d647527196def88 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 24 Mar 2022 13:45:04 +0100 Subject: [PATCH 117/132] Fixed tests --- tests/queries/0_stateless/02118_deserialize_whole_text.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02118_deserialize_whole_text.sh b/tests/queries/0_stateless/02118_deserialize_whole_text.sh index fe9256df329..e9f35582f15 100755 --- a/tests/queries/0_stateless/02118_deserialize_whole_text.sh +++ b/tests/queries/0_stateless/02118_deserialize_whole_text.sh @@ -41,16 +41,16 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv4')" 2>&1 | echo "[\"255.255.255.255trash\"]" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' -echo "[\"2a02:6ba8:2da1:40cd:31db:f9f1:fc3d:80b1trash\"]" > $DATA_FILE +echo "[\"0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash\"]" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' -echo "2a02:6ba8:2da1:40cd:31db:f9f1:fc3d:80b1trash" > $DATA_FILE +echo "0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'TSV', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' -echo "2a02:6ba8:2da1:40cd:31db:f9f1:fc3d:80b1trash" > $DATA_FILE +echo "0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' -echo "[\"2a02:6ba8:2da1:40cd:31db:f9f1:fc3d:80b1trash\"]" > $DATA_FILE +echo "[\"0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash\"]" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"{1:2, 2:3}trash\"]" > $DATA_FILE From 98be162d20e4b936146c298fa78d990bc64016c9 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 24 Mar 2022 13:49:26 +0100 Subject: [PATCH 118/132] Update comment --- src/Common/FiberStack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/FiberStack.h b/src/Common/FiberStack.h index 2cc301dcc62..d55b0fa691c 100644 --- a/src/Common/FiberStack.h +++ b/src/Common/FiberStack.h @@ -31,7 +31,7 @@ public: /// probably it worth to try to increase stack size for coroutines. /// /// Current value is just enough for all tests in our CI. It's not selected in some special - /// way. We will have 40 pages with 4KB page size. + /// way. We will have 64 pages with 4KB page size. static constexpr size_t default_stack_size = 256 * 1024; /// 64KB was not enough for tests explicit FiberStack(size_t stack_size_ = default_stack_size) : stack_size(stack_size_) From 3516eb05246a3b8cdc1967baffcfeae186349846 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 24 Mar 2022 15:37:53 +0100 Subject: [PATCH 119/132] Make GITHUB_RUN_URL variable and use it --- tests/ci/ast_fuzzer_check.py | 9 ++++----- tests/ci/build_report_check.py | 6 ++---- tests/ci/env_helper.py | 3 ++- tests/ci/finish_check.py | 4 ++-- tests/ci/performance_comparison_check.py | 9 +++++---- tests/ci/pr_info.py | 8 ++++---- tests/ci/run_check.py | 4 ++-- tests/ci/upload_result_helper.py | 4 ++-- 8 files changed, 23 insertions(+), 24 deletions(-) diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index c330d1c725b..94f5eff51d7 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -9,11 +9,10 @@ from github import Github from env_helper import ( GITHUB_REPOSITORY, - TEMP_PATH, - REPO_COPY, + GITHUB_RUN_URL, REPORTS_PATH, - GITHUB_SERVER_URL, - GITHUB_RUN_ID, + REPO_COPY, + TEMP_PATH, ) from s3_helper import S3Helper from get_robot_token import get_best_robot_token @@ -126,7 +125,7 @@ if __name__ == "__main__": logging.info("Exception uploading file %s text %s", f, ex) paths[f] = "" - report_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" + report_url = GITHUB_RUN_URL if paths["runlog.log"]: report_url = paths["runlog.log"] if paths["main.log"]: diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 1cee5fd42de..5afe2991073 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -11,7 +11,7 @@ from env_helper import ( TEMP_PATH, GITHUB_REPOSITORY, GITHUB_SERVER_URL, - GITHUB_RUN_ID, + GITHUB_RUN_URL, ) from report import create_build_html_report from s3_helper import S3Helper @@ -180,9 +180,7 @@ if __name__ == "__main__": branch_name = "PR #{}".format(pr_info.number) branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_info.number}" commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{pr_info.sha}" - task_url = ( - f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID or '0'}" - ) + task_url = GITHUB_RUN_URL report = create_build_html_report( build_check_name, build_results, diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index 90178e5c56a..c34162ba51a 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -7,9 +7,10 @@ CACHES_PATH = os.getenv("CACHES_PATH", TEMP_PATH) CLOUDFLARE_TOKEN = os.getenv("CLOUDFLARE_TOKEN") GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH") GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse") -GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID") +GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID", "0") GITHUB_SERVER_URL = os.getenv("GITHUB_SERVER_URL", "https://github.com") GITHUB_WORKSPACE = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) +GITHUB_RUN_URL = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" IMAGES_PATH = os.getenv("IMAGES_PATH") REPORTS_PATH = os.getenv("REPORTS_PATH", "./reports") REPO_COPY = os.getenv("REPO_COPY", os.path.abspath("../../")) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 79cea83b1c8..289e32406ef 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -2,7 +2,7 @@ import logging from github import Github -from env_helper import GITHUB_SERVER_URL, GITHUB_REPOSITORY, GITHUB_RUN_ID +from env_helper import GITHUB_RUN_URL from pr_info import PRInfo from get_robot_token import get_best_robot_token from commit_status_helper import get_commit @@ -33,7 +33,7 @@ if __name__ == "__main__": gh = Github(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) - url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" + url = GITHUB_RUN_URL statuses = filter_statuses(list(commit.get_statuses())) if NAME in statuses and statuses[NAME].state == "pending": commit.create_status( diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index 761b1ac9257..c6ce86b2ce1 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -11,6 +11,7 @@ import re from github import Github +from env_helper import GITHUB_RUN_URL from pr_info import PRInfo from s3_helper import S3Helper from get_robot_token import get_best_robot_token @@ -88,9 +89,9 @@ if __name__ == "__main__": else: pr_link = f"https://github.com/ClickHouse/ClickHouse/pull/{pr_info.number}" - task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" - docker_env += ' -e CHPC_ADD_REPORT_LINKS="Job (actions) Tested commit"'.format( - task_url, pr_link + docker_env += ( + f' -e CHPC_ADD_REPORT_LINKS="' + f'Job (actions) Tested commit"' ) if "RUN_BY_HASH_TOTAL" in os.environ: @@ -199,7 +200,7 @@ if __name__ == "__main__": status = "failure" message = "No message in report." - report_url = task_url + report_url = GITHUB_RUN_URL if paths["runlog.log"]: report_url = paths["runlog.log"] diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 64e22712059..ee4399792ae 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -8,7 +8,7 @@ from build_download_helper import get_with_retries from env_helper import ( GITHUB_REPOSITORY, GITHUB_SERVER_URL, - GITHUB_RUN_ID, + GITHUB_RUN_URL, GITHUB_EVENT_PATH, ) @@ -111,7 +111,7 @@ class PRInfo: self.sha = github_event["pull_request"]["head"]["sha"] repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" - self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}" + self.task_url = GITHUB_RUN_URL self.repo_full_name = GITHUB_REPOSITORY self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" @@ -142,7 +142,7 @@ class PRInfo: self.sha = github_event["after"] pull_request = get_pr_for_commit(self.sha, github_event["ref"]) repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" - self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}" + self.task_url = GITHUB_RUN_URL self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" self.repo_full_name = GITHUB_REPOSITORY if pull_request is None or pull_request["state"] == "closed": @@ -180,7 +180,7 @@ class PRInfo: self.number = 0 self.labels = {} repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" - self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}" + self.task_url = GITHUB_RUN_URL self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" self.repo_full_name = GITHUB_REPOSITORY self.pr_html_url = f"{repo_prefix}/commits/{ref}" diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 5b89082532d..9c7ba13f8e4 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -5,7 +5,7 @@ import re from typing import Tuple from github import Github -from env_helper import GITHUB_RUN_ID, GITHUB_REPOSITORY, GITHUB_SERVER_URL +from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL from pr_info import PRInfo from get_robot_token import get_best_robot_token from commit_status_helper import get_commit @@ -231,7 +231,7 @@ if __name__ == "__main__": ) sys.exit(1) - url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" + url = GITHUB_RUN_URL if not can_run: print("::notice ::Cannot run") commit.create_status( diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index f7b74e8d5dd..289fc4b3184 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -2,7 +2,7 @@ import os import logging import ast -from env_helper import GITHUB_SERVER_URL, GITHUB_REPOSITORY, GITHUB_RUN_ID +from env_helper import GITHUB_SERVER_URL, GITHUB_REPOSITORY, GITHUB_RUN_URL from report import ReportColorTheme, create_test_html_report @@ -66,7 +66,7 @@ def upload_results( branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_number}" commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{commit_sha}" - task_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" + task_url = GITHUB_RUN_URL if additional_urls: raw_log_url = additional_urls[0] From f07918c6590792ec3137219ca641305fd9d53a0f Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 24 Mar 2022 15:50:32 +0100 Subject: [PATCH 120/132] Increase stack size --- src/Common/FiberStack.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/FiberStack.h b/src/Common/FiberStack.h index d55b0fa691c..c55608311d0 100644 --- a/src/Common/FiberStack.h +++ b/src/Common/FiberStack.h @@ -31,8 +31,8 @@ public: /// probably it worth to try to increase stack size for coroutines. /// /// Current value is just enough for all tests in our CI. It's not selected in some special - /// way. We will have 64 pages with 4KB page size. - static constexpr size_t default_stack_size = 256 * 1024; /// 64KB was not enough for tests + /// way. We will have 80 pages with 4KB page size. + static constexpr size_t default_stack_size = 320 * 1024; /// 64KB was not enough for tests explicit FiberStack(size_t stack_size_ = default_stack_size) : stack_size(stack_size_) { From d6fc6b9c447d307cf359e35698bd90075a2f1bfc Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 24 Mar 2022 15:40:52 +0100 Subject: [PATCH 121/132] Add build-url label to built docker images --- tests/ci/docker_images_check.py | 3 ++- tests/ci/docker_test.py | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 818478f6430..3d0cc468aec 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -11,7 +11,7 @@ from typing import Dict, List, Optional, Set, Tuple, Union from github import Github -from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP +from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL from s3_helper import S3Helper from pr_info import PRInfo from get_robot_token import get_best_robot_token, get_parameter_from_ssm @@ -234,6 +234,7 @@ def build_and_push_one_image( with open(build_log, "wb") as bl: cmd = ( "docker buildx build --builder default " + f"--label build-url={GITHUB_RUN_URL} " f"{from_tag_arg}" f"--build-arg BUILDKIT_INLINE_CACHE=1 " f"--tag {image.repo}:{version_string} " diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index 27bfe07db53..2b864b6b94c 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -4,6 +4,7 @@ import os import unittest from unittest.mock import patch +from env_helper import GITHUB_RUN_URL from pr_info import PRInfo import docker_images_check as di @@ -117,7 +118,8 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( - "docker buildx build --builder default --build-arg FROM_TAG=version " + f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " + "--build-arg FROM_TAG=version " "--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version --cache-from " "type=registry,ref=name:version --push --progress plain path", mock_popen.call_args.args, @@ -133,7 +135,8 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( - "docker buildx build --builder default --build-arg FROM_TAG=version2 " + f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " + "--build-arg FROM_TAG=version2 " "--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version2 --cache-from " "type=registry,ref=name:version2 --progress plain path", mock_popen.call_args.args, @@ -149,7 +152,7 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( - "docker buildx build --builder default " + f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " "--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version2 --cache-from " "type=registry,ref=name:version2 --progress plain path", mock_popen.call_args.args, From b2863d4cea139382730897bd11855e92f48cb1ae Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 24 Mar 2022 15:49:06 +0100 Subject: [PATCH 122/132] Rebuild docs-check for a test --- docker/docs/check/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/docs/check/Dockerfile b/docker/docs/check/Dockerfile index 174be123eed..4eb03a91e7a 100644 --- a/docker/docs/check/Dockerfile +++ b/docker/docs/check/Dockerfile @@ -1,4 +1,3 @@ -# rebuild in #33610 # docker build -t clickhouse/docs-check . ARG FROM_TAG=latest FROM clickhouse/docs-builder:$FROM_TAG From e2f4546f85d7cda7b0fcc93bf616a24dafb7bfc4 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 24 Mar 2022 16:59:09 +0100 Subject: [PATCH 123/132] Pin jinja2 to 3.0.3 to have working jinja2.contextfilter --- docs/tools/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 8bf1a5f477c..c48a70b0909 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -10,7 +10,7 @@ cssmin==0.2.0 future==0.18.2 htmlmin==0.1.12 idna==2.10 -Jinja2>=3.0.3 +Jinja2==3.0.3 jinja2-highlight==0.6.1 jsmin==3.0.0 livereload==2.6.3 From d90627e82fd9f01e9ec3e41a753d6ffd92967d51 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 24 Mar 2022 17:50:31 +0100 Subject: [PATCH 124/132] Fix version string update, fix #35518 --- tests/ci/version_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 02e22ee0c4d..3bb547333e7 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -238,7 +238,7 @@ def _update_dockerfile(repo_path: str, version: ClickHouseVersion): def update_version_local(repo_path, version, version_type="testing"): update_contributors() version.with_description(version_type) - update_cmake_version(version, version_type) + update_cmake_version(version) _update_changelog(repo_path, version) _update_dockerfile(repo_path, version) From d910357e6958be848c897962d032e00997f7ab4b Mon Sep 17 00:00:00 2001 From: tavplubix Date: Thu, 24 Mar 2022 20:25:16 +0300 Subject: [PATCH 125/132] Update test.py --- tests/integration/test_s3_zero_copy_replication/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_s3_zero_copy_replication/test.py b/tests/integration/test_s3_zero_copy_replication/test.py index 22334b0803e..1ce1047ebec 100644 --- a/tests/integration/test_s3_zero_copy_replication/test.py +++ b/tests/integration/test_s3_zero_copy_replication/test.py @@ -361,6 +361,7 @@ def test_s3_zero_copy_with_ttl_delete(cluster, large_data, iterations): ) node1.query("OPTIMIZE TABLE ttl_delete_test FINAL") + node1.query("SYSTEM SYNC REPLICA ttl_delete_test") node2.query("SYSTEM SYNC REPLICA ttl_delete_test") From c63bc052929cc133bccf3f0747a1ae9cbc8b8878 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 24 Mar 2022 19:32:14 +0000 Subject: [PATCH 126/132] Add test --- .../0_stateless/01675_data_type_coroutine.reference | 1 + .../queries/0_stateless/01675_data_type_coroutine.sh | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.reference b/tests/queries/0_stateless/01675_data_type_coroutine.reference index 7326d960397..541dab48def 100644 --- a/tests/queries/0_stateless/01675_data_type_coroutine.reference +++ b/tests/queries/0_stateless/01675_data_type_coroutine.reference @@ -1 +1,2 @@ Ok +Ok diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.sh b/tests/queries/0_stateless/01675_data_type_coroutine.sh index 8e80d722a4c..98a3d351cd2 100755 --- a/tests/queries/0_stateless/01675_data_type_coroutine.sh +++ b/tests/queries/0_stateless/01675_data_type_coroutine.sh @@ -6,6 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) counter=0 retries=60 + I=0 while [[ $counter -lt $retries ]]; do I=$((I + 1)) @@ -14,5 +15,16 @@ while [[ $counter -lt $retries ]]; do ((++counter)) done +echo 'Ok' + +counter=0 +I=0 +while [[ $counter -lt $retries ]]; do + I=$((I + 1)) + TYPE=$(perl -e "print 'Array(' x $I; print 'UInt8'; print ')' x $I") + ${CLICKHOUSE_CLIENT} --prefer_localhost_replica=0 --max_parser_depth 1000000 --query "SELECT * FROM remote('127.0.0.{1,2}', generateRandom('x $TYPE', 1, 1, 1)) LIMIT 1 FORMAT Null" 2>&1 | grep -q -F 'Maximum parse depth' && break; + ((++counter)) +done + #echo "I = ${I}" echo 'Ok' From a2c4073ca1d658f87c271876b3007975896b6901 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 24 Mar 2022 22:48:24 +0300 Subject: [PATCH 127/132] [docs] remove Metrika counter --- website/js/base.js | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/website/js/base.js b/website/js/base.js index 6704231c69d..a2356cb565c 100644 --- a/website/js/base.js +++ b/website/js/base.js @@ -70,15 +70,6 @@ (function (d, w, c) { (w[c] = w[c] || []).push(function() { var is_single_page = $('html').attr('data-single-page') === 'true'; - try { - w.yaCounter18343495 = new Ya.Metrika2({ - id: 18343495, - clickmap: !is_single_page, - trackLinks: !is_single_page, - accurateTrackBounce: !is_single_page, - webvisor: !is_single_page - }); - } catch(e) { } if (!is_single_page) { $('head').each(function(_, element) { @@ -91,20 +82,6 @@ }); } }); - - var n = d.getElementsByTagName("script")[0], - s = d.createElement("script"), - f = function () { n.parentNode.insertBefore(s, n); }; - s.type = "text/javascript"; - s.async = true; - s.src = "/js/metrika.js"; - if (window.location.hostname.endsWith('clickhouse.com')) { - if (w.opera == "[object Opera]") { - d.addEventListener("DOMContentLoaded", f, false); - } else { - f(); - } - } })(document, window, "yandex_metrika_callbacks2"); var beforePrint = function() { From 123ea5117fb2c169c017e6fccd3f3ffd1d86a75a Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 24 Mar 2022 22:55:26 +0300 Subject: [PATCH 128/132] Update base.js --- website/js/base.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/js/base.js b/website/js/base.js index a2356cb565c..9389028f1ef 100644 --- a/website/js/base.js +++ b/website/js/base.js @@ -82,7 +82,7 @@ }); } }); - })(document, window, "yandex_metrika_callbacks2"); + })(document, window, ""); var beforePrint = function() { var details = document.getElementsByTagName("details"); From aedea58741f710c9cf26ff5d6dca75952412b8c8 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 24 Mar 2022 22:10:20 +0100 Subject: [PATCH 129/132] Mark test as long --- tests/queries/0_stateless/01675_data_type_coroutine.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.sh b/tests/queries/0_stateless/01675_data_type_coroutine.sh index 98a3d351cd2..9f7d5401bd2 100755 --- a/tests/queries/0_stateless/01675_data_type_coroutine.sh +++ b/tests/queries/0_stateless/01675_data_type_coroutine.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From f133531bc0a886c4b3b43374d906127cbba29b0f Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 24 Mar 2022 22:19:30 +0100 Subject: [PATCH 130/132] Functions cast into IPv4, IPV6 add backward incompatible section into changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61724ab2d0c..100b03ab92b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ * Make `arrayCompact` function behave as other higher-order functions: perform compaction not of lambda function results but on the original array. If you're using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)). * Change implementation specific behavior on overflow of function `toDatetime`. It will be saturated to the nearest min/max supported instant of datetime instead of wraparound. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)). +* Make function `cast(value, 'IPv4')`, `cast(value, 'IPv6')` behave same as `toIPv4`, `toIPv6` functions. Changed behavior of incorrect IP address passed into functions `toIPv4`,` toIPv6`, now if invalid IP address passes into this functions exception will be raised, before this function return default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringOrNull` `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. Functions `IPv4StringToNumOrDefault `, `toIPv4OrDefault `, `toIPv6OrDefault ` should be used if previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning default value for invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`, if this setting enabled, then IP address conversion functions will behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)). #### New Feature @@ -366,7 +367,7 @@ #### Improvement -* Now date time conversion functions that generates time before `1970-01-01 00:00:00` will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if date truncation function would yield result before the Unix epoch. +* Now date time conversion functions that generates time before `1970-01-01 00:00:00` will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if date truncation function would yield result before the Unix epoch. * Always display resource usage (total CPU usage, total RAM usage and max RAM usage per host) in client. [#33271](https://github.com/ClickHouse/ClickHouse/pull/33271) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Improve `Bool` type serialization and deserialization, check the range of values. [#32984](https://github.com/ClickHouse/ClickHouse/pull/32984) ([Kruglov Pavel](https://github.com/Avogar)). * If an invalid setting is defined using the `SET` query or using the query parameters in the HTTP request, error message will contain suggestions that are similar to the invalid setting string (if any exists). [#32946](https://github.com/ClickHouse/ClickHouse/pull/32946) ([Antonio Andelic](https://github.com/antonio2368)). From 572b15a63c3abc5096f80801cf92c64c1d235118 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 24 Mar 2022 14:32:43 +0100 Subject: [PATCH 131/132] Fix paths and some cmake typos --- CMakeLists.txt | 2 +- cmake/strip_binary.cmake | 6 +++--- packages/clickhouse-common-static-dbg.yaml | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ed3872fd6e..deef582c790 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -266,7 +266,7 @@ if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE)) endif () # Allows to build stripped binary in a separate directory -if (OBJCOPY_PATH AND READELF_PATH) +if (OBJCOPY_PATH AND STRIP_PATH) option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF) if (INSTALL_STRIPPED_BINARIES) set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information") diff --git a/cmake/strip_binary.cmake b/cmake/strip_binary.cmake index 1547a814913..1f24790a159 100644 --- a/cmake/strip_binary.cmake +++ b/cmake/strip_binary.cmake @@ -22,12 +22,12 @@ macro(clickhouse_strip_binary) COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" - COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_BINDIR}/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" COMMENT "Stripping clickhouse binary" VERBATIM ) install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}/${STRIP_TARGET}.debug COMPONENT clickhouse) endmacro() @@ -49,5 +49,5 @@ macro(clickhouse_make_empty_debug_info_for_nfpm) COMMENT "Addiding empty debug info for NFPM" VERBATIM ) - install(FILES "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" DESTINATION "${CMAKE_INSTALL_LIBDIR}/debug" COMPONENT clickhouse) + install(FILES "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" DESTINATION "${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}" COMPONENT clickhouse) endmacro() diff --git a/packages/clickhouse-common-static-dbg.yaml b/packages/clickhouse-common-static-dbg.yaml index 349f9ec0c47..12a1594bd30 100644 --- a/packages/clickhouse-common-static-dbg.yaml +++ b/packages/clickhouse-common-static-dbg.yaml @@ -21,12 +21,12 @@ description: | This package contains the debugging symbols for clickhouse-common. contents: -- src: root/usr/lib/debug/clickhouse.debug - dst: /usr/lib/debug/clickhouse.debug -- src: root/usr/lib/debug/clickhouse-odbc-bridge.debug - dst: /usr/lib/debug/clickhouse-odbc-bridge.debug -- src: root/usr/lib/debug/clickhouse-library-bridge.debug - dst: /usr/lib/debug/clickhouse-library-bridge.debug +- src: root/usr/lib/debug/usr/bin/clickhouse.debug + dst: /usr/lib/debug/usr/bin/clickhouse.debug +- src: root/usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug + dst: /usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug +- src: root/usr/lib/debug/usr/bin/clickhouse-library-bridge.debug + dst: /usr/lib/debug/usr/bin/clickhouse-library-bridge.debug # docs - src: ../AUTHORS dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS From 64f79f0c69699a66fe69f6c806bedef9d0dead68 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Fri, 25 Mar 2022 00:14:26 +0100 Subject: [PATCH 132/132] Added an ability to specify cluster secret in replicated database (#35333) --- src/Databases/DatabaseReplicated.cpp | 29 +++++++++++++++++----- src/Databases/DatabaseReplicated.h | 10 ++++++++ src/Databases/DatabaseReplicatedSettings.h | 7 +++--- src/Interpreters/Cluster.cpp | 22 +++++++++++++--- src/Interpreters/Cluster.h | 8 ++++-- 5 files changed, 61 insertions(+), 15 deletions(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index d9d9f5b45f6..0c3cc56c061 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -88,6 +88,9 @@ DatabaseReplicated::DatabaseReplicated( /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it. if (zookeeper_path.front() != '/') zookeeper_path = "/" + zookeeper_path; + + if (!db_settings.collection_name.value.empty()) + fillClusterAuthInfo(db_settings.collection_name.value, context_->getConfigRef()); } String DatabaseReplicated::getFullReplicaName() const @@ -191,22 +194,36 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const shards.back().emplace_back(unescapeForFileName(host_port)); } - String username = db_settings.cluster_username; - String password = db_settings.cluster_password; UInt16 default_port = getContext()->getTCPPort(); - bool secure = db_settings.cluster_secure_connection; bool treat_local_as_remote = false; bool treat_local_port_as_remote = getContext()->getApplicationType() == Context::ApplicationType::LOCAL; return std::make_shared( getContext()->getSettingsRef(), shards, - username, - password, + cluster_auth_info.cluster_username, + cluster_auth_info.cluster_password, default_port, treat_local_as_remote, treat_local_port_as_remote, - secure); + cluster_auth_info.cluster_secure_connection, + /*priority=*/1, + database_name, + cluster_auth_info.cluster_secret); +} + + +void DatabaseReplicated::fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config_ref) +{ + const auto & config_prefix = fmt::format("named_collections.{}", collection_name); + + if (!config_ref.has(config_prefix)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name); + + cluster_auth_info.cluster_username = config_ref.getString(config_prefix + ".cluster_username", ""); + cluster_auth_info.cluster_password = config_ref.getString(config_prefix + ".cluster_password", ""); + cluster_auth_info.cluster_secret = config_ref.getString(config_prefix + ".cluster_secret", ""); + cluster_auth_info.cluster_secure_connection = config_ref.getBool(config_prefix + ".cluster_secure_connection", false); } void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(bool force_attach) diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index fcb8a2c4d33..ac212e168b8 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -75,6 +75,16 @@ private: bool createDatabaseNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper); void createReplicaNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper); + struct + { + String cluster_username{"default"}; + String cluster_password; + String cluster_secret; + bool cluster_secure_connection{false}; + } cluster_auth_info; + + void fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config); + void checkQueryValid(const ASTPtr & query, ContextPtr query_context) const; void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 max_log_ptr); diff --git a/src/Databases/DatabaseReplicatedSettings.h b/src/Databases/DatabaseReplicatedSettings.h index 0aff26712c0..8bed1ababf6 100644 --- a/src/Databases/DatabaseReplicatedSettings.h +++ b/src/Databases/DatabaseReplicatedSettings.h @@ -8,12 +8,11 @@ namespace DB class ASTStorage; #define LIST_OF_DATABASE_REPLICATED_SETTINGS(M) \ - M(Float, max_broken_tables_ratio, 0.5, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \ + M(Float, max_broken_tables_ratio, 0.5, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \ M(UInt64, max_replication_lag_to_enqueue, 10, "Replica will throw exception on attempt to execute query if its replication lag greater", 0) \ M(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \ - M(String, cluster_username, "default", "Username to use when connecting to hosts of cluster", 0) \ - M(String, cluster_password, "", "Password to use when connecting to hosts of cluster", 0) \ - M(Bool, cluster_secure_connection, false, "Enable TLS when connecting to hosts of cluster", 0) \ + M(String, collection_name, "", "A name of a collection defined in server's config where all info for cluster authentication is defined", 0) \ + DECLARE_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS) diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index d558d1cfd67..1039fac6883 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -132,7 +132,9 @@ Cluster::Address::Address( bool secure_, Int64 priority_, UInt32 shard_index_, - UInt32 replica_index_) + UInt32 replica_index_, + String cluster_name_, + String cluster_secret_) : user(user_), password(password_) { bool can_be_local = true; @@ -164,6 +166,8 @@ Cluster::Address::Address( is_local = can_be_local && isLocal(clickhouse_port); shard_index = shard_index_; replica_index = replica_index_; + cluster = cluster_name_; + cluster_secret = cluster_secret_; } @@ -537,10 +541,14 @@ Cluster::Cluster( bool treat_local_as_remote, bool treat_local_port_as_remote, bool secure, - Int64 priority) + Int64 priority, + String cluster_name, + String cluster_secret) { UInt32 current_shard_num = 1; + secret = cluster_secret; + for (const auto & shard : names) { Addresses current; @@ -554,7 +562,9 @@ Cluster::Cluster( secure, priority, current_shard_num, - current.size() + 1); + current.size() + 1, + cluster_name, + cluster_secret); addresses_with_failover.emplace_back(current); @@ -690,6 +700,9 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti } } + secret = from.secret; + name = from.name; + initMisc(); } @@ -704,6 +717,9 @@ Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector addresses_with_failover.emplace_back(from.addresses_with_failover.at(index)); } + secret = from.secret; + name = from.name; + initMisc(); } diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index e9f26c21089..13f19f7c0ed 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -55,7 +55,9 @@ public: bool treat_local_as_remote, bool treat_local_port_as_remote, bool secure = false, - Int64 priority = 1); + Int64 priority = 1, + String cluster_name = "", + String cluster_secret = ""); Cluster(const Cluster &)= delete; Cluster & operator=(const Cluster &) = delete; @@ -127,7 +129,9 @@ public: bool secure_ = false, Int64 priority_ = 1, UInt32 shard_index_ = 0, - UInt32 replica_index_ = 0); + UInt32 replica_index_ = 0, + String cluster_name = "", + String cluster_secret_ = ""); /// Returns 'escaped_host_name:port' String toString() const;