diff --git a/.gitmodules b/.gitmodules index ab7c8a7c94d..0a7a6b4a3f9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -103,7 +103,7 @@ url = https://github.com/ClickHouse-Extras/fastops [submodule "contrib/orc"] path = contrib/orc - url = https://github.com/apache/orc + url = https://github.com/ClickHouse-Extras/orc [submodule "contrib/sparsehash-c11"] path = contrib/sparsehash-c11 url = https://github.com/sparsehash/sparsehash-c11.git @@ -210,9 +210,6 @@ [submodule "contrib/fast_float"] path = contrib/fast_float url = https://github.com/fastfloat/fast_float -[submodule "contrib/libpqxx"] - path = contrib/libpqxx - url = https://github.com/jtv/libpqxx [submodule "contrib/libpq"] path = contrib/libpq url = https://github.com/ClickHouse-Extras/libpq @@ -231,3 +228,6 @@ [submodule "contrib/yaml-cpp"] path = contrib/yaml-cpp url = https://github.com/ClickHouse-Extras/yaml-cpp.git +[submodule "contrib/libpqxx"] + path = contrib/libpqxx + url = https://github.com/ClickHouse-Extras/libpqxx.git diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d37fe182f9..8987082db30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,8 +2,6 @@ #### Upgrade Notes -* One bug has been found after release: [#25187](https://github.com/ClickHouse/ClickHouse/issues/25187). -* Do not upgrade if you have partition key with `UUID`. * `zstd` compression library is updated to v1.5.0. You may get messages about "checksum does not match" in replication. These messages are expected due to update of compression algorithm and you can ignore them. These messages are informational and do not indicate any kinds of undesired behaviour. * The setting `compile_expressions` is enabled by default. Although it has been heavily tested on variety of scenarios, if you find some undesired behaviour on your servers, you can try turning this setting off. * Values of `UUID` type cannot be compared with integer. For example, instead of writing `uuid != 0` type `uuid != '00000000-0000-0000-0000-000000000000'`. diff --git a/CMakeLists.txt b/CMakeLists.txt index ce0f58e2521..9cf8188cc8e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -183,24 +183,20 @@ endif () # Make sure the final executable has symbols exported set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic") -if (OS_LINUX) - find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-12" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy") - if (OBJCOPY_PATH) - message(STATUS "Using objcopy: ${OBJCOPY_PATH}.") - - if (ARCH_AMD64) - set(OBJCOPY_ARCH_OPTIONS -O elf64-x86-64 -B i386) - elseif (ARCH_AARCH64) - set(OBJCOPY_ARCH_OPTIONS -O elf64-aarch64 -B aarch64) - endif () - else () - message(FATAL_ERROR "Cannot find objcopy.") - endif () +find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-12" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy") +if (OBJCOPY_PATH) + message(STATUS "Using objcopy: ${OBJCOPY_PATH}.") +else () + message(FATAL_ERROR "Cannot find objcopy.") endif () if (OS_DARWIN) - set(WHOLE_ARCHIVE -all_load) - set(NO_WHOLE_ARCHIVE -noall_load) + # The `-all_load` flag forces loading of all symbols from all libraries, + # and leads to multiply-defined symbols. This flag allows force loading + # from a _specific_ library, which is what we need. + set(WHOLE_ARCHIVE -force_load) + # The `-noall_load` flag is the default and now obsolete. + set(NO_WHOLE_ARCHIVE "") else () set(WHOLE_ARCHIVE --whole-archive) set(NO_WHOLE_ARCHIVE --no-whole-archive) diff --git a/README.md b/README.md index 5677837815c..21eda470f49 100644 --- a/README.md +++ b/README.md @@ -15,4 +15,4 @@ ClickHouse® is an open-source column-oriented database management system that a * You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person. ## Upcoming Events -* [SF Bay Area ClickHouse Community Meetup (online)](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/278144089/) on 16 June 2021. +* [China ClickHouse Community Meetup (online)](http://hdxu.cn/rhbfZ) on 26 June 2021. diff --git a/base/bridge/IBridge.cpp b/base/bridge/IBridge.cpp index b2ec53158b1..35a9b95c97f 100644 --- a/base/bridge/IBridge.cpp +++ b/base/bridge/IBridge.cpp @@ -1,14 +1,22 @@ #include "IBridge.h" -#include #include #include #include -#include -#include + #include +#include + +#include #include +#include +#include +#include #include +#include +#include +#include +#include #if USE_ODBC # include @@ -163,6 +171,31 @@ void IBridge::initialize(Application & self) max_server_connections = config().getUInt("max-server-connections", 1024); keep_alive_timeout = config().getUInt64("keep-alive-timeout", 10); + struct rlimit limit; + const UInt64 gb = 1024 * 1024 * 1024; + + /// Set maximum RSS to 1 GiB. + limit.rlim_max = limit.rlim_cur = gb; + if (setrlimit(RLIMIT_RSS, &limit)) + LOG_WARNING(log, "Unable to set maximum RSS to 1GB: {} (current rlim_cur={}, rlim_max={})", + errnoToString(errno), limit.rlim_cur, limit.rlim_max); + + if (!getrlimit(RLIMIT_RSS, &limit)) + LOG_INFO(log, "RSS limit: cur={}, max={}", limit.rlim_cur, limit.rlim_max); + + try + { + const auto oom_score = toString(config().getUInt64("bridge_oom_score", 500)); + WriteBufferFromFile buf("/proc/self/oom_score_adj"); + buf.write(oom_score.data(), oom_score.size()); + buf.close(); + LOG_INFO(log, "OOM score is set to {}", oom_score); + } + catch (const Exception & e) + { + LOG_WARNING(log, "Failed to set OOM score, error: {}", e.what()); + } + initializeTerminationAndSignalProcessing(); ServerApplication::initialize(self); // NOLINT @@ -214,7 +247,7 @@ int IBridge::main(const std::vector & /*args*/) server.stop(); - for (size_t count : ext::range(1, 6)) + for (size_t count : collections::range(1, 6)) { if (server.currentConnections() == 0) break; diff --git a/base/common/DecomposedFloat.h b/base/common/DecomposedFloat.h index 078ba823c15..21034908fe7 100644 --- a/base/common/DecomposedFloat.h +++ b/base/common/DecomposedFloat.h @@ -91,10 +91,12 @@ struct DecomposedFloat /// Compare float with integer of arbitrary width (both signed and unsigned are supported). Assuming two's complement arithmetic. + /// This function is generic, big integers (128, 256 bit) are supported as well. /// Infinities are compared correctly. NaNs are treat similarly to infinities, so they can be less than all numbers. /// (note that we need total order) + /// Returns -1, 0 or 1. template - int compare(Int rhs) + int compare(Int rhs) const { if (rhs == 0) return sign(); @@ -137,10 +139,11 @@ struct DecomposedFloat if (normalized_exponent() >= static_cast(8 * sizeof(Int) - is_signed_v)) return is_negative() ? -1 : 1; - using UInt = make_unsigned_t; + using UInt = std::conditional_t<(sizeof(Int) > sizeof(typename Traits::UInt)), make_unsigned_t, typename Traits::UInt>; UInt uint_rhs = rhs < 0 ? -rhs : rhs; /// Smaller octave: abs(rhs) < abs(float) + /// FYI, TIL: octave is also called "binade", https://en.wikipedia.org/wiki/Binade if (uint_rhs < (static_cast(1) << normalized_exponent())) return is_negative() ? -1 : 1; @@ -154,11 +157,11 @@ struct DecomposedFloat bool large_and_always_integer = normalized_exponent() >= static_cast(Traits::mantissa_bits); - typename Traits::UInt a = large_and_always_integer - ? mantissa() << (normalized_exponent() - Traits::mantissa_bits) - : mantissa() >> (Traits::mantissa_bits - normalized_exponent()); + UInt a = large_and_always_integer + ? static_cast(mantissa()) << (normalized_exponent() - Traits::mantissa_bits) + : static_cast(mantissa()) >> (Traits::mantissa_bits - normalized_exponent()); - typename Traits::UInt b = uint_rhs - (static_cast(1) << normalized_exponent()); + UInt b = uint_rhs - (static_cast(1) << normalized_exponent()); if (a < b) return is_negative() ? 1 : -1; @@ -175,37 +178,37 @@ struct DecomposedFloat template - bool equals(Int rhs) + bool equals(Int rhs) const { return compare(rhs) == 0; } template - bool notEquals(Int rhs) + bool notEquals(Int rhs) const { return compare(rhs) != 0; } template - bool less(Int rhs) + bool less(Int rhs) const { return compare(rhs) < 0; } template - bool greater(Int rhs) + bool greater(Int rhs) const { return compare(rhs) > 0; } template - bool lessOrEquals(Int rhs) + bool lessOrEquals(Int rhs) const { return compare(rhs) <= 0; } template - bool greaterOrEquals(Int rhs) + bool greaterOrEquals(Int rhs) const { return compare(rhs) >= 0; } diff --git a/base/common/ReadlineLineReader.cpp b/base/common/ReadlineLineReader.cpp index 397a7dd7543..f2c2b60f327 100644 --- a/base/common/ReadlineLineReader.cpp +++ b/base/common/ReadlineLineReader.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/base/common/SimpleCache.h b/base/common/SimpleCache.h index 57247de696a..c3bf019c226 100644 --- a/base/common/SimpleCache.h +++ b/base/common/SimpleCache.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include /** The simplest cache for a free function. @@ -32,10 +32,11 @@ public: template Result operator() (Args &&... args) { + Key key{std::forward(args)...}; + { std::lock_guard lock(mutex); - Key key{std::forward(args)...}; auto it = cache.find(key); if (cache.end() != it) @@ -43,7 +44,7 @@ public: } /// The calculations themselves are not done under mutex. - Result res = f(std::forward(args)...); + Result res = std::apply(f, key); { std::lock_guard lock(mutex); @@ -57,11 +58,12 @@ public: template void update(Args &&... args) { - Result res = f(std::forward(args)...); + Key key{std::forward(args)...}; + + Result res = std::apply(f, key); + { std::lock_guard lock(mutex); - - Key key{std::forward(args)...}; cache[key] = std::move(res); } } diff --git a/base/common/arraySize.h b/base/common/arraySize.h new file mode 100644 index 00000000000..d6245257ad0 --- /dev/null +++ b/base/common/arraySize.h @@ -0,0 +1,7 @@ +#pragma once + +#include + +/** \brief Returns number of elements in an automatic array. */ +template +constexpr size_t arraySize(const T (&)[N]) noexcept { return N; } diff --git a/base/common/bit_cast.h b/base/common/bit_cast.h new file mode 100644 index 00000000000..5b4b0931b62 --- /dev/null +++ b/base/common/bit_cast.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include +#include + + +/** \brief Returns value `from` converted to type `To` while retaining bit representation. + * `To` and `From` must satisfy `CopyConstructible`. + */ +template +std::decay_t bit_cast(const From & from) +{ + To res {}; + memcpy(static_cast(&res), &from, std::min(sizeof(res), sizeof(from))); + return res; +} + +/** \brief Returns value `from` converted to type `To` while retaining bit representation. + * `To` and `From` must satisfy `CopyConstructible`. + */ +template +std::decay_t safe_bit_cast(const From & from) +{ + static_assert(sizeof(To) == sizeof(From), "bit cast on types of different width"); + return bit_cast(from); +} diff --git a/base/common/chrono_io.h b/base/common/chrono_io.h new file mode 100644 index 00000000000..4ee8dec6634 --- /dev/null +++ b/base/common/chrono_io.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include +#include + + +inline std::string to_string(const std::time_t & time) +{ + return cctz::format("%Y-%m-%d %H:%M:%S", std::chrono::system_clock::from_time_t(time), cctz::local_time_zone()); +} + +template +std::string to_string(const std::chrono::time_point & tp) +{ + // Don't use DateLUT because it shows weird characters for + // TimePoint::max(). I wish we could use C++20 format, but it's not + // there yet. + // return DateLUT::instance().timeToString(std::chrono::system_clock::to_time_t(tp)); + + auto in_time_t = std::chrono::system_clock::to_time_t(tp); + return to_string(in_time_t); +} + +template > +std::string to_string(const std::chrono::duration & duration) +{ + auto seconds_as_int = std::chrono::duration_cast(duration); + if (seconds_as_int == duration) + return std::to_string(seconds_as_int.count()) + "s"; + auto seconds_as_double = std::chrono::duration_cast>(duration); + return std::to_string(seconds_as_double.count()) + "s"; +} + +template +std::ostream & operator<<(std::ostream & o, const std::chrono::time_point & tp) +{ + return o << to_string(tp); +} + +template > +std::ostream & operator<<(std::ostream & o, const std::chrono::duration & duration) +{ + return o << to_string(duration); +} diff --git a/base/ext/function_traits.h b/base/common/function_traits.h similarity index 100% rename from base/ext/function_traits.h rename to base/common/function_traits.h diff --git a/base/common/getResource.cpp b/base/common/getResource.cpp index 5d5f18047b3..6682ae0a01f 100644 --- a/base/common/getResource.cpp +++ b/base/common/getResource.cpp @@ -4,23 +4,42 @@ #include #include - std::string_view getResource(std::string_view name) { + // Convert the resource file name into the form generated by `ld -r -b binary`. std::string name_replaced(name); std::replace(name_replaced.begin(), name_replaced.end(), '/', '_'); std::replace(name_replaced.begin(), name_replaced.end(), '-', '_'); std::replace(name_replaced.begin(), name_replaced.end(), '.', '_'); boost::replace_all(name_replaced, "+", "_PLUS_"); - /// These are the names that are generated by "ld -r -b binary" - std::string symbol_name_data = "_binary_" + name_replaced + "_start"; - std::string symbol_name_size = "_binary_" + name_replaced + "_size"; + // In most `dlsym(3)` APIs, one passes the symbol name as it appears via + // something like `nm` or `objdump -t`. For example, a symbol `_foo` would be + // looked up with the string `"_foo"`. + // + // Apple's linker is confusingly different. The NOTES on the man page for + // `dlsym(3)` claim that one looks up the symbol with "the name used in C + // source code". In this example, that would mean using the string `"foo"`. + // This apparently applies even in the case where the symbol did not originate + // from C source, such as the embedded binary resource files used here. So + // the symbol name must not have a leading `_` on Apple platforms. It's not + // clear how this applies to other symbols, such as those which _have_ a leading + // underscore in them by design, many leading underscores, etc. +#if defined OS_DARWIN + std::string prefix = "binary_"; +#else + std::string prefix = "_binary_"; +#endif + std::string symbol_name_start = prefix + name_replaced + "_start"; + std::string symbol_name_end = prefix + name_replaced + "_end"; - const void * sym_data = dlsym(RTLD_DEFAULT, symbol_name_data.c_str()); - const void * sym_size = dlsym(RTLD_DEFAULT, symbol_name_size.c_str()); + const char* sym_start = reinterpret_cast(dlsym(RTLD_DEFAULT, symbol_name_start.c_str())); + const char* sym_end = reinterpret_cast(dlsym(RTLD_DEFAULT, symbol_name_end.c_str())); - if (sym_data && sym_size) - return { static_cast(sym_data), unalignedLoad(&sym_size) }; + if (sym_start && sym_end) + { + auto resource_size = static_cast(std::distance(sym_start, sym_end)); + return { sym_start, resource_size }; + } return {}; } diff --git a/base/common/map.h b/base/common/map.h new file mode 100644 index 00000000000..043d8363619 --- /dev/null +++ b/base/common/map.h @@ -0,0 +1,52 @@ +#pragma once + +#include +#include + +namespace collections +{ + +/// \brief Strip type off top level reference and cv-qualifiers thus allowing storage in containers +template +using unqualified_t = std::remove_cv_t>; + +/** \brief Returns collection of the same container-type as the input collection, + * with each element transformed by the application of `mapper`. + */ +template