diff --git a/CMakeLists.txt b/CMakeLists.txt index cb91f879d5b..a3c0f0fe863 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -169,8 +169,8 @@ endif () option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON) option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF) -if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") - # Only for Linux, x86_64. +if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") + # Only for Linux, x86_64 or aarch64. option(GLIBC_COMPATIBILITY "Enable compatibility with older glibc libraries." ON) elseif(GLIBC_COMPATIBILITY) message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration") diff --git a/base/common/DecomposedFloat.h b/base/common/DecomposedFloat.h new file mode 100644 index 00000000000..078ba823c15 --- /dev/null +++ b/base/common/DecomposedFloat.h @@ -0,0 +1,216 @@ +#pragma once + +#include +#include +#include +#include + + +/// Allows to check the internals of IEEE-754 floating point number. + +template struct FloatTraits; + +template <> +struct FloatTraits +{ + using UInt = uint32_t; + static constexpr size_t bits = 32; + static constexpr size_t exponent_bits = 8; + static constexpr size_t mantissa_bits = bits - exponent_bits - 1; +}; + +template <> +struct FloatTraits +{ + using UInt = uint64_t; + static constexpr size_t bits = 64; + static constexpr size_t exponent_bits = 11; + static constexpr size_t mantissa_bits = bits - exponent_bits - 1; +}; + + +/// x = sign * (2 ^ normalized_exponent) * (1 + mantissa * 2 ^ -mantissa_bits) +/// x = sign * (2 ^ normalized_exponent + mantissa * 2 ^ (normalized_exponent - mantissa_bits)) +template +struct DecomposedFloat +{ + using Traits = FloatTraits; + + DecomposedFloat(T x) + { + memcpy(&x_uint, &x, sizeof(x)); + } + + typename Traits::UInt x_uint; + + bool is_negative() const + { + return x_uint >> (Traits::bits - 1); + } + + /// Returns 0 for both +0. and -0. + int sign() const + { + return (exponent() == 0 && mantissa() == 0) + ? 0 + : (is_negative() + ? -1 + : 1); + } + + uint16_t exponent() const + { + return (x_uint >> (Traits::mantissa_bits)) & (((1ull << (Traits::exponent_bits + 1)) - 1) >> 1); + } + + int16_t normalized_exponent() const + { + return int16_t(exponent()) - ((1ull << (Traits::exponent_bits - 1)) - 1); + } + + uint64_t mantissa() const + { + return x_uint & ((1ull << Traits::mantissa_bits) - 1); + } + + int64_t mantissa_with_sign() const + { + return is_negative() ? -mantissa() : mantissa(); + } + + /// NOTE Probably floating point instructions can be better. + bool is_integer_in_representable_range() const + { + return x_uint == 0 + || (normalized_exponent() >= 0 /// The number is not less than one + /// The number is inside the range where every integer has exact representation in float + && normalized_exponent() <= static_cast(Traits::mantissa_bits) + /// After multiplying by 2^exp, the fractional part becomes zero, means the number is integer + && ((mantissa() & ((1ULL << (Traits::mantissa_bits - normalized_exponent())) - 1)) == 0)); + } + + + /// Compare float with integer of arbitrary width (both signed and unsigned are supported). Assuming two's complement arithmetic. + /// Infinities are compared correctly. NaNs are treat similarly to infinities, so they can be less than all numbers. + /// (note that we need total order) + template + int compare(Int rhs) + { + if (rhs == 0) + return sign(); + + /// Different signs + if (is_negative() && rhs > 0) + return -1; + if (!is_negative() && rhs < 0) + return 1; + + /// Fractional number with magnitude less than one + if (normalized_exponent() < 0) + { + if (!is_negative()) + return rhs > 0 ? -1 : 1; + else + return rhs >= 0 ? -1 : 1; + } + + /// The case of the most negative integer + if constexpr (is_signed_v) + { + if (rhs == std::numeric_limits::lowest()) + { + assert(is_negative()); + + if (normalized_exponent() < static_cast(8 * sizeof(Int) - is_signed_v)) + return 1; + if (normalized_exponent() > static_cast(8 * sizeof(Int) - is_signed_v)) + return -1; + + if (mantissa() == 0) + return 0; + else + return -1; + } + } + + /// Too large number: abs(float) > abs(rhs). Also the case with infinities and NaN. + if (normalized_exponent() >= static_cast(8 * sizeof(Int) - is_signed_v)) + return is_negative() ? -1 : 1; + + using UInt = make_unsigned_t; + UInt uint_rhs = rhs < 0 ? -rhs : rhs; + + /// Smaller octave: abs(rhs) < abs(float) + if (uint_rhs < (static_cast(1) << normalized_exponent())) + return is_negative() ? -1 : 1; + + /// Larger octave: abs(rhs) > abs(float) + if (normalized_exponent() + 1 < static_cast(8 * sizeof(Int) - is_signed_v) + && uint_rhs >= (static_cast(1) << (normalized_exponent() + 1))) + return is_negative() ? 1 : -1; + + /// The same octave + /// uint_rhs == 2 ^ normalized_exponent + mantissa * 2 ^ (normalized_exponent - mantissa_bits) + + bool large_and_always_integer = normalized_exponent() >= static_cast(Traits::mantissa_bits); + + typename Traits::UInt a = large_and_always_integer + ? mantissa() << (normalized_exponent() - Traits::mantissa_bits) + : mantissa() >> (Traits::mantissa_bits - normalized_exponent()); + + typename Traits::UInt b = uint_rhs - (static_cast(1) << normalized_exponent()); + + if (a < b) + return is_negative() ? 1 : -1; + if (a > b) + return is_negative() ? -1 : 1; + + /// Float has no fractional part means that the numbers are equal. + if (large_and_always_integer || (mantissa() & ((1ULL << (Traits::mantissa_bits - normalized_exponent())) - 1)) == 0) + return 0; + else + /// Float has fractional part means its abs value is larger. + return is_negative() ? -1 : 1; + } + + + template + bool equals(Int rhs) + { + return compare(rhs) == 0; + } + + template + bool notEquals(Int rhs) + { + return compare(rhs) != 0; + } + + template + bool less(Int rhs) + { + return compare(rhs) < 0; + } + + template + bool greater(Int rhs) + { + return compare(rhs) > 0; + } + + template + bool lessOrEquals(Int rhs) + { + return compare(rhs) <= 0; + } + + template + bool greaterOrEquals(Int rhs) + { + return compare(rhs) >= 0; + } +}; + + +using DecomposedFloat64 = DecomposedFloat; +using DecomposedFloat32 = DecomposedFloat; diff --git a/base/common/arithmeticOverflow.h b/base/common/arithmeticOverflow.h index c170d214636..175e75a62f4 100644 --- a/base/common/arithmeticOverflow.h +++ b/base/common/arithmeticOverflow.h @@ -56,27 +56,33 @@ namespace common } template <> - inline bool addOverflow(__int128 x, __int128 y, __int128 & res) + inline bool addOverflow(Int128 x, Int128 y, Int128 & res) { - static constexpr __int128 min_int128 = minInt128(); - static constexpr __int128 max_int128 = maxInt128(); res = addIgnoreOverflow(x, y); - return (y > 0 && x > max_int128 - y) || (y < 0 && x < min_int128 - y); + return (y > 0 && x > std::numeric_limits::max() - y) || + (y < 0 && x < std::numeric_limits::min() - y); } template <> - inline bool addOverflow(wInt256 x, wInt256 y, wInt256 & res) + inline bool addOverflow(UInt128 x, UInt128 y, UInt128 & res) { res = addIgnoreOverflow(x, y); - return (y > 0 && x > std::numeric_limits::max() - y) || - (y < 0 && x < std::numeric_limits::min() - y); + return x > std::numeric_limits::max() - y; } template <> - inline bool addOverflow(wUInt256 x, wUInt256 y, wUInt256 & res) + inline bool addOverflow(Int256 x, Int256 y, Int256 & res) { res = addIgnoreOverflow(x, y); - return x > std::numeric_limits::max() - y; + return (y > 0 && x > std::numeric_limits::max() - y) || + (y < 0 && x < std::numeric_limits::min() - y); + } + + template <> + inline bool addOverflow(UInt256 x, UInt256 y, UInt256 & res) + { + res = addIgnoreOverflow(x, y); + return x > std::numeric_limits::max() - y; } template @@ -104,24 +110,30 @@ namespace common } template <> - inline bool subOverflow(__int128 x, __int128 y, __int128 & res) + inline bool subOverflow(Int128 x, Int128 y, Int128 & res) { - static constexpr __int128 min_int128 = minInt128(); - static constexpr __int128 max_int128 = maxInt128(); res = subIgnoreOverflow(x, y); - return (y < 0 && x > max_int128 + y) || (y > 0 && x < min_int128 + y); + return (y < 0 && x > std::numeric_limits::max() + y) || + (y > 0 && x < std::numeric_limits::min() + y); } template <> - inline bool subOverflow(wInt256 x, wInt256 y, wInt256 & res) + inline bool subOverflow(UInt128 x, UInt128 y, UInt128 & res) { res = subIgnoreOverflow(x, y); - return (y < 0 && x > std::numeric_limits::max() + y) || - (y > 0 && x < std::numeric_limits::min() + y); + return x < y; } template <> - inline bool subOverflow(wUInt256 x, wUInt256 y, wUInt256 & res) + inline bool subOverflow(Int256 x, Int256 y, Int256 & res) + { + res = subIgnoreOverflow(x, y); + return (y < 0 && x > std::numeric_limits::max() + y) || + (y > 0 && x < std::numeric_limits::min() + y); + } + + template <> + inline bool subOverflow(UInt256 x, UInt256 y, UInt256 & res) { res = subIgnoreOverflow(x, y); return x < y; @@ -151,36 +163,33 @@ namespace common return __builtin_smulll_overflow(x, y, &res); } + /// Overflow check is not implemented for big integers. + template <> - inline bool mulOverflow(__int128 x, __int128 y, __int128 & res) + inline bool mulOverflow(Int128 x, Int128 y, Int128 & res) { res = mulIgnoreOverflow(x, y); - if (!x || !y) - return false; - - unsigned __int128 a = (x > 0) ? x : -x; - unsigned __int128 b = (y > 0) ? y : -y; - return mulIgnoreOverflow(a, b) / b != a; + return false; } template <> - inline bool mulOverflow(wInt256 x, wInt256 y, wInt256 & res) + inline bool mulOverflow(Int256 x, Int256 y, Int256 & res) { res = mulIgnoreOverflow(x, y); - if (!x || !y) - return false; - - wInt256 a = (x > 0) ? x : -x; - wInt256 b = (y > 0) ? y : -y; - return mulIgnoreOverflow(a, b) / b != a; + return false; } template <> - inline bool mulOverflow(wUInt256 x, wUInt256 y, wUInt256 & res) + inline bool mulOverflow(UInt128 x, UInt128 y, UInt128 & res) { res = mulIgnoreOverflow(x, y); - if (!x || !y) - return false; - return res / y != x; + return false; + } + + template <> + inline bool mulOverflow(UInt256 x, UInt256 y, UInt256 & res) + { + res = mulIgnoreOverflow(x, y); + return false; } } diff --git a/base/common/extended_types.h b/base/common/extended_types.h index 2ae70c0f432..79209568ef5 100644 --- a/base/common/extended_types.h +++ b/base/common/extended_types.h @@ -5,16 +5,14 @@ #include #include -using Int128 = __int128; -using wInt256 = wide::integer<256, signed>; -using wUInt256 = wide::integer<256, unsigned>; +using Int128 = wide::integer<128, signed>; +using UInt128 = wide::integer<128, unsigned>; +using Int256 = wide::integer<256, signed>; +using UInt256 = wide::integer<256, unsigned>; -static_assert(sizeof(wInt256) == 32); -static_assert(sizeof(wUInt256) == 32); - -static constexpr __int128 minInt128() { return static_cast(1) << 127; } -static constexpr __int128 maxInt128() { return (static_cast(1) << 127) - 1; } +static_assert(sizeof(Int256) == 32); +static_assert(sizeof(UInt256) == 32); /// The standard library type traits, such as std::is_arithmetic, with one exception /// (std::common_type), are "set in stone". Attempting to specialize them causes undefined behavior. @@ -26,7 +24,7 @@ struct is_signed }; template <> struct is_signed { static constexpr bool value = true; }; -template <> struct is_signed { static constexpr bool value = true; }; +template <> struct is_signed { static constexpr bool value = true; }; template inline constexpr bool is_signed_v = is_signed::value; @@ -37,7 +35,8 @@ struct is_unsigned static constexpr bool value = std::is_unsigned_v; }; -template <> struct is_unsigned { static constexpr bool value = true; }; +template <> struct is_unsigned { static constexpr bool value = true; }; +template <> struct is_unsigned { static constexpr bool value = true; }; template inline constexpr bool is_unsigned_v = is_unsigned::value; @@ -51,8 +50,9 @@ struct is_integer }; template <> struct is_integer { static constexpr bool value = true; }; -template <> struct is_integer { static constexpr bool value = true; }; -template <> struct is_integer { static constexpr bool value = true; }; +template <> struct is_integer { static constexpr bool value = true; }; +template <> struct is_integer { static constexpr bool value = true; }; +template <> struct is_integer { static constexpr bool value = true; }; template inline constexpr bool is_integer_v = is_integer::value; @@ -64,7 +64,11 @@ struct is_arithmetic static constexpr bool value = std::is_arithmetic_v; }; -template <> struct is_arithmetic<__int128> { static constexpr bool value = true; }; +template <> struct is_arithmetic { static constexpr bool value = true; }; +template <> struct is_arithmetic { static constexpr bool value = true; }; +template <> struct is_arithmetic { static constexpr bool value = true; }; +template <> struct is_arithmetic { static constexpr bool value = true; }; + template inline constexpr bool is_arithmetic_v = is_arithmetic::value; @@ -75,9 +79,10 @@ struct make_unsigned typedef std::make_unsigned_t type; }; -template <> struct make_unsigned { using type = unsigned __int128; }; -template <> struct make_unsigned { using type = wUInt256; }; -template <> struct make_unsigned { using type = wUInt256; }; +template <> struct make_unsigned { using type = UInt128; }; +template <> struct make_unsigned { using type = UInt128; }; +template <> struct make_unsigned { using type = UInt256; }; +template <> struct make_unsigned { using type = UInt256; }; template using make_unsigned_t = typename make_unsigned::type; @@ -87,8 +92,10 @@ struct make_signed typedef std::make_signed_t type; }; -template <> struct make_signed { using type = wInt256; }; -template <> struct make_signed { using type = wInt256; }; +template <> struct make_signed { using type = Int128; }; +template <> struct make_signed { using type = Int128; }; +template <> struct make_signed { using type = Int256; }; +template <> struct make_signed { using type = Int256; }; template using make_signed_t = typename make_signed::type; @@ -98,8 +105,10 @@ struct is_big_int static constexpr bool value = false; }; -template <> struct is_big_int { static constexpr bool value = true; }; -template <> struct is_big_int { static constexpr bool value = true; }; +template <> struct is_big_int { static constexpr bool value = true; }; +template <> struct is_big_int { static constexpr bool value = true; }; +template <> struct is_big_int { static constexpr bool value = true; }; +template <> struct is_big_int { static constexpr bool value = true; }; template inline constexpr bool is_big_int_v = is_big_int::value; diff --git a/base/common/itoa.h b/base/common/itoa.h index a02e7b68c05..4c86239de36 100644 --- a/base/common/itoa.h +++ b/base/common/itoa.h @@ -30,9 +30,8 @@ #include #include #include +#include -using int128_t = __int128; -using uint128_t = unsigned __int128; namespace impl { @@ -106,7 +105,7 @@ using UnsignedOfSize = typename SelectType uint16_t, uint32_t, uint64_t, - uint128_t + __uint128_t >::Result; /// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in @@ -313,7 +312,8 @@ namespace convert } } -static inline int digits10(uint128_t x) +template +static inline int digits10(T x) { if (x < 10ULL) return 1; @@ -346,8 +346,11 @@ static inline int digits10(uint128_t x) return 12 + digits10(x / 1000000000000ULL); } -static inline char * writeUIntText(uint128_t x, char * p) +template +static inline char * writeUIntText(T x, char * p) { + static_assert(is_unsigned_v); + int len = digits10(x); auto pp = p + len; while (x >= 100) @@ -370,14 +373,28 @@ static inline char * writeLeadingMinus(char * pos) return pos + 1; } -static inline char * writeSIntText(int128_t x, char * pos) +template +static inline char * writeSIntText(T x, char * pos) { - static constexpr int128_t min_int128 = uint128_t(1) << 127; + static_assert(std::is_same_v || std::is_same_v); - if (unlikely(x == min_int128)) + using UnsignedT = make_unsigned_t; + static constexpr T min_int = UnsignedT(1) << (sizeof(T) * 8 - 1); + + if (unlikely(x == min_int)) { - memcpy(pos, "-170141183460469231731687303715884105728", 40); - return pos + 40; + if constexpr (std::is_same_v) + { + const char * res = "-170141183460469231731687303715884105728"; + memcpy(pos, res, strlen(res)); + return pos + strlen(res); + } + else if constexpr (std::is_same_v) + { + const char * res = "-57896044618658097711785492504343953926634992332820282019728792003956564819968"; + memcpy(pos, res, strlen(res)); + return pos + strlen(res); + } } if (x < 0) @@ -385,7 +402,7 @@ static inline char * writeSIntText(int128_t x, char * pos) x = -x; pos = writeLeadingMinus(pos); } - return writeUIntText(static_cast(x), pos); + return writeUIntText(UnsignedT(x), pos); } } @@ -403,13 +420,25 @@ inline char * itoa(char8_t i, char * p) } template <> -inline char * itoa(uint128_t i, char * p) +inline char * itoa(UInt128 i, char * p) { return impl::writeUIntText(i, p); } template <> -inline char * itoa(int128_t i, char * p) +inline char * itoa(Int128 i, char * p) +{ + return impl::writeSIntText(i, p); +} + +template <> +inline char * itoa(UInt256 i, char * p) +{ + return impl::writeUIntText(i, p); +} + +template <> +inline char * itoa(Int256 i, char * p) { return impl::writeSIntText(i, p); } diff --git a/base/common/strong_typedef.h b/base/common/strong_typedef.h index 77b83bfa6e5..a1e2b253aa7 100644 --- a/base/common/strong_typedef.h +++ b/base/common/strong_typedef.h @@ -4,7 +4,8 @@ #include #include -template + +template struct StrongTypedef { private: @@ -38,14 +39,16 @@ public: bool operator==(const Self & rhs) const { return t == rhs.t; } bool operator<(const Self & rhs) const { return t < rhs.t; } + bool operator>(const Self & rhs) const { return t > rhs.t; } T & toUnderType() { return t; } const T & toUnderType() const { return t; } }; + namespace std { - template + template struct hash> { size_t operator()(const StrongTypedef & x) const diff --git a/base/common/throwError.h b/base/common/throwError.h index b495a0fbc7a..dd352913e78 100644 --- a/base/common/throwError.h +++ b/base/common/throwError.h @@ -1,13 +1,15 @@ #pragma once + #include + /// Throw DB::Exception-like exception before its definition. /// DB::Exception derived from Poco::Exception derived from std::exception. -/// DB::Exception generally cought as Poco::Exception. std::exception generally has other catch blocks and could lead to other outcomes. +/// DB::Exception generally caught as Poco::Exception. std::exception generally has other catch blocks and could lead to other outcomes. /// DB::Exception is not defined yet. It'd better to throw Poco::Exception but we do not want to include any big header here, even . /// So we throw some std::exception instead in the hope its catch block is the same as DB::Exception one. template -inline void throwError(const T & err) +[[noreturn]] inline void throwError(const T & err) { throw std::runtime_error(err); } diff --git a/base/common/wide_integer.h b/base/common/wide_integer.h index c9d1eaa32aa..419b4e4558c 100644 --- a/base/common/wide_integer.h +++ b/base/common/wide_integer.h @@ -58,9 +58,11 @@ public: using signed_base_type = int64_t; // ctors - constexpr integer() noexcept; + constexpr integer() noexcept = default; + template constexpr integer(T rhs) noexcept; + template constexpr integer(std::initializer_list il) noexcept; @@ -108,9 +110,9 @@ public: constexpr explicit operator bool() const noexcept; template - using __integral_not_wide_integer_class = typename std::enable_if::value, T>::type; + using _integral_not_wide_integer_class = typename std::enable_if::value, T>::type; - template > + template > constexpr operator T() const noexcept; constexpr operator long double() const noexcept; @@ -119,25 +121,27 @@ public: struct _impl; + base_type items[_impl::item_count]; + private: template friend class integer; friend class std::numeric_limits>; friend class std::numeric_limits>; - - base_type items[_impl::item_count]; }; template static constexpr bool ArithmeticConcept() noexcept; + template -using __only_arithmetic = typename std::enable_if() && ArithmeticConcept()>::type; +using _only_arithmetic = typename std::enable_if() && ArithmeticConcept()>::type; template static constexpr bool IntegralConcept() noexcept; + template -using __only_integer = typename std::enable_if() && IntegralConcept()>::type; +using _only_integer = typename std::enable_if() && IntegralConcept()>::type; // Unary operators template @@ -153,54 +157,55 @@ constexpr integer operator+(const integer & lhs) noe template std::common_type_t, integer> constexpr operator*(const integer & lhs, const integer & rhs); -template > +template > std::common_type_t constexpr operator*(const Arithmetic & rhs, const Arithmetic2 & lhs); template std::common_type_t, integer> constexpr operator/(const integer & lhs, const integer & rhs); -template > +template > std::common_type_t constexpr operator/(const Arithmetic & rhs, const Arithmetic2 & lhs); template std::common_type_t, integer> constexpr operator+(const integer & lhs, const integer & rhs); -template > +template > std::common_type_t constexpr operator+(const Arithmetic & rhs, const Arithmetic2 & lhs); template std::common_type_t, integer> constexpr operator-(const integer & lhs, const integer & rhs); -template > +template > std::common_type_t constexpr operator-(const Arithmetic & rhs, const Arithmetic2 & lhs); template std::common_type_t, integer> constexpr operator%(const integer & lhs, const integer & rhs); -template > +template > std::common_type_t constexpr operator%(const Integral & rhs, const Integral2 & lhs); template std::common_type_t, integer> constexpr operator&(const integer & lhs, const integer & rhs); -template > +template > std::common_type_t constexpr operator&(const Integral & rhs, const Integral2 & lhs); template std::common_type_t, integer> constexpr operator|(const integer & lhs, const integer & rhs); -template > +template > std::common_type_t constexpr operator|(const Integral & rhs, const Integral2 & lhs); template std::common_type_t, integer> constexpr operator^(const integer & lhs, const integer & rhs); -template > +template > std::common_type_t constexpr operator^(const Integral & rhs, const Integral2 & lhs); // TODO: Integral template constexpr integer operator<<(const integer & lhs, int n) noexcept; + template constexpr integer operator>>(const integer & lhs, int n) noexcept; @@ -217,32 +222,32 @@ constexpr integer operator>>(const integer & lhs, In template constexpr bool operator<(const integer & lhs, const integer & rhs); -template > +template > constexpr bool operator<(const Arithmetic & rhs, const Arithmetic2 & lhs); template constexpr bool operator>(const integer & lhs, const integer & rhs); -template > +template > constexpr bool operator>(const Arithmetic & rhs, const Arithmetic2 & lhs); template constexpr bool operator<=(const integer & lhs, const integer & rhs); -template > +template > constexpr bool operator<=(const Arithmetic & rhs, const Arithmetic2 & lhs); template constexpr bool operator>=(const integer & lhs, const integer & rhs); -template > +template > constexpr bool operator>=(const Arithmetic & rhs, const Arithmetic2 & lhs); template constexpr bool operator==(const integer & lhs, const integer & rhs); -template > +template > constexpr bool operator==(const Arithmetic & rhs, const Arithmetic2 & lhs); template constexpr bool operator!=(const integer & lhs, const integer & rhs); -template > +template > constexpr bool operator!=(const Arithmetic & rhs, const Arithmetic2 & lhs); } diff --git a/base/common/wide_integer_impl.h b/base/common/wide_integer_impl.h index 456c10a22e4..725caec6a3e 100644 --- a/base/common/wide_integer_impl.h +++ b/base/common/wide_integer_impl.h @@ -5,6 +5,7 @@ /// (See at http://www.boost.org/LICENSE_1_0.txt) #include "throwError.h" + #include #include #include @@ -81,7 +82,7 @@ public: res.items[T::_impl::big(0)] = std::numeric_limits::signed_base_type>::min(); return res; } - return 0; + return wide::integer(0); } static constexpr wide::integer max() noexcept @@ -176,7 +177,7 @@ struct integer::_impl constexpr static bool is_negative(const integer & n) noexcept { if constexpr (std::is_same_v) - return static_cast(n.items[big(0)]) < 0; + return static_cast(n.items[integer::_impl::big(0)]) < 0; else return false; } @@ -193,40 +194,36 @@ struct integer::_impl template constexpr static integer make_positive(const integer & n) noexcept { - return is_negative(n) ? operator_unary_minus(n) : n; + return is_negative(n) ? integer(operator_unary_minus(n)) : n; } template __attribute__((no_sanitize("undefined"))) constexpr static auto to_Integral(T f) noexcept { - if constexpr (std::is_same_v) - return f; - else if constexpr (std::is_signed_v) + if constexpr (std::is_signed_v) return static_cast(f); else return static_cast(f); } template - constexpr static void wide_integer_from_bultin(integer & self, Integral rhs) noexcept + constexpr static void wide_integer_from_builtin(integer & self, Integral rhs) noexcept { - self.items[0] = _impl::to_Integral(rhs); - if constexpr (std::is_same_v) - self.items[1] = rhs >> base_bits; + static_assert(sizeof(Integral) <= sizeof(base_type)); - constexpr const unsigned start = (sizeof(Integral) == 16) ? 2 : 1; + self.items[0] = _impl::to_Integral(rhs); if constexpr (std::is_signed_v) { if (rhs < 0) { - for (unsigned i = start; i < item_count; ++i) + for (size_t i = 1; i < item_count; ++i) self.items[i] = -1; return; } } - for (unsigned i = start; i < item_count; ++i) + for (size_t i = 1; i < item_count; ++i) self.items[i] = 0; } @@ -239,7 +236,8 @@ struct integer::_impl * a_(n - 1) = a_n * max_int + b2, a_n <= max_int <- base case. */ template - constexpr static void set_multiplier(integer & self, T t) noexcept { + constexpr static void set_multiplier(integer & self, T t) noexcept + { constexpr uint64_t max_int = std::numeric_limits::max(); /// Implementation specific behaviour on overflow (if we don't check here, stack overflow will triggered in bigint_cast). @@ -260,7 +258,8 @@ struct integer::_impl self += static_cast(t - alpha * static_cast(max_int)); // += b_i } - constexpr static void wide_integer_from_bultin(integer& self, double rhs) noexcept { + constexpr static void wide_integer_from_builtin(integer& self, double rhs) noexcept + { constexpr int64_t max_int = std::numeric_limits::max(); constexpr int64_t min_int = std::numeric_limits::min(); @@ -383,13 +382,13 @@ struct integer::_impl if (bit_shift) lhs.items[big(items_shift)] |= std::numeric_limits::max() << (base_bits - bit_shift); - for (unsigned i = item_count - items_shift; i < items_shift; ++i) - lhs.items[little(i)] = std::numeric_limits::max(); + for (unsigned i = 0; i < items_shift; ++i) + lhs.items[big(i)] = std::numeric_limits::max(); } else { - for (unsigned i = item_count - items_shift; i < items_shift; ++i) - lhs.items[little(i)] = 0; + for (unsigned i = 0; i < items_shift; ++i) + lhs.items[big(i)] = 0; } return lhs; @@ -397,23 +396,23 @@ struct integer::_impl private: template - constexpr static base_type get_item(const T & x, unsigned number) + constexpr static base_type get_item(const T & x, unsigned idx) { if constexpr (IsWideInteger::value) { - if (number < T::_impl::item_count) - return x.items[number]; + if (idx < T::_impl::item_count) + return x.items[idx]; return 0; } else { if constexpr (sizeof(T) <= sizeof(base_type)) { - if (!number) + if (0 == idx) return x; } - else if (number * sizeof(base_type) < sizeof(T)) - return x >> (number * base_bits); // & std::numeric_limits::max() + else if (idx * sizeof(base_type) < sizeof(T)) + return x >> (idx * base_bits); // & std::numeric_limits::max() return 0; } } @@ -439,7 +438,7 @@ private: for (unsigned i = 1; i < item_count; ++i) { - if (underflows[i-1]) + if (underflows[i - 1]) { base_type & res_item = res.items[little(i)]; if (res_item == 0) @@ -472,7 +471,7 @@ private: for (unsigned i = 1; i < item_count; ++i) { - if (overflows[i-1]) + if (overflows[i - 1]) { base_type & res_item = res.items[little(i)]; ++res_item; @@ -532,6 +531,17 @@ private: res.items[little(2)] = r12 >> 64; return res; } + else if constexpr (Bits == 128 && sizeof(base_type) == 8) + { + using CompilerUInt128 = unsigned __int128; + CompilerUInt128 a = (CompilerUInt128(lhs.items[1]) << 64) + lhs.items[0]; + CompilerUInt128 b = (CompilerUInt128(rhs.items[1]) << 64) + rhs.items[0]; + CompilerUInt128 c = a * b; + integer res; + res.items[0] = c; + res.items[1] = c >> 64; + return res; + } else { integer res{}; @@ -657,7 +667,7 @@ public: } template - constexpr static bool operator_more(const integer & lhs, const T & rhs) noexcept + constexpr static bool operator_greater(const integer & lhs, const T & rhs) noexcept { if constexpr (should_keep_size()) { @@ -677,7 +687,7 @@ public: else { static_assert(IsWideInteger::value); - return std::common_type_t, T>::_impl::operator_more(T(lhs), rhs); + return std::common_type_t, T>::_impl::operator_greater(T(lhs), rhs); } } @@ -764,7 +774,6 @@ public: } } -private: template constexpr static bool is_zero(const T & x) { @@ -781,46 +790,65 @@ private: } /// returns quotient as result and remainder in numerator. - template - constexpr static T divide(T & numerator, T && denominator) + template + constexpr static integer divide(integer & numerator, integer denominator) { - if (is_zero(denominator)) - throwError("divide by zero"); + static_assert(std::is_unsigned_v); - T & n = numerator; - T & d = denominator; - T x = 1; - T quotient = 0; - - while (!operator_more(d, n) && operator_eq(operator_amp(shift_right(d, base_bits * item_count - 1), 1), 0)) + if constexpr (Bits == 128 && sizeof(base_type) == 8) { - x = shift_left(x, 1); - d = shift_left(d, 1); + using CompilerUInt128 = unsigned __int128; + + CompilerUInt128 a = (CompilerUInt128(numerator.items[1]) << 64) + numerator.items[0]; + CompilerUInt128 b = (CompilerUInt128(denominator.items[1]) << 64) + denominator.items[0]; + CompilerUInt128 c = a / b; + + integer res; + res.items[0] = c; + res.items[1] = c >> 64; + + CompilerUInt128 remainder = a - b * c; + numerator.items[0] = remainder; + numerator.items[1] = remainder >> 64; + + return res; } - while (!operator_eq(x, 0)) + if (is_zero(denominator)) + throwError("Division by zero"); + + integer x = 1; + integer quotient = 0; + + while (!operator_greater(denominator, numerator) && is_zero(operator_amp(shift_right(denominator, Bits2 - 1), 1))) { - if (!operator_more(d, n)) + x = shift_left(x, 1); + denominator = shift_left(denominator, 1); + } + + while (!is_zero(x)) + { + if (!operator_greater(denominator, numerator)) { - n = operator_minus(n, d); + numerator = operator_minus(numerator, denominator); quotient = operator_pipe(quotient, x); } x = shift_right(x, 1); - d = shift_right(d, 1); + denominator = shift_right(denominator, 1); } return quotient; } -public: template constexpr static auto operator_slash(const integer & lhs, const T & rhs) { if constexpr (should_keep_size()) { - integer numerator = make_positive(lhs); - integer quotient = divide(numerator, make_positive(integer(rhs))); + integer numerator = make_positive(lhs); + integer denominator = make_positive(integer(rhs)); + integer quotient = integer::_impl::divide(numerator, std::move(denominator)); if (std::is_same_v && is_negative(rhs) != is_negative(lhs)) quotient = operator_unary_minus(quotient); @@ -838,8 +866,9 @@ public: { if constexpr (should_keep_size()) { - integer remainder = make_positive(lhs); - divide(remainder, make_positive(integer(rhs))); + integer remainder = make_positive(lhs); + integer denominator = make_positive(integer(rhs)); + integer::_impl::divide(remainder, std::move(denominator)); if (std::is_same_v && is_negative(lhs)) remainder = operator_unary_minus(remainder); @@ -905,7 +934,7 @@ public: ++c; } else - throwError("invalid char from"); + throwError("Invalid char from"); } } else @@ -913,7 +942,7 @@ public: while (*c) { if (*c < '0' || *c > '9') - throwError("invalid char from"); + throwError("Invalid char from"); res = multiply(res, 10U); res = plus(res, *c - '0'); @@ -930,11 +959,6 @@ public: // Members -template -constexpr integer::integer() noexcept - : items{} -{} - template template constexpr integer::integer(T rhs) noexcept @@ -943,7 +967,7 @@ constexpr integer::integer(T rhs) noexcept if constexpr (IsWideInteger::value) _impl::wide_integer_from_wide_integer(*this, rhs); else - _impl::wide_integer_from_bultin(*this, rhs); + _impl::wide_integer_from_builtin(*this, rhs); } template @@ -956,10 +980,19 @@ constexpr integer::integer(std::initializer_list il) noexcept if constexpr (IsWideInteger::value) _impl::wide_integer_from_wide_integer(*this, *il.begin()); else - _impl::wide_integer_from_bultin(*this, *il.begin()); + _impl::wide_integer_from_builtin(*this, *il.begin()); + } + else if (il.size() == 0) + { + _impl::wide_integer_from_builtin(*this, 0); } else - _impl::wide_integer_from_bultin(*this, 0); + { + auto it = il.begin(); + for (size_t i = 0; i < _impl::item_count; ++i) + if (it < il.end()) + items[i] = *it; + } } template @@ -974,7 +1007,7 @@ template template constexpr integer & integer::operator=(T rhs) noexcept { - _impl::wide_integer_from_bultin(*this, rhs); + _impl::wide_integer_from_builtin(*this, rhs); return *this; } @@ -1057,7 +1090,7 @@ constexpr integer & integer::operator>>=(int n) noex { if (static_cast(n) >= Bits) { - if (is_negative(*this)) + if (_impl::is_negative(*this)) *this = -1; else *this = 0; @@ -1107,16 +1140,17 @@ template template constexpr integer::operator T() const noexcept { - if constexpr (std::is_same_v) - { - static_assert(Bits >= 128); - return (__int128(items[1]) << 64) | items[0]; - } - else - { - static_assert(std::numeric_limits::is_integer); - return items[0]; - } + static_assert(std::numeric_limits::is_integer); + + /// NOTE: memcpy will suffice, but unfortunately, this function is constexpr. + + using UnsignedT = std::make_unsigned_t; + + UnsignedT res{}; + for (unsigned i = 0; i < _impl::item_count && i < (sizeof(T) + sizeof(base_type) - 1) / sizeof(base_type); ++i) + res += UnsignedT(items[i]) << (sizeof(base_type) * 8 * i); + + return res; } template @@ -1280,7 +1314,7 @@ template constexpr integer operator<<(const integer & lhs, int n) noexcept { if (static_cast(n) >= Bits) - return 0; + return integer(0); if (n <= 0) return lhs; return integer::_impl::shift_left(lhs, n); @@ -1289,7 +1323,7 @@ template constexpr integer operator>>(const integer & lhs, int n) noexcept { if (static_cast(n) >= Bits) - return 0; + return integer(0); if (n <= 0) return lhs; return integer::_impl::shift_right(lhs, n); @@ -1309,7 +1343,7 @@ constexpr bool operator<(const Arithmetic & lhs, const Arithmetic2 & rhs) template constexpr bool operator>(const integer & lhs, const integer & rhs) { - return std::common_type_t, integer>::_impl::operator_more(lhs, rhs); + return std::common_type_t, integer>::_impl::operator_greater(lhs, rhs); } template constexpr bool operator>(const Arithmetic & lhs, const Arithmetic2 & rhs) @@ -1332,7 +1366,7 @@ constexpr bool operator<=(const Arithmetic & lhs, const Arithmetic2 & rhs) template constexpr bool operator>=(const integer & lhs, const integer & rhs) { - return std::common_type_t, integer>::_impl::operator_more(lhs, rhs) + return std::common_type_t, integer>::_impl::operator_greater(lhs, rhs) || std::common_type_t, integer>::_impl::operator_eq(lhs, rhs); } template diff --git a/base/common/wide_integer_to_string.h b/base/common/wide_integer_to_string.h index 9908ef4be7a..8b794fe9bcb 100644 --- a/base/common/wide_integer_to_string.h +++ b/base/common/wide_integer_to_string.h @@ -1,9 +1,12 @@ #pragma once #include +#include +#include #include "wide_integer.h" + namespace wide { @@ -33,3 +36,34 @@ inline std::string to_string(const integer & n) } } + + +template +std::ostream & operator<<(std::ostream & out, const wide::integer & value) +{ + return out << to_string(value); +} + + +/// See https://fmt.dev/latest/api.html#formatting-user-defined-types +template +struct fmt::formatter> +{ + constexpr auto parse(format_parse_context & ctx) + { + auto it = ctx.begin(); + auto end = ctx.end(); + + /// Only support {}. + if (it != end && *it != '}') + throw format_error("invalid format"); + + return it; + } + + template + auto format(const wide::integer & value, FormatContext & ctx) + { + return format_to(ctx.out(), "{}", to_string(value)); + } +}; diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 1b7d0064b99..1028dc7d2dc 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -102,7 +102,7 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) auto * logger = &Poco::Logger::get("SentryWriter"); if (config.getBool("send_crash_reports.enabled", false)) { - if (debug || (strlen(VERSION_OFFICIAL) > 0)) + if (debug || (strlen(VERSION_OFFICIAL) > 0)) //-V560 { enabled = true; } diff --git a/base/glibc-compatibility/CMakeLists.txt b/base/glibc-compatibility/CMakeLists.txt index e785e2ab2ce..8cba91de33f 100644 --- a/base/glibc-compatibility/CMakeLists.txt +++ b/base/glibc-compatibility/CMakeLists.txt @@ -15,7 +15,7 @@ if (GLIBC_COMPATIBILITY) add_headers_and_sources(glibc_compatibility .) add_headers_and_sources(glibc_compatibility musl) - if (ARCH_ARM) + if (ARCH_AARCH64) list (APPEND glibc_compatibility_sources musl/aarch64/syscall.s musl/aarch64/longjmp.s) set (musl_arch_include_dir musl/aarch64) elseif (ARCH_AMD64) diff --git a/base/glibc-compatibility/musl/lgamma.c b/base/glibc-compatibility/musl/lgamma.c index fb9d105d0fa..5e959504e29 100644 --- a/base/glibc-compatibility/musl/lgamma.c +++ b/base/glibc-compatibility/musl/lgamma.c @@ -78,6 +78,9 @@ * */ +// Disable warnings by PVS-Studio +//-V::GA + static const double pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */ a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */ diff --git a/base/glibc-compatibility/musl/lgammal.c b/base/glibc-compatibility/musl/lgammal.c index b158748ce1f..775559f13b6 100644 --- a/base/glibc-compatibility/musl/lgammal.c +++ b/base/glibc-compatibility/musl/lgammal.c @@ -85,6 +85,9 @@ * */ +// Disable warnings by PVS-Studio +//-V::GA + #include #include #include "libm.h" diff --git a/base/glibc-compatibility/musl/libm.h b/base/glibc-compatibility/musl/libm.h index 55520c2fb03..e5029318693 100644 --- a/base/glibc-compatibility/musl/libm.h +++ b/base/glibc-compatibility/musl/libm.h @@ -155,7 +155,7 @@ static inline long double fp_barrierl(long double x) static inline void fp_force_evalf(float x) { volatile float y; - y = x; + y = x; //-V1001 } #endif @@ -164,7 +164,7 @@ static inline void fp_force_evalf(float x) static inline void fp_force_eval(double x) { volatile double y; - y = x; + y = x; //-V1001 } #endif @@ -173,7 +173,7 @@ static inline void fp_force_eval(double x) static inline void fp_force_evall(long double x) { volatile long double y; - y = x; + y = x; //-V1001 } #endif diff --git a/base/glibc-compatibility/musl/powf.c b/base/glibc-compatibility/musl/powf.c index de8fab54554..35dc3611b94 100644 --- a/base/glibc-compatibility/musl/powf.c +++ b/base/glibc-compatibility/musl/powf.c @@ -3,6 +3,9 @@ * SPDX-License-Identifier: MIT */ +// Disable warnings by PVS-Studio +//-V::GA + #include #include #include "libm.h" diff --git a/base/loggers/Loggers.cpp b/base/loggers/Loggers.cpp index f66b1eb5b91..4a66d43606b 100644 --- a/base/loggers/Loggers.cpp +++ b/base/loggers/Loggers.cpp @@ -40,7 +40,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log split->addTextLog(log, text_log_max_priority); auto current_logger = config.getString("logger", ""); - if (config_logger == current_logger) + if (config_logger == current_logger) //-V1051 return; config_logger = current_logger; diff --git a/base/pcg-random/pcg_extras.hpp b/base/pcg-random/pcg_extras.hpp index b71e859a25f..39c91c4ecfa 100644 --- a/base/pcg-random/pcg_extras.hpp +++ b/base/pcg-random/pcg_extras.hpp @@ -447,69 +447,6 @@ inline SrcIter uneven_copy(SrcIter src_first, std::integral_constant{}); } -/* generate_to, fill in a fixed-size array of integral type using a SeedSeq - * (actually works for any random-access iterator) - */ - -template -inline void generate_to_impl(SeedSeq&& generator, DestIter dest, - std::true_type) -{ - generator.generate(dest, dest+size); -} - -template -void generate_to_impl(SeedSeq&& generator, DestIter dest, - std::false_type) -{ - typedef typename std::iterator_traits::value_type dest_t; - constexpr auto DEST_SIZE = sizeof(dest_t); - constexpr auto GEN_SIZE = sizeof(uint32_t); - - constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE; - constexpr size_t FROM_ELEMS = - GEN_IS_SMALLER - ? size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE) - : (size + (GEN_SIZE / DEST_SIZE) - 1) - / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER); - // this odd code ^^^^^^^^^^^^^^^^^ is work-around for - // a bug: http://llvm.org/bugs/show_bug.cgi?id=21287 - - if (FROM_ELEMS <= 1024) { - uint32_t buffer[FROM_ELEMS]; - generator.generate(buffer, buffer+FROM_ELEMS); - uneven_copy(buffer, dest, dest+size); - } else { - uint32_t* buffer = static_cast(malloc(GEN_SIZE * FROM_ELEMS)); - generator.generate(buffer, buffer+FROM_ELEMS); - uneven_copy(buffer, dest, dest+size); - free(static_cast(buffer)); - } -} - -template -inline void generate_to(SeedSeq&& generator, DestIter dest) -{ - typedef typename std::iterator_traits::value_type dest_t; - constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t); - - generate_to_impl(std::forward(generator), dest, - std::integral_constant{}); -} - -/* generate_one, produce a value of integral type using a SeedSeq - * (optionally, we can have it produce more than one and pick which one - * we want) - */ - -template -inline UInt generate_one(SeedSeq&& generator) -{ - UInt result[N]; - generate_to(std::forward(generator), result); - return result[i]; -} - template auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound) -> typename RngType::result_type @@ -517,7 +454,7 @@ auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound) typedef typename RngType::result_type rtype; rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound) % upper_bound; - for (;;) { + for (;;) { //-V1044 rtype r = rng() - RngType::min(); if (r >= threshold) return r % upper_bound; diff --git a/base/pcg-random/pcg_random.hpp b/base/pcg-random/pcg_random.hpp index b7145e2309c..d9d3519a4cf 100644 --- a/base/pcg-random/pcg_random.hpp +++ b/base/pcg-random/pcg_random.hpp @@ -928,7 +928,7 @@ struct rxs_m_xs_mixin { constexpr bitcount_t shift = bits - xtypebits; constexpr bitcount_t mask = (1 << opbits) - 1; bitcount_t rshift = - opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; + opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; //-V547 internal ^= internal >> (opbits + rshift); internal *= mcg_multiplier::multiplier(); xtype result = internal >> shift; @@ -950,7 +950,7 @@ struct rxs_m_xs_mixin { internal *= mcg_unmultiplier::unmultiplier(); - bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; + bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; //-V547 internal = unxorshift(internal, bits, opbits + rshift); return internal; @@ -975,7 +975,7 @@ struct rxs_m_mixin { : 2; constexpr bitcount_t shift = bits - xtypebits; constexpr bitcount_t mask = (1 << opbits) - 1; - bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; + bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; //-V547 internal ^= internal >> (opbits + rshift); internal *= mcg_multiplier::multiplier(); xtype result = internal >> shift; @@ -1366,7 +1366,7 @@ void extended::selfinit() // - any strange correlations would only be apparent if we // were to backstep the generator so that the base generator // was generating the same values again - result_type xdiff = baseclass::operator()() - baseclass::operator()(); + result_type xdiff = baseclass::operator()() - baseclass::operator()(); //-V501 for (size_t i = 0; i < table_size; ++i) { data_[i] = baseclass::operator()() ^ xdiff; } diff --git a/cmake/find/llvm.cmake b/cmake/find/llvm.cmake index 6ea7a5fd683..e08f45b9932 100644 --- a/cmake/find/llvm.cmake +++ b/cmake/find/llvm.cmake @@ -2,7 +2,7 @@ if (APPLE OR SPLIT_SHARED_LIBRARIES OR NOT ARCH_AMD64) set (ENABLE_EMBEDDED_COMPILER OFF CACHE INTERNAL "") endif() -option (ENABLE_EMBEDDED_COMPILER "Set to TRUE to enable support for 'compile_expressions' option for query execution" ${ENABLE_LIBRARIES}) +option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ON) # Broken in macos. TODO: update clang, re-test, enable on Apple if (ENABLE_EMBEDDED_COMPILER AND NOT SPLIT_SHARED_LIBRARIES AND ARCH_AMD64 AND NOT (SANITIZE STREQUAL "undefined")) option (USE_INTERNAL_LLVM_LIBRARY "Use bundled or system LLVM library." ${NOT_UNBUNDLED}) diff --git a/contrib/datasketches-cpp b/contrib/datasketches-cpp index f915d35b2de..7d73d7610db 160000 --- a/contrib/datasketches-cpp +++ b/contrib/datasketches-cpp @@ -1 +1 @@ -Subproject commit f915d35b2de676683493c86c585141a1e1c83334 +Subproject commit 7d73d7610db31d4e1ecde0fb3a7ee90ef371207f diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index a7cc398e5c9..42c720a7e63 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -308,6 +308,8 @@ function run_tests 01354_order_by_tuple_collate_const 01355_ilike 01411_bayesian_ab_testing + 01798_uniq_theta_sketch + 01799_long_uniq_theta_sketch collate collation _orc_ @@ -370,6 +372,10 @@ function run_tests # Depends on AWS 01801_s3_cluster + + # Depends on LLVM JIT + 01852_jit_if + 01865_jit_comparison_constant_result ) (time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" diff --git a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml index 1e82f137961..dd6b7467afc 100644 --- a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml +++ b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml @@ -14,11 +14,6 @@ 10G - - - - - diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index bd7eee4c166..c5e24b5e37f 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -35,7 +35,7 @@ RUN apt-get update \ ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone -RUN pip3 install urllib3 testflows==1.6.74 docker-compose docker dicttoxml kazoo tzlocal +RUN pip3 install urllib3 testflows==1.6.74 docker-compose docker dicttoxml kazoo tzlocal python-dateutil numpy ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 17.09.1-ce @@ -74,4 +74,3 @@ VOLUME /var/lib/docker EXPOSE 2375 ENTRYPOINT ["dockerd-entrypoint.sh"] CMD ["sh", "-c", "python3 regression.py --no-color -o classic --local --clickhouse-binary-path ${CLICKHOUSE_TESTS_SERVER_BIN_PATH} --log test.log ${TESTFLOWS_OPTS}; cat test.log | tfs report results --format json > results.json; /usr/local/bin/process_testflows_result.py || echo -e 'failure\tCannot parse results' > check_status.tsv"] - diff --git a/docs/en/commercial/cloud.md b/docs/en/commercial/cloud.md index 953a0ab5748..5a897a77db2 100644 --- a/docs/en/commercial/cloud.md +++ b/docs/en/commercial/cloud.md @@ -31,10 +31,10 @@ toc_title: Cloud ## Alibaba Cloud {#alibaba-cloud} -Alibaba Cloud Managed Service for ClickHouse. [China Site](https://www.aliyun.com/product/clickhouse) (will be available at the international site in May 2021). Provides the following key features: +[Alibaba Cloud Managed Service for ClickHouse](https://www.alibabacloud.com/product/clickhouse) provides the following key features: - Highly reliable cloud disk storage engine based on [Alibaba Cloud Apsara](https://www.alibabacloud.com/product/apsara-stack) distributed system -- Expand capacity on-demand without manual data migration +- Expand capacity on demand without manual data migration - Support single-node, single-replica, multi-node, and multi-replica architectures, and support hot and cold data tiering - Support access allow-list, one-key recovery, multi-layer network security protection, cloud disk encryption - Seamless integration with cloud log systems, databases, and data application tools diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index eb0d92b7738..c07ab337cb0 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -139,6 +139,7 @@ The following settings can be specified in configuration file for given endpoint - `endpoint` — Specifies prefix of an endpoint. Mandatory. - `access_key_id` and `secret_access_key` — Specifies credentials to use with given endpoint. Optional. +- `region` — Specifies S3 region name. Optional. - `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint. Optional, default value is `false`. - `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Optional, default value is `false`. - `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be speficied multiple times. @@ -152,6 +153,7 @@ The following settings can be specified in configuration file for given endpoint https://storage.yandexcloud.net/my-test-bucket-768/ + diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index dfe0ca84723..8743090df41 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -739,6 +739,7 @@ Configuration markup: https://storage.yandexcloud.net/my-bucket/root-path/ your_access_key_id your_secret_access_key + your_base64_encoded_customer_key http://proxy1 @@ -764,6 +765,7 @@ Required parameters: - `secret_access_key` — S3 secret access key. Optional parameters: +- `region` — S3 region name. - `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`. - `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`. - `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL. diff --git a/docs/en/operations/system-tables/dictionaries.md b/docs/en/operations/system-tables/dictionaries.md index 3d3bbe2af4e..2bc1be51f19 100644 --- a/docs/en/operations/system-tables/dictionaries.md +++ b/docs/en/operations/system-tables/dictionaries.md @@ -21,6 +21,7 @@ Columns: - `bytes_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary. - `query_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reboot. - `hit_rate` ([Float64](../../sql-reference/data-types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache. +- `found_rate` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of uses for which the value was found. - `element_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of items stored in the dictionary. - `load_factor` ([Float64](../../sql-reference/data-types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table). - `source` ([String](../../sql-reference/data-types/string.md)) — Text describing the [data source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) for the dictionary. @@ -60,4 +61,4 @@ SELECT * FROM system.dictionaries └──────────┴──────┴────────┴─────────────┴──────┴────────┴──────────────────────────────────────┴─────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴───────────────────────┴────────────────────────────┴──────────────┴──────────────┴─────────────────────┴──────────────────────────────┘───────────────────────┴────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/dictionaries) \ No newline at end of file +[Original article](https://clickhouse.tech/docs/en/operations/system_tables/dictionaries) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md index 56ef598f7e7..32d174136e0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md @@ -12,6 +12,9 @@ The result depends on the order of running the query, and is nondeterministic. When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function. +!!! note "Note" + Using `quantileTDigestWeighted` [is not recommended for tiny data sets](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) and can lead to significat error. In this case, consider possibility of using [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) instead. + **Syntax** ``` sql diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniq.md b/docs/en/sql-reference/aggregate-functions/reference/uniq.md index 7ba2cdc6cb8..f060b85c976 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniq.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniq.md @@ -38,3 +38,4 @@ We recommend using this function in almost all scenarios. - [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) - [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) - [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) +- [uniqThetaSketch](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md index 4434686ae61..7bd392ef870 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -49,3 +49,4 @@ Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq - [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) - [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) - [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) +- [uniqThetaSketch](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md index eee675016ee..d758c179d7a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md @@ -23,3 +23,4 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, ` - [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) - [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniqcombined) - [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniqhll12) +- [uniqThetaSketch](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md index 4983220ed7f..b65a0151e18 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md @@ -37,3 +37,4 @@ We don’t recommend using this function. In most cases, use the [uniq](../../.. - [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) - [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined) - [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) +- [uniqThetaSketch](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md b/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md new file mode 100644 index 00000000000..dd744a34190 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md @@ -0,0 +1,39 @@ +--- +toc_priority: 195 +--- + +# uniqThetaSketch {#agg_function-uniqthetasketch} + +Calculates the approximate number of different argument values, using the [Theta Sketch Framework](https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html). + +``` sql +uniqThetaSketch(x[, ...]) +``` + +**Arguments** + +The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. + +**Returned value** + +- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. + +**Implementation details** + +Function: + +- Calculates a hash for all parameters in the aggregate, then uses it in calculations. + +- Uses the [KMV](https://datasketches.apache.org/docs/Theta/InverseEstimate.html) algorithm to approximate the number of different argument values. + + 4096(2^12) 64-bit sketch are used. The size of the state is about 41 KB. + +- The relative error is 3.125% (95% confidence), see the [relative error table](https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html) for detail. + +**See Also** + +- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) +- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined) +- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) +- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) +- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) diff --git a/docs/en/sql-reference/statements/explain.md b/docs/en/sql-reference/statements/explain.md index 8083fa160fa..1c19adcdcd8 100644 --- a/docs/en/sql-reference/statements/explain.md +++ b/docs/en/sql-reference/statements/explain.md @@ -47,9 +47,9 @@ Union ### EXPLAIN AST {#explain-ast} -Dump query AST. +Dump query AST. Supports all types of queries, not only `SELECT`. -Example: +Examples: ```sql EXPLAIN AST SELECT 1; @@ -63,6 +63,22 @@ SelectWithUnionQuery (children 1) Literal UInt64_1 ``` +```sql +EXPLAIN AST ALTER TABLE t1 DELETE WHERE date = today(); +``` + +```sql + explain + AlterQuery t1 (children 1) + ExpressionList (children 1) + AlterCommand 27 (children 1) + Function equals (children 1) + ExpressionList (children 2) + Identifier date + Function today (children 1) + ExpressionList +``` + ### EXPLAIN SYNTAX {#explain-syntax} Returns query after syntax optimizations. diff --git a/docs/ja/commercial/cloud.md b/docs/ja/commercial/cloud.md index 84f58e46cdb..62fc75ecbda 100644 --- a/docs/ja/commercial/cloud.md +++ b/docs/ja/commercial/cloud.md @@ -22,7 +22,7 @@ toc_title: "\u30AF\u30E9\u30A6\u30C9" ## Alibaba Cloud {#alibaba-cloud} -ClickHouseのためのAlibaba Cloudの管理サービス [中国サイト](https://www.aliyun.com/product/clickhouse) (2021年5月に国際サイトで利用可能になります) 次の主な機能を提供します: +[ClickHouseのためのAlibaba Cloudの管理サービス](https://www.alibabacloud.com/product/clickhouse) 次の主な機能を提供します: - Alibaba Cloud Apsara分散システムをベースにした信頼性の高いクラウドディスクストレージエンジン - 手動でのデータ移行を必要とせずに、オン・デマンドで容量を拡張 diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index bee1fc1318a..9a5a985c76b 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -82,6 +82,7 @@ SELECT * FROM s3_engine_table LIMIT 2; Необязательные настройки: - `access_key_id` и `secret_access_key` — указывают учетные данные для использования с данной точкой приема запроса. +- `region` — название региона S3. - `use_environment_credentials` — если `true`, S3-клиент будет пытаться получить учетные данные из переменных среды и метаданных Amazon EC2 для данной точки приема запроса. Значение по умолчанию - `false`. - `header` — добавляет указанный HTTP-заголовок к запросу на заданную точку приема запроса. Может быть определен несколько раз. - `server_side_encryption_customer_key_base64` — устанавливает необходимые заголовки для доступа к объектам S3 с шифрованием SSE-C. @@ -94,6 +95,7 @@ SELECT * FROM s3_engine_table LIMIT 2; https://storage.yandexcloud.net/my-test-bucket-768/ + diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index b8bd259167a..4cff6fcfb80 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -727,6 +727,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' https://storage.yandexcloud.net/my-bucket/root-path/ your_access_key_id your_secret_access_key + http://proxy1 http://proxy2 @@ -753,6 +754,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' Необязательные параметры: +- `region` — название региона S3. - `use_environment_credentials` — признак, нужно ли считывать учетные данные AWS из сетевого окружения, а также из переменных окружения `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` и `AWS_SESSION_TOKEN`, если они есть. Значение по умолчанию: `false`. - `use_insecure_imds_request` — признак, нужно ли использовать менее безопасное соединение при выполнении запроса к IMDS при получении учётных данных из метаданных Amazon EC2. Значение по умолчанию: `false`. - `proxy` — конфигурация прокси-сервера для конечной точки S3. Каждый элемент `uri` внутри блока `proxy` должен содержать URL прокси-сервера. diff --git a/docs/ru/operations/tips.md b/docs/ru/operations/tips.md deleted file mode 100644 index 4535767e8e0..00000000000 --- a/docs/ru/operations/tips.md +++ /dev/null @@ -1,248 +0,0 @@ ---- -toc_priority: 58 -toc_title: "Советы по эксплуатации" ---- - -# Советы по эксплуатации {#sovety-po-ekspluatatsii} - -## CPU Scaling Governor {#cpu-scaling-governor} - -Всегда используйте `performance` scaling governor. `ondemand` scaling governor работает намного хуже при постоянно высоком спросе. - -``` bash -$ echo 'performance' | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor -``` - -## Ограничение CPU {#ogranichenie-cpu} - -Процессоры могут перегреваться. С помощью `dmesg` можно увидеть, если тактовая частота процессора была ограничена из-за перегрева. -Также ограничение может устанавливаться снаружи на уровне дата-центра. С помощью `turbostat` можно за этим наблюдать под нагрузкой. - -## Оперативная память {#operativnaia-pamiat} - -Для небольших объёмов данных (до ~200 Гб в сжатом виде) лучше всего использовать столько памяти не меньше, чем объём данных. -Для больших объёмов данных, при выполнении интерактивных (онлайн) запросов, стоит использовать разумный объём оперативной памяти (128 Гб или более) для того, чтобы горячее подмножество данных поместилось в кеше страниц. -Даже для объёмов данных в ~50 Тб на сервер, использование 128 Гб оперативной памяти намного лучше для производительности выполнения запросов, чем 64 Гб. - -Не выключайте overcommit. Значение `cat /proc/sys/vm/overcommit_memory` должно быть 0 or 1. Выполните: - -``` bash -$ echo 0 | sudo tee /proc/sys/vm/overcommit_memory -``` - -## Huge Pages {#huge-pages} - -Механизм прозрачных huge pages нужно отключить. Он мешает работе аллокаторов памяти, что приводит к значительной деградации производительности. - -``` bash -$ echo 'madvise' | sudo tee /sys/kernel/mm/transparent_hugepage/enabled -``` - -С помощью `perf top` можно наблюдать за временем, проведенном в ядре операционной системы для управления памятью. -Постоянные huge pages так же не нужно аллоцировать. - -## Подсистема хранения {#podsistema-khraneniia} - -Если ваш бюджет позволяет использовать SSD, используйте SSD. -В противном случае используйте HDD. SATA HDDs 7200 RPM подойдут. - -Предпочитайте много серверов с локальными жесткими дисками вместо меньшего числа серверов с подключенными дисковыми полками. -Но для хранения архивов с редкими запросами полки всё же подходят. - -## RAID {#raid} - -При использовании HDD можно объединить их RAID-10, RAID-5, RAID-6 или RAID-50. -Лучше использовать программный RAID в Linux (`mdadm`). Лучше не использовать LVM. -При создании RAID-10, нужно выбрать `far` расположение. -Если бюджет позволяет, лучше выбрать RAID-10. - -На более чем 4 дисках вместо RAID-5 нужно использовать RAID-6 (предпочтительнее) или RAID-50. -При использовании RAID-5, RAID-6 или RAID-50, нужно всегда увеличивать stripe_cache_size, так как значение по умолчанию выбрано не самым удачным образом. - -``` bash -$ echo 4096 | sudo tee /sys/block/md2/md/stripe_cache_size -``` - -Точное число стоит вычислять из числа устройств и размер блока по формуле: `2 * num_devices * chunk_size_in_bytes / 4096`. - -Размер блока в 1024 Кб подходит для всех конфигураций RAID. -Никогда не указывайте слишком маленький или слишком большой размер блока. - -На SSD можно использовать RAID-0. -Вне зависимости от использования RAID, всегда используйте репликацию для безопасности данных. - -Включите NCQ с длинной очередью. Для HDD стоит выбрать планировщик CFQ, а для SSD — noop. Не стоит уменьшать настройку readahead. -На HDD стоит включать кеш записи. - -## Файловая система {#failovaia-sistema} - -Ext4 самый проверенный вариант. Укажите опции монтирования `noatime,nobarrier`. -XFS также подходит, но не так тщательно протестирована в сочетании с ClickHouse. -Большинство других файловых систем также должны нормально работать. Файловые системы с отложенной аллокацией работают лучше. - -## Ядро Linux {#iadro-linux} - -Не используйте слишком старое ядро Linux. - -## Сеть {#set} - -При использовании IPv6, стоит увеличить размер кеша маршрутов. -Ядра Linux до 3.2 имели массу проблем в реализации IPv6. - -Предпочитайте как минимум 10 Гбит сеть. 1 Гбит также будет работать, но намного хуже для починки реплик с десятками терабайт данных или для обработки распределенных запросов с большим объёмом промежуточных данных. - -## ZooKeeper {#zookeeper} - -Вероятно вы уже используете ZooKeeper для других целей. Можно использовать ту же инсталляцию ZooKeeper, если она не сильно перегружена. - -Лучше использовать свежую версию ZooKeeper, как минимум 3.4.9. Версия в стабильных дистрибутивах Linux может быть устаревшей. - -Никогда не используете написанные вручную скрипты для переноса данных между разными ZooKeeper кластерами, потому что результат будет некорректный для sequential нод. Никогда не используйте утилиту «zkcopy», по той же причине: https://github.com/ksprojects/zkcopy/issues/15 - -Если вы хотите разделить существующий ZooKeeper кластер на два, правильный способ - увеличить количество его реплик, а затем переконфигурировать его как два независимых кластера. - -Не запускайте ZooKeeper на тех же серверах, что и ClickHouse. Потому что ZooKeeper очень чувствителен к задержкам, а ClickHouse может использовать все доступные системные ресурсы. - -С настройками по умолчанию, ZooKeeper является бомбой замедленного действия: - -> Сервер ZooKeeper не будет удалять файлы со старыми снепшоты и логами при использовании конфигурации по умолчанию (см. autopurge), это является ответственностью оператора. - -Эту бомбу нужно обезвредить. - -Далее описана конфигурация ZooKeeper (3.5.1), используемая в боевом окружении Яндекс.Метрики на момент 20 мая 2017 года: - -zoo.cfg: - -``` bash -# http://hadoop.apache.org/zookeeper/docs/current/zookeeperAdmin.html - -# The number of milliseconds of each tick -tickTime=2000 -# The number of ticks that the initial -# synchronization phase can take -initLimit=30000 -# The number of ticks that can pass between -# sending a request and getting an acknowledgement -syncLimit=10 - -maxClientCnxns=2000 - -maxSessionTimeout=60000000 -# the directory where the snapshot is stored. -dataDir=/opt/zookeeper/{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }}/data -# Place the dataLogDir to a separate physical disc for better performance -dataLogDir=/opt/zookeeper/{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }}/logs - -autopurge.snapRetainCount=10 -autopurge.purgeInterval=1 - - -# To avoid seeks ZooKeeper allocates space in the transaction log file in -# blocks of preAllocSize kilobytes. The default block size is 64M. One reason -# for changing the size of the blocks is to reduce the block size if snapshots -# are taken more often. (Also, see snapCount). -preAllocSize=131072 - -# Clients can submit requests faster than ZooKeeper can process them, -# especially if there are a lot of clients. To prevent ZooKeeper from running -# out of memory due to queued requests, ZooKeeper will throttle clients so that -# there is no more than globalOutstandingLimit outstanding requests in the -# system. The default limit is 1,000.ZooKeeper logs transactions to a -# transaction log. After snapCount transactions are written to a log file a -# snapshot is started and a new transaction log file is started. The default -# snapCount is 10,000. -snapCount=3000000 - -# If this option is defined, requests will be will logged to a trace file named -# traceFile.year.month.day. -#traceFile= - -# Leader accepts client connections. Default value is "yes". The leader machine -# coordinates updates. For higher update throughput at thes slight expense of -# read throughput the leader can be configured to not accept clients and focus -# on coordination. -leaderServes=yes - -standaloneEnabled=false -dynamicConfigFile=/etc/zookeeper-{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }}/conf/zoo.cfg.dynamic -``` - -Версия Java: - -``` text -Java(TM) SE Runtime Environment (build 1.8.0_25-b17) -Java HotSpot(TM) 64-Bit Server VM (build 25.25-b02, mixed mode) -``` - -Параметры JVM: - -``` bash -NAME=zookeeper-{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }} -ZOOCFGDIR=/etc/$NAME/conf - -# TODO this is really ugly -# How to find out, which jars are needed? -# seems, that log4j requires the log4j.properties file to be in the classpath -CLASSPATH="$ZOOCFGDIR:/usr/build/classes:/usr/build/lib/*.jar:/usr/share/zookeeper/zookeeper-3.5.1-metrika.jar:/usr/share/zookeeper/slf4j-log4j12-1.7.5.jar:/usr/share/zookeeper/slf4j-api-1.7.5.jar:/usr/share/zookeeper/servlet-api-2.5-20081211.jar:/usr/share/zookeeper/netty-3.7.0.Final.jar:/usr/share/zookeeper/log4j-1.2.16.jar:/usr/share/zookeeper/jline-2.11.jar:/usr/share/zookeeper/jetty-util-6.1.26.jar:/usr/share/zookeeper/jetty-6.1.26.jar:/usr/share/zookeeper/javacc.jar:/usr/share/zookeeper/jackson-mapper-asl-1.9.11.jar:/usr/share/zookeeper/jackson-core-asl-1.9.11.jar:/usr/share/zookeeper/commons-cli-1.2.jar:/usr/src/java/lib/*.jar:/usr/etc/zookeeper" - -ZOOCFG="$ZOOCFGDIR/zoo.cfg" -ZOO_LOG_DIR=/var/log/$NAME -USER=zookeeper -GROUP=zookeeper -PIDDIR=/var/run/$NAME -PIDFILE=$PIDDIR/$NAME.pid -SCRIPTNAME=/etc/init.d/$NAME -JAVA=/usr/bin/java -ZOOMAIN="org.apache.zookeeper.server.quorum.QuorumPeerMain" -ZOO_LOG4J_PROP="INFO,ROLLINGFILE" -JMXLOCALONLY=false -JAVA_OPTS="-Xms{{ '{{' }} cluster.get('xms','128M') {{ '{{' }} '}}' }} \ - -Xmx{{ '{{' }} cluster.get('xmx','1G') {{ '{{' }} '}}' }} \ - -Xloggc:/var/log/$NAME/zookeeper-gc.log \ - -XX:+UseGCLogFileRotation \ - -XX:NumberOfGCLogFiles=16 \ - -XX:GCLogFileSize=16M \ - -verbose:gc \ - -XX:+PrintGCTimeStamps \ - -XX:+PrintGCDateStamps \ - -XX:+PrintGCDetails - -XX:+PrintTenuringDistribution \ - -XX:+PrintGCApplicationStoppedTime \ - -XX:+PrintGCApplicationConcurrentTime \ - -XX:+PrintSafepointStatistics \ - -XX:+UseParNewGC \ - -XX:+UseConcMarkSweepGC \ --XX:+CMSParallelRemarkEnabled" -``` - -Salt init: - -``` text -description "zookeeper-{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }} centralized coordination service" - -start on runlevel [2345] -stop on runlevel [!2345] - -respawn - -limit nofile 8192 8192 - -pre-start script - [ -r "/etc/zookeeper-{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }}/conf/environment" ] || exit 0 - . /etc/zookeeper-{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }}/conf/environment - [ -d $ZOO_LOG_DIR ] || mkdir -p $ZOO_LOG_DIR - chown $USER:$GROUP $ZOO_LOG_DIR -end script - -script - . /etc/zookeeper-{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }}/conf/environment - [ -r /etc/default/zookeeper ] && . /etc/default/zookeeper - if [ -z "$JMXDISABLE" ]; then - JAVA_OPTS="$JAVA_OPTS -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.local.only=$JMXLOCALONLY" - fi - exec start-stop-daemon --start -c $USER --exec $JAVA --name zookeeper-{{ '{{' }} cluster['name'] {{ '{{' }} '}}' }} \ - -- -cp $CLASSPATH $JAVA_OPTS -Dzookeeper.log.dir=${ZOO_LOG_DIR} \ - -Dzookeeper.root.logger=${ZOO_LOG4J_PROP} $ZOOMAIN $ZOOCFG -end script -``` - diff --git a/docs/ru/operations/tips.md b/docs/ru/operations/tips.md new file mode 120000 index 00000000000..9b3413bdbc3 --- /dev/null +++ b/docs/ru/operations/tips.md @@ -0,0 +1 @@ +../../en/operations/tips.md \ No newline at end of file diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md index f7239be0ba5..6dce79d8a89 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md @@ -12,6 +12,9 @@ toc_priority: 208 Внутренние состояния функций `quantile*` не объединяются, если они используются в одном запросе. Если вам необходимо вычислить квантили нескольких уровней, используйте функцию [quantiles](#quantiles), это повысит эффективность запроса. +!!! note "Примечание" + Использование `quantileTDigestWeighted` [не рекомендуется для небольших наборов данных](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) и может привести к значительной ошибке. Рассмотрите возможность использования [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) в таких случаях. + **Синтаксис** ``` sql diff --git a/docs/ru/sql-reference/statements/explain.md b/docs/ru/sql-reference/statements/explain.md index ef2a670a87e..c2a35f1b925 100644 --- a/docs/ru/sql-reference/statements/explain.md +++ b/docs/ru/sql-reference/statements/explain.md @@ -47,9 +47,9 @@ Union ### EXPLAIN AST {#explain-ast} -Дамп AST запроса. +Дамп AST запроса. Поддерживает все типы запросов, не только `SELECT`. -Пример: +Примеры: ```sql EXPLAIN AST SELECT 1; @@ -63,6 +63,22 @@ SelectWithUnionQuery (children 1) Literal UInt64_1 ``` +```sql +EXPLAIN AST ALTER TABLE t1 DELETE WHERE date = today(); +``` + +```sql + explain + AlterQuery t1 (children 1) + ExpressionList (children 1) + AlterCommand 27 (children 1) + Function equals (children 1) + ExpressionList (children 2) + Identifier date + Function today (children 1) + ExpressionList +``` + ### EXPLAIN SYNTAX {#explain-syntax} Возвращает текст запроса после применения синтаксических оптимизаций. diff --git a/docs/zh/commercial/cloud.md b/docs/zh/commercial/cloud.md index c74ffa93e9a..e0a297f51c8 100644 --- a/docs/zh/commercial/cloud.md +++ b/docs/zh/commercial/cloud.md @@ -31,7 +31,7 @@ toc_title: 云 ## 阿里云 {#alibaba-cloud} -阿里云的 ClickHouse 托管服务 [中国站](https://www.aliyun.com/product/clickhouse) (国际站于2021年5月初开放) 提供以下主要功能: +[阿里云的 ClickHouse 托管服务](https://www.alibabacloud.com/zh/product/clickhouse) 提供以下主要功能: - 基于阿里飞天分布式系统的高可靠云盘存储引擎 - 按需扩容,无需手动进行数据搬迁 diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index dd32e959dae..aae2e0f1c59 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1399,17 +1399,27 @@ private: // when `lambda()` function gets substituted into a wrong place. // To avoid dealing with these cases, run the check only for the // queries we were able to successfully execute. - // The final caveat is that sometimes WITH queries are not executed, + // Another caveat is that sometimes WITH queries are not executed, // if they are not referenced by the main SELECT, so they can still // have the aforementioned problems. Disable this check for such // queries, for lack of a better solution. - if (!have_error && queryHasWithClause(parsed_query.get())) + // There is also a problem that fuzzer substitutes positive Int64 + // literals or Decimal literals, which are then parsed back as + // UInt64, and suddenly duplicate alias substitition starts or stops + // working (ASTWithAlias::formatImpl) or something like that. + // So we compare not even the first and second formatting of the + // query, but second and third. + // If you have to add any more workarounds to this check, just remove + // it altogether, it's not so useful. + if (!have_error && !queryHasWithClause(parsed_query.get())) { - ASTPtr parsed_formatted_query; + ASTPtr ast_2; try { const auto * tmp_pos = query_to_send.c_str(); - parsed_formatted_query = parseQuery(tmp_pos, tmp_pos + query_to_send.size(), false /* allow_multi_statements */); + + ast_2 = parseQuery(tmp_pos, tmp_pos + query_to_send.size(), + false /* allow_multi_statements */); } catch (Exception & e) { @@ -1419,27 +1429,30 @@ private: } } - if (parsed_formatted_query) + if (ast_2) { - const auto formatted_twice = parsed_formatted_query->formatForErrorMessage(); - - if (formatted_twice != query_to_send) + const auto text_2 = ast_2->formatForErrorMessage(); + const auto * tmp_pos = text_2.c_str(); + const auto ast_3 = parseQuery(tmp_pos, tmp_pos + text_2.size(), + false /* allow_multi_statements */); + const auto text_3 = ast_3->formatForErrorMessage(); + if (text_3 != text_2) { fmt::print(stderr, "The query formatting is broken.\n"); printChangedSettings(); - fmt::print( - stderr, + fmt::print(stderr, "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n", - formatted_twice, - query_to_send); + text_3, text_2); fmt::print(stderr, "In more detail:\n"); - fmt::print(stderr, "AST-1:\n'{}'\n", parsed_query->dumpTree()); + fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", parsed_query->dumpTree()); fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_send); - fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", parsed_formatted_query->dumpTree()); - fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", formatted_twice); - fmt::print(stderr, "Text-1 must be equal to Text-2, but it is not.\n"); + fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", ast_2->dumpTree()); + fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", text_2); + fmt::print(stderr, "AST-3 (Text-2 parsed):\n'{}'\n", ast_3->dumpTree()); + fmt::print(stderr, "Text-3 (AST-3 formatted):\n'{}'\n", text_3); + fmt::print(stderr, "Text-3 must be equal to Text-2, but it is not.\n"); exit(1); } @@ -1456,6 +1469,11 @@ private: server_exception.reset(); client_exception.reset(); have_error = false; + + // We have to reinitialize connection after errors, because it + // might have gotten into a wrong state and we'll get false + // positives about "Unknown packet from server". + connection->forceConnected(connection_parameters.timeouts); } else if (ast_to_process->formatForErrorMessage().size() > 500) { diff --git a/programs/client/QueryFuzzer.h b/programs/client/QueryFuzzer.h index 9ef66db1873..7c79e683eb4 100644 --- a/programs/client/QueryFuzzer.h +++ b/programs/client/QueryFuzzer.h @@ -4,11 +4,14 @@ #include #include +#include + #include #include #include #include + namespace DB { diff --git a/programs/git-import/git-import.cpp b/programs/git-import/git-import.cpp index b07435dcf78..7977cfba79d 100644 --- a/programs/git-import/git-import.cpp +++ b/programs/git-import/git-import.cpp @@ -774,7 +774,7 @@ UInt128 diffHash(const CommitDiff & file_changes) } UInt128 hash_of_diff; - hasher.get128(hash_of_diff.low, hash_of_diff.high); + hasher.get128(hash_of_diff.items[0], hash_of_diff.items[1]); return hash_of_diff; } diff --git a/programs/library-bridge/SharedLibraryHandler.h b/programs/library-bridge/SharedLibraryHandler.h index 5c0334ac89f..fa476995e32 100644 --- a/programs/library-bridge/SharedLibraryHandler.h +++ b/programs/library-bridge/SharedLibraryHandler.h @@ -23,6 +23,8 @@ public: SharedLibraryHandler(const SharedLibraryHandler & other); + SharedLibraryHandler & operator=(const SharedLibraryHandler & other) = delete; + ~SharedLibraryHandler(); BlockInputStreamPtr loadAll(); diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index c92eb5c6647..fb6817fbf80 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -365,16 +365,20 @@ static void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UI } } -static void transformUUID(const UInt128 & src, UInt128 & dst, UInt64 seed) +static void transformUUID(const UUID & src_uuid, UUID & dst_uuid, UInt64 seed) { + const UInt128 & src = src_uuid.toUnderType(); + UInt128 & dst = dst_uuid.toUnderType(); + SipHash hash; hash.update(seed); - hash.update(reinterpret_cast(&src), sizeof(UInt128)); + hash.update(reinterpret_cast(&src), sizeof(UUID)); /// Saving version and variant from an old UUID hash.get128(reinterpret_cast(&dst)); - dst.high = (dst.high & 0x1fffffffffffffffull) | (src.high & 0xe000000000000000ull); - dst.low = (dst.low & 0xffffffffffff0fffull) | (src.low & 0x000000000000f000ull); + + dst.items[1] = (dst.items[1] & 0x1fffffffffffffffull) | (src.items[1] & 0xe000000000000000ull); + dst.items[0] = (dst.items[0] & 0xffffffffffff0fffull) | (src.items[0] & 0x000000000000f000ull); } class FixedStringModel : public IModel @@ -426,10 +430,10 @@ public: ColumnPtr generate(const IColumn & column) override { - const ColumnUInt128 & src_column = assert_cast(column); + const ColumnUUID & src_column = assert_cast(column); const auto & src_data = src_column.getData(); - auto res_column = ColumnUInt128::create(); + auto res_column = ColumnUUID::create(); auto & res_data = res_column->getData(); res_data.resize(src_data.size()); diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index e33858583c2..f4f575bb33d 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -109,7 +109,7 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ validateODBCConnectionString(connection_string), getContext()->getSettingsRef().odbc_bridge_connection_pool_size); - nanodbc::catalog catalog(*connection); + nanodbc::catalog catalog(connection->get()); std::string catalog_name; /// In XDBC tables it is allowed to pass either database_name or schema_name in table definion, but not both of them. diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.cpp b/programs/odbc-bridge/IdentifierQuoteHandler.cpp index a5a97cb8086..124a5c420f8 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.cpp +++ b/programs/odbc-bridge/IdentifierQuoteHandler.cpp @@ -46,7 +46,7 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ validateODBCConnectionString(connection_string), getContext()->getSettingsRef().odbc_bridge_connection_pool_size); - auto identifier = getIdentifierQuote(*connection); + auto identifier = getIdentifierQuote(connection->get()); WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); try diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index e24b51f6037..ffa636e8b49 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -18,13 +18,10 @@ #include #include #include -#include "ODBCConnectionFactory.h" #include #include -#include - namespace DB { @@ -133,12 +130,12 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse auto quoting_style = IdentifierQuotingStyle::None; #if USE_ODBC - quoting_style = getQuotingStyle(*connection); + quoting_style = getQuotingStyle(connection->get()); #endif auto & read_buf = request.getStream(); auto input_format = FormatFactory::instance().getInput(format, read_buf, *sample_block, getContext(), max_block_size); auto input_stream = std::make_shared(input_format); - ODBCBlockOutputStream output_stream(*connection, db_name, table_name, *sample_block, getContext(), quoting_style); + ODBCBlockOutputStream output_stream(std::move(connection), db_name, table_name, *sample_block, getContext(), quoting_style); copyData(*input_stream, output_stream); writeStringBinary("Ok.", out); } @@ -148,7 +145,7 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse LOG_TRACE(log, "Query: {}", query); BlockOutputStreamPtr writer = FormatFactory::instance().getOutputStreamParallelIfPossible(format, out, *sample_block, getContext()); - ODBCBlockInputStream inp(*connection, query, *sample_block, max_block_size); + ODBCBlockInputStream inp(std::move(connection), query, *sample_block, max_block_size); copyData(inp, *writer); } } diff --git a/programs/odbc-bridge/ODBCBlockInputStream.cpp b/programs/odbc-bridge/ODBCBlockInputStream.cpp index 3a73cb9f601..c695c8db9cf 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockInputStream.cpp @@ -21,14 +21,13 @@ namespace ErrorCodes ODBCBlockInputStream::ODBCBlockInputStream( - nanodbc::connection & connection_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_) + nanodbc::ConnectionHolderPtr connection, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_) : log(&Poco::Logger::get("ODBCBlockInputStream")) , max_block_size{max_block_size_} - , connection(connection_) , query(query_str) { description.init(sample_block); - result = execute(connection, NANODBC_TEXT(query)); + result = execute(connection->get(), NANODBC_TEXT(query)); } diff --git a/programs/odbc-bridge/ODBCBlockInputStream.h b/programs/odbc-bridge/ODBCBlockInputStream.h index bbd90ce4d6c..26aa766dbcc 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.h +++ b/programs/odbc-bridge/ODBCBlockInputStream.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include "ODBCConnectionFactory.h" namespace DB @@ -13,7 +13,7 @@ namespace DB class ODBCBlockInputStream final : public IBlockInputStream { public: - ODBCBlockInputStream(nanodbc::connection & connection_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_); + ODBCBlockInputStream(nanodbc::ConnectionHolderPtr connection, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_); String getName() const override { return "ODBC"; } @@ -36,7 +36,6 @@ private: const UInt64 max_block_size; ExternalResultDescription description; - nanodbc::connection & connection; nanodbc::result result; String query; bool finished = false; diff --git a/programs/odbc-bridge/ODBCBlockOutputStream.cpp b/programs/odbc-bridge/ODBCBlockOutputStream.cpp index e4614204178..dc965b3b2a7 100644 --- a/programs/odbc-bridge/ODBCBlockOutputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockOutputStream.cpp @@ -1,5 +1,6 @@ #include "ODBCBlockOutputStream.h" +#include #include #include #include @@ -37,17 +38,16 @@ namespace query.IAST::format(settings); return buf.str(); } - } -ODBCBlockOutputStream::ODBCBlockOutputStream(nanodbc::connection & connection_, +ODBCBlockOutputStream::ODBCBlockOutputStream(nanodbc::ConnectionHolderPtr connection_, const std::string & remote_database_name_, const std::string & remote_table_name_, const Block & sample_block_, ContextPtr local_context_, IdentifierQuotingStyle quoting_) : log(&Poco::Logger::get("ODBCBlockOutputStream")) - , connection(connection_) + , connection(std::move(connection_)) , db_name(remote_database_name_) , table_name(remote_table_name_) , sample_block(sample_block_) @@ -69,7 +69,7 @@ void ODBCBlockOutputStream::write(const Block & block) writer->write(block); std::string query = getInsertQuery(db_name, table_name, block.getColumnsWithTypeAndName(), quoting) + values_buf.str(); - execute(connection, query); + execute(connection->get(), query); } } diff --git a/programs/odbc-bridge/ODBCBlockOutputStream.h b/programs/odbc-bridge/ODBCBlockOutputStream.h index 0b13f7039b5..c370a0a9c7b 100644 --- a/programs/odbc-bridge/ODBCBlockOutputStream.h +++ b/programs/odbc-bridge/ODBCBlockOutputStream.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include "ODBCConnectionFactory.h" namespace DB @@ -16,7 +16,7 @@ class ODBCBlockOutputStream : public IBlockOutputStream public: ODBCBlockOutputStream( - nanodbc::connection & connection_, + nanodbc::ConnectionHolderPtr connection_, const std::string & remote_database_name_, const std::string & remote_table_name_, const Block & sample_block_, @@ -29,7 +29,7 @@ public: private: Poco::Logger * log; - nanodbc::connection & connection; + nanodbc::ConnectionHolderPtr connection; std::string db_name; std::string table_name; Block sample_block; diff --git a/programs/odbc-bridge/ODBCConnectionFactory.h b/programs/odbc-bridge/ODBCConnectionFactory.h index 56961ddb2fb..41ed5f1b31f 100644 --- a/programs/odbc-bridge/ODBCConnectionFactory.h +++ b/programs/odbc-bridge/ODBCConnectionFactory.h @@ -6,53 +6,51 @@ #include #include +namespace DB +{ +namespace ErrorCodes +{ + extern const int NO_FREE_CONNECTION; +} +} namespace nanodbc { -static constexpr inline auto ODBC_CONNECT_TIMEOUT = 100; - -using ConnectionPtr = std::shared_ptr; +using ConnectionPtr = std::unique_ptr; using Pool = BorrowedObjectPool; using PoolPtr = std::shared_ptr; class ConnectionHolder { - public: - ConnectionHolder(const std::string & connection_string_, PoolPtr pool_) : connection_string(connection_string_), pool(pool_) {} + ConnectionHolder(PoolPtr pool_, ConnectionPtr connection_) : pool(pool_), connection(std::move(connection_)) {} - ~ConnectionHolder() + ConnectionHolder(const ConnectionHolder & other) = delete; + + ~ConnectionHolder() { pool->returnObject(std::move(connection)); } + + nanodbc::connection & get() const { - if (connection) - pool->returnObject(std::move(connection)); - } - - nanodbc::connection & operator*() - { - if (!connection) - { - pool->borrowObject(connection, [&]() - { - return std::make_shared(connection_string, ODBC_CONNECT_TIMEOUT); - }); - } - + assert(connection != nullptr); return *connection; } private: - std::string connection_string; PoolPtr pool; ConnectionPtr connection; }; +using ConnectionHolderPtr = std::unique_ptr; } namespace DB { +static constexpr inline auto ODBC_CONNECT_TIMEOUT = 100; +static constexpr inline auto ODBC_POOL_WAIT_TIMEOUT = 10000; + class ODBCConnectionFactory final : private boost::noncopyable { public: @@ -62,14 +60,32 @@ public: return ret; } - nanodbc::ConnectionHolder get(const std::string & connection_string, size_t pool_size) + nanodbc::ConnectionHolderPtr get(const std::string & connection_string, size_t pool_size) { std::lock_guard lock(mutex); if (!factory.count(connection_string)) factory.emplace(std::make_pair(connection_string, std::make_shared(pool_size))); - return nanodbc::ConnectionHolder(connection_string, factory[connection_string]); + auto & pool = factory[connection_string]; + + nanodbc::ConnectionPtr connection; + auto connection_available = pool->tryBorrowObject(connection, []() { return nullptr; }, ODBC_POOL_WAIT_TIMEOUT); + + if (!connection_available) + throw Exception("Unable to fetch connection within the timeout", ErrorCodes::NO_FREE_CONNECTION); + + try + { + if (!connection || !connection->connected()) + connection = std::make_unique(connection_string, ODBC_CONNECT_TIMEOUT); + } + catch (...) + { + pool->returnObject(std::move(connection)); + } + + return std::make_unique(factory[connection_string], std::move(connection)); } private: diff --git a/programs/odbc-bridge/SchemaAllowedHandler.cpp b/programs/odbc-bridge/SchemaAllowedHandler.cpp index 4cceaee962c..3a20148780d 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.cpp +++ b/programs/odbc-bridge/SchemaAllowedHandler.cpp @@ -53,7 +53,7 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer validateODBCConnectionString(connection_string), getContext()->getSettingsRef().odbc_bridge_connection_pool_size); - bool result = isSchemaAllowed(*connection); + bool result = isSchemaAllowed(connection->get()); WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); try diff --git a/programs/odbc-bridge/getIdentifierQuote.cpp b/programs/odbc-bridge/getIdentifierQuote.cpp index 9ccad6e6e1d..586e3c4e5dd 100644 --- a/programs/odbc-bridge/getIdentifierQuote.cpp +++ b/programs/odbc-bridge/getIdentifierQuote.cpp @@ -3,7 +3,6 @@ #if USE_ODBC #include -#include #include #include diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 729dc150e07..82392b3fee0 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -385,6 +386,11 @@ void Server::initialize(Poco::Util::Application & self) { BaseDaemon::initialize(self); logger().information("starting up"); + + LOG_INFO(&logger(), "OS Name = {}, OS Version = {}, OS Architecture = {}", + Poco::Environment::osName(), + Poco::Environment::osVersion(), + Poco::Environment::osArchitecture()); } std::string Server::getDefaultCorePath() const @@ -879,7 +885,8 @@ int Server::main(const std::vector & /*args*/) global_context->setMMappedFileCache(mmap_cache_size); #if USE_EMBEDDED_COMPILER - size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", 500); + constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 1024; + size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default); CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size); #endif diff --git a/programs/server/config.xml b/programs/server/config.xml index 195b6263595..df8a5266c39 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -329,6 +329,8 @@ --> 1000 + + 1073741824 /var/lib/clickhouse/ @@ -518,6 +520,33 @@ false + ' | sed -e 's|.*>\(.*\)<.*|\1|') + wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge_$PKG_VER-1_all.deb + apt install --no-install-recommends -f ./clickhouse-jdbc-bridge_$PKG_VER-1_all.deb + clickhouse-jdbc-bridge & + + * [CentOS/RHEL] + export MVN_URL=https://repo1.maven.org/maven2/ru/yandex/clickhouse/clickhouse-jdbc-bridge + export PKG_VER=$(curl -sL $MVN_URL/maven-metadata.xml | grep '' | sed -e 's|.*>\(.*\)<.*|\1|') + wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm + yum localinstall -y clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm + clickhouse-jdbc-bridge & + + Please refer to https://github.com/ClickHouse/clickhouse-jdbc-bridge#usage for more information. + ]]> + + diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 80594f66dfc..8c38cd02f9c 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -355,8 +355,9 @@ String DiskAccessStorage::getStorageParamsJSON() const std::lock_guard lock{mutex}; Poco::JSON::Object json; json.set("path", directory_path); - if (readonly) - json.set("readonly", readonly.load()); + bool readonly_loaded = readonly; + if (readonly_loaded) + json.set("readonly", Poco::Dynamic::Var{true}); std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM oss.exceptions(std::ios::failbit); Poco::JSON::Stringifier::stringify(json, oss); diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 1cade973724..0c4d2f417c9 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -77,7 +77,7 @@ auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const Str if (enable_tls_lc_str == "starttls") params.enable_tls = LDAPClient::Params::TLSEnable::YES_STARTTLS; else if (config.getBool(ldap_server_config + ".enable_tls")) - params.enable_tls = LDAPClient::Params::TLSEnable::YES; + params.enable_tls = LDAPClient::Params::TLSEnable::YES; //-V1048 else params.enable_tls = LDAPClient::Params::TLSEnable::NO; } @@ -96,7 +96,7 @@ auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const Str else if (tls_minimum_protocol_version_lc_str == "tls1.1") params.tls_minimum_protocol_version = LDAPClient::Params::TLSProtocolVersion::TLS1_1; else if (tls_minimum_protocol_version_lc_str == "tls1.2") - params.tls_minimum_protocol_version = LDAPClient::Params::TLSProtocolVersion::TLS1_2; + params.tls_minimum_protocol_version = LDAPClient::Params::TLSProtocolVersion::TLS1_2; //-V1048 else throw Exception("Bad value for 'tls_minimum_protocol_version' entry, allowed values are: 'ssl2', 'ssl3', 'tls1.0', 'tls1.1', 'tls1.2'", ErrorCodes::BAD_ARGUMENTS); } @@ -113,7 +113,7 @@ auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const Str else if (tls_require_cert_lc_str == "try") params.tls_require_cert = LDAPClient::Params::TLSRequireCert::TRY; else if (tls_require_cert_lc_str == "demand") - params.tls_require_cert = LDAPClient::Params::TLSRequireCert::DEMAND; + params.tls_require_cert = LDAPClient::Params::TLSRequireCert::DEMAND; //-V1048 else throw Exception("Bad value for 'tls_require_cert' entry, allowed values are: 'never', 'allow', 'try', 'demand'", ErrorCodes::BAD_ARGUMENTS); } diff --git a/src/Access/GrantedRoles.cpp b/src/Access/GrantedRoles.cpp index 7930b56e44d..2659f8a3ec9 100644 --- a/src/Access/GrantedRoles.cpp +++ b/src/Access/GrantedRoles.cpp @@ -136,7 +136,7 @@ GrantedRoles::Elements GrantedRoles::getElements() const boost::range::set_difference(roles, roles_with_admin_option, std::back_inserter(element.ids)); if (!element.empty()) { - element.admin_option = false; + element.admin_option = false; //-V1048 elements.emplace_back(std::move(element)); } diff --git a/src/Access/IAccessStorage.h b/src/Access/IAccessStorage.h index 2cdd8eabf73..cc914664149 100644 --- a/src/Access/IAccessStorage.h +++ b/src/Access/IAccessStorage.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include diff --git a/src/Access/Quota.h b/src/Access/Quota.h index 430bdca29b0..b7970b2583b 100644 --- a/src/Access/Quota.h +++ b/src/Access/Quota.h @@ -45,7 +45,7 @@ struct Quota : public IAccessEntity struct ResourceTypeInfo { - const char * const raw_name; + const char * const raw_name = ""; const String name; /// Lowercased with underscores, e.g. "result_rows". const String keyword; /// Uppercased with spaces, e.g. "RESULT ROWS". const bool output_as_float = false; diff --git a/src/Access/RolesOrUsersSet.h b/src/Access/RolesOrUsersSet.h index 0d8983c2ec3..871bb0c0758 100644 --- a/src/Access/RolesOrUsersSet.h +++ b/src/Access/RolesOrUsersSet.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Access/RowPolicy.h b/src/Access/RowPolicy.h index c9b4d69152d..723db545dbe 100644 --- a/src/Access/RowPolicy.h +++ b/src/Access/RowPolicy.h @@ -2,6 +2,7 @@ #include #include +#include #include diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h index 8a6491d9b61..f2ea51ac28d 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/src/AggregateFunctions/AggregateFunctionAvg.h @@ -7,7 +7,7 @@ #include #include #include -#include "Core/DecimalFunctions.h" +#include namespace DB diff --git a/src/AggregateFunctions/AggregateFunctionAvgWeighted.h b/src/AggregateFunctions/AggregateFunctionAvgWeighted.h index f8b452fc444..8b932918aa5 100644 --- a/src/AggregateFunctions/AggregateFunctionAvgWeighted.h +++ b/src/AggregateFunctions/AggregateFunctionAvgWeighted.h @@ -5,18 +5,18 @@ namespace DB { -template +template using AvgWeightedFieldType = std::conditional_t, std::conditional_t, Decimal256, Decimal128>, std::conditional_t, Float64, // no way to do UInt128 * UInt128, better cast to Float64 NearestFieldType>>; -template +template using MaxFieldType = std::conditional_t<(sizeof(AvgWeightedFieldType) > sizeof(AvgWeightedFieldType)), AvgWeightedFieldType, AvgWeightedFieldType>; -template +template class AggregateFunctionAvgWeighted final : public AggregateFunctionAvgBase< MaxFieldType, AvgWeightedFieldType, AggregateFunctionAvgWeighted> diff --git a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h index 0cfb33efc10..bee58a4a932 100644 --- a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h +++ b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h @@ -2,7 +2,6 @@ #include #include -#include #include #include #include @@ -20,7 +19,7 @@ namespace ErrorCodes /** Tracks the leftmost and rightmost (x, y) data points. */ -struct AggregateFunctionBoundingRatioData +struct AggregateFunctionBoundingRatioData //-V730 { struct Point { diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index b587bbebf6e..b551f8a4ec5 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -10,7 +10,6 @@ #include #include -#include namespace DB { diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.h b/src/AggregateFunctions/AggregateFunctionGroupArray.h index 921274f7d59..e59f19e3bae 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -296,12 +296,7 @@ public: if (size) { typename ColumnVector::Container & data_to = assert_cast &>(arr_to.getData()).getData(); - if constexpr (is_big_int_v) - // is data_to empty? we should probably use std::vector::insert then - for (auto it = this->data(place).value.begin(); it != this->data(place).value.end(); it++) - data_to.push_back(*it); - else - data_to.insert(this->data(place).value.begin(), this->data(place).value.end()); + data_to.insert(this->data(place).value.begin(), this->data(place).value.end()); } } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp index e2139ba882f..b6e9fda9559 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp @@ -21,6 +21,7 @@ namespace ErrorCodes namespace { +/// TODO Proper support for Decimal256. template struct MovingSum { diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h index 3bab831d316..8b7109ae3ef 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h @@ -38,7 +38,7 @@ struct MovingData using Array = PODArray; Array value; /// Prefix sums. - T sum = 0; + T sum{}; void NO_SANITIZE_UNDEFINED add(T val, Arena * arena) { @@ -69,9 +69,9 @@ struct MovingAvgData : public MovingData T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const { if (idx < window_size) - return this->value[idx] / window_size; + return this->value[idx] / T(window_size); else - return (this->value[idx] - this->value[idx - window_size]) / window_size; + return (this->value[idx] - this->value[idx - window_size]) / T(window_size); } }; diff --git a/src/AggregateFunctions/AggregateFunctionHistogram.h b/src/AggregateFunctions/AggregateFunctionHistogram.h index c44cb61b275..a5a6920ce33 100644 --- a/src/AggregateFunctions/AggregateFunctionHistogram.h +++ b/src/AggregateFunctions/AggregateFunctionHistogram.h @@ -220,7 +220,7 @@ private: } public: - AggregateFunctionHistogramData() + AggregateFunctionHistogramData() //-V730 : size(0) , lower_bound(std::numeric_limits::max()) , upper_bound(std::numeric_limits::lowest()) diff --git a/src/AggregateFunctions/AggregateFunctionMannWhitney.h b/src/AggregateFunctions/AggregateFunctionMannWhitney.h index 7efdebf1639..7573de65cc0 100644 --- a/src/AggregateFunctions/AggregateFunctionMannWhitney.h +++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -21,7 +20,7 @@ #include -#include + namespace DB { diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 919026a78c1..f4561660ad5 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -181,7 +181,7 @@ public: /** For strings. Short strings are stored in the object itself, and long strings are allocated separately. * NOTE It could also be suitable for arrays of numbers. */ -struct SingleValueDataString +struct SingleValueDataString //-V730 { private: using Self = SingleValueDataString; diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.cpp b/src/AggregateFunctions/AggregateFunctionQuantile.cpp index 1dcdb288339..e8d86a03ff1 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantile.cpp +++ b/src/AggregateFunctions/AggregateFunctionQuantile.cpp @@ -100,13 +100,14 @@ AggregateFunctionPtr createAggregateFunctionQuantile(const std::string & name, c if (which.idx == TypeIndex::Decimal32) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::Decimal64) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::Decimal128) return std::make_shared>(argument_types, params); + if (which.idx == TypeIndex::Decimal256) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::DateTime64) return std::make_shared>(argument_types, params); - //if (which.idx == TypeIndex::Decimal256) return std::make_shared>(argument_types, params); } if constexpr (supportBigInt()) { if (which.idx == TypeIndex::Int128) return std::make_shared>(argument_types, params); + if (which.idx == TypeIndex::UInt128) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::Int256) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::UInt256) return std::make_shared>(argument_types, params); } diff --git a/src/AggregateFunctions/AggregateFunctionStatistics.cpp b/src/AggregateFunctions/AggregateFunctionStatistics.cpp index 9e3aa0962db..d8c8c2ab210 100644 --- a/src/AggregateFunctions/AggregateFunctionStatistics.cpp +++ b/src/AggregateFunctions/AggregateFunctionStatistics.cpp @@ -36,7 +36,7 @@ AggregateFunctionPtr createAggregateFunctionStatisticsBinary(const std::string & assertNoParameters(name, parameters); assertBinary(name, argument_types); - AggregateFunctionPtr res(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], argument_types)); + AggregateFunctionPtr res(createWithTwoBasicNumericTypes(*argument_types[0], *argument_types[1], argument_types)); if (!res) throw Exception("Illegal types " + argument_types[0]->getName() + " and " + argument_types[1]->getName() + " of arguments for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/src/AggregateFunctions/AggregateFunctionStatistics.h b/src/AggregateFunctions/AggregateFunctionStatistics.h index 473bce1d89a..2b778f85d99 100644 --- a/src/AggregateFunctions/AggregateFunctionStatistics.h +++ b/src/AggregateFunctions/AggregateFunctionStatistics.h @@ -13,7 +13,7 @@ namespace DB { -namespace +namespace detail { /// This function returns true if both values are large and comparable. @@ -72,7 +72,7 @@ public: Float64 factor = static_cast(count * source.count) / total_count; Float64 delta = mean - source.mean; - if (areComparable(count, source.count)) + if (detail::areComparable(count, source.count)) mean = (source.count * source.mean + count * mean) / total_count; else mean = source.mean + delta * (static_cast(count) / total_count); @@ -302,7 +302,7 @@ public: Float64 left_delta = left_mean - source.left_mean; Float64 right_delta = right_mean - source.right_mean; - if (areComparable(count, source.count)) + if (detail::areComparable(count, source.count)) { left_mean = (source.count * source.left_mean + count * left_mean) / total_count; right_mean = (source.count * source.right_mean + count * right_mean) / total_count; diff --git a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.cpp b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.cpp index c7c2f9025ed..5dde84c1b64 100644 --- a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.cpp +++ b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.cpp @@ -41,7 +41,7 @@ AggregateFunctionPtr createAggregateFunctionStatisticsBinary(const std::string & assertNoParameters(name, parameters); assertBinary(name, argument_types); - AggregateFunctionPtr res(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], argument_types)); + AggregateFunctionPtr res(createWithTwoBasicNumericTypes(*argument_types[0], *argument_types[1], argument_types)); if (!res) throw Exception("Illegal types " + argument_types[0]->getName() + " and " + argument_types[1]->getName() + " of arguments for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h index a5d6bbb7dc8..eaf119a6264 100644 --- a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h +++ b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h @@ -131,11 +131,10 @@ public: static_cast(static_cast(*columns[1]).getData()[row_num])); else { - if constexpr (std::is_same_v) + if constexpr (IsDecimalNumber) { this->data(place).add(static_cast( - static_cast(*columns[0]).getData()[row_num].value - )); + static_cast(*columns[0]).getData()[row_num].value)); } else this->data(place).add( diff --git a/src/AggregateFunctions/AggregateFunctionTopK.cpp b/src/AggregateFunctions/AggregateFunctionTopK.cpp index e32da02f442..79a39817ea5 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.cpp +++ b/src/AggregateFunctions/AggregateFunctionTopK.cpp @@ -2,9 +2,10 @@ #include #include #include +#include #include #include -#include "registerAggregateFunctions.h" + #define TOP_K_MAX_SIZE 0xFFFFFF diff --git a/src/AggregateFunctions/AggregateFunctionTopK.h b/src/AggregateFunctions/AggregateFunctionTopK.h index dbc471687ea..5e5e7d07cec 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/src/AggregateFunctions/AggregateFunctionTopK.h @@ -10,7 +10,6 @@ #include #include -#include #include #include diff --git a/src/AggregateFunctions/AggregateFunctionUniq.cpp b/src/AggregateFunctions/AggregateFunctionUniq.cpp index 32fdb188529..7637e076d5b 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.cpp +++ b/src/AggregateFunctions/AggregateFunctionUniq.cpp @@ -132,6 +132,12 @@ void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory) factory.registerFunction("uniqExact", {createAggregateFunctionUniq>, properties}); + +#if USE_DATASKETCHES + factory.registerFunction("uniqThetaSketch", + {createAggregateFunctionUniq, properties}); +#endif + } } diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index 253af6e2895..6db1c49c071 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -22,6 +22,7 @@ #include #include +#include #include @@ -69,7 +70,7 @@ struct AggregateFunctionUniqHLL12Data }; template <> -struct AggregateFunctionUniqHLL12Data +struct AggregateFunctionUniqHLL12Data { using Set = HyperLogLogWithSmallSetOptimization; Set set; @@ -124,6 +125,19 @@ struct AggregateFunctionUniqExactData }; +/// uniqThetaSketch +#if USE_DATASKETCHES + +struct AggregateFunctionUniqThetaSketchData +{ + using Set = ThetaSketchData; + Set set; + + static String getName() { return "uniqThetaSketch"; } +}; + +#endif + namespace detail { @@ -133,16 +147,14 @@ template struct AggregateFunctionUniqTraits { static UInt64 hash(T x) { - if constexpr (std::is_same_v) - { - return sipHash64(x); - } - else if constexpr (std::is_same_v || std::is_same_v) + if constexpr (std::is_same_v || std::is_same_v) { return ext::bit_cast(x); } else if constexpr (sizeof(T) <= sizeof(UInt64)) + { return x; + } else return DefaultHash64(x); } @@ -184,11 +196,17 @@ struct OneAdder UInt128 key; SipHash hash; hash.update(value.data, value.size); - hash.get128(key.low, key.high); + hash.get128(key); data.set.insert(key); } } +#if USE_DATASKETCHES + else if constexpr (std::is_same_v) + { + data.set.insertOriginal(column.getDataAt(row_num)); + } +#endif } }; diff --git a/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp b/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp index c0064044f95..84a9267ffe0 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp +++ b/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp @@ -3,11 +3,13 @@ #include #include +#include + #include #include #include -#include "registerAggregateFunctions.h" + namespace DB { diff --git a/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/src/AggregateFunctions/AggregateFunctionUniqCombined.h index 31bd8021dcf..f425a343caa 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include diff --git a/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp b/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp index 9befc515de6..8195bd8964c 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp +++ b/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp @@ -1,11 +1,11 @@ #include #include #include +#include #include #include #include #include -#include "registerAggregateFunctions.h" namespace DB diff --git a/src/AggregateFunctions/AggregateFunctionUniqUpTo.h b/src/AggregateFunctions/AggregateFunctionUniqUpTo.h index 853e1cb6447..5dd62c96a9e 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqUpTo.h +++ b/src/AggregateFunctions/AggregateFunctionUniqUpTo.h @@ -1,6 +1,7 @@ #pragma once -#include +#include + #include #include @@ -35,7 +36,7 @@ namespace DB */ template -struct __attribute__((__packed__)) AggregateFunctionUniqUpToData +struct AggregateFunctionUniqUpToData { /** If count == threshold + 1 - this means that it is "overflowed" (values greater than threshold). * In this case (for example, after calling the merge function), the `data` array does not necessarily contain the initialized values @@ -43,7 +44,17 @@ struct __attribute__((__packed__)) AggregateFunctionUniqUpToData * then set count to `threshold + 1`, and values from another state are not copied. */ UInt8 count = 0; - T data[0]; + char data_ptr[0]; + + T load(size_t i) const + { + return unalignedLoad(data_ptr + i * sizeof(T)); + } + + void store(size_t i, const T & x) + { + unalignedStore(data_ptr + i * sizeof(T), x); + } size_t size() const { @@ -60,12 +71,12 @@ struct __attribute__((__packed__)) AggregateFunctionUniqUpToData /// Linear search for the matching element. for (size_t i = 0; i < count; ++i) - if (data[i] == x) + if (load(i) == x) return; /// Did not find the matching element. If there is room for one more element, insert it. if (count < threshold) - data[count] = x; + store(count, x); /// After increasing count, the state may be overflowed. ++count; @@ -84,7 +95,7 @@ struct __attribute__((__packed__)) AggregateFunctionUniqUpToData } for (size_t i = 0; i < rhs.count; ++i) - insert(rhs.data[i], threshold); + insert(rhs.load(i), threshold); } void write(WriteBuffer & wb, UInt8 threshold) const @@ -93,7 +104,7 @@ struct __attribute__((__packed__)) AggregateFunctionUniqUpToData /// Write values only if the state is not overflowed. Otherwise, they are not needed, and only the fact that the state is overflowed is important. if (count <= threshold) - wb.write(reinterpret_cast(data), count * sizeof(data[0])); + wb.write(data_ptr, count * sizeof(T)); } void read(ReadBuffer & rb, UInt8 threshold) @@ -101,7 +112,7 @@ struct __attribute__((__packed__)) AggregateFunctionUniqUpToData readBinary(count, rb); if (count <= threshold) - rb.read(reinterpret_cast(data), count * sizeof(data[0])); + rb.read(data_ptr, count * sizeof(T)); } /// ALWAYS_INLINE is required to have better code layout for uniqUpTo function diff --git a/src/AggregateFunctions/Helpers.h b/src/AggregateFunctions/Helpers.h index bf51b5f6e2e..d1f1830eb27 100644 --- a/src/AggregateFunctions/Helpers.h +++ b/src/AggregateFunctions/Helpers.h @@ -15,12 +15,12 @@ M(Float32) \ M(Float64) -// No UInt128 here because of the name conflict #define FOR_NUMERIC_TYPES(M) \ M(UInt8) \ M(UInt16) \ M(UInt32) \ M(UInt64) \ + M(UInt128) \ M(UInt256) \ M(Int8) \ M(Int16) \ @@ -109,6 +109,8 @@ static IAggregateFunction * createWithUnsignedIntegerType(const IDataType & argu if (which.idx == TypeIndex::UInt16) return new AggregateFunctionTemplate>(std::forward(args)...); if (which.idx == TypeIndex::UInt32) return new AggregateFunctionTemplate>(std::forward(args)...); if (which.idx == TypeIndex::UInt64) return new AggregateFunctionTemplate>(std::forward(args)...); + if (which.idx == TypeIndex::UInt128) return new AggregateFunctionTemplate>(std::forward(args)...); + if (which.idx == TypeIndex::UInt256) return new AggregateFunctionTemplate>(std::forward(args)...); return nullptr; } @@ -119,11 +121,11 @@ static IAggregateFunction * createWithNumericBasedType(const IDataType & argumen if (f) return f; - /// expects that DataTypeDate based on UInt16, DataTypeDateTime based on UInt32 and UUID based on UInt128 + /// expects that DataTypeDate based on UInt16, DataTypeDateTime based on UInt32 WhichDataType which(argument_type); if (which.idx == TypeIndex::Date) return new AggregateFunctionTemplate(std::forward(args)...); if (which.idx == TypeIndex::DateTime) return new AggregateFunctionTemplate(std::forward(args)...); - if (which.idx == TypeIndex::UUID) return new AggregateFunctionTemplate(std::forward(args)...); + if (which.idx == TypeIndex::UUID) return new AggregateFunctionTemplate(std::forward(args)...); return nullptr; } @@ -184,6 +186,29 @@ static IAggregateFunction * createWithTwoNumericTypes(const IDataType & first_ty return nullptr; } +template class AggregateFunctionTemplate, typename... TArgs> +static IAggregateFunction * createWithTwoBasicNumericTypesSecond(const IDataType & second_type, TArgs && ... args) +{ + WhichDataType which(second_type); +#define DISPATCH(TYPE) \ + if (which.idx == TypeIndex::TYPE) return new AggregateFunctionTemplate(std::forward(args)...); + FOR_BASIC_NUMERIC_TYPES(DISPATCH) +#undef DISPATCH + return nullptr; +} + +template