diff --git a/README.md b/README.md
index 7f6a102a2dd..f1c8e17086b 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ ClickHouse is an open-source column-oriented database management system that all
 * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
 * You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
 
-## Upcoming Events
+## Upcoming Events
 
-* [ClickHouse Data Integration Virtual Meetup](https://www.eventbrite.com/e/clickhouse-september-virtual-meetup-data-integration-tickets-117421895049) on September 10, 2020.
-* [ClickHouse talk at Ya.Subbotnik (in Russian)](https://ya.cc/t/cIBI-3yECj5JF) on September 12, 2020.
+* [eBay migrating from Druid](https://us02web.zoom.us/webinar/register/tZMkfu6rpjItHtaQ1DXcgPWcSOnmM73HLGKL) on September 23, 2020.
+* [ClickHouse for Edge Analytics](https://ones2020.sched.com/event/bWPs) on September 29, 2020.
diff --git a/base/common/throwError.h b/base/common/throwError.h
new file mode 100644
index 00000000000..b495a0fbc7a
--- /dev/null
+++ b/base/common/throwError.h
@@ -0,0 +1,13 @@
+#pragma once
+#include <stdexcept>
+
+/// Throw DB::Exception-like exception before its definition.
+/// DB::Exception derived from Poco::Exception derived from std::exception.
+/// DB::Exception is generally caught as Poco::Exception. std::exception generally has other catch blocks and could lead to other outcomes.
+/// DB::Exception is not defined yet. It'd be better to throw Poco::Exception but we do not want to include any big header here, even <string>.
+/// So we throw some std::exception instead in the hope that its catch block is the same as the DB::Exception one.
+template <typename T>
+inline void throwError(const T & err)
+{
+    throw std::runtime_error(err);
+}
diff --git a/base/common/types.h b/base/common/types.h
index 682fe94366c..2982781ce1f 100644
--- a/base/common/types.h
+++ b/base/common/types.h
@@ -1,8 +1,6 @@
 #pragma once
-#include
 #include
-#include
 #include
 #include
@@ -25,8 +23,8 @@ using UInt64 = uint64_t;
 
 using Int128 = __int128;
 
-using wInt256 = std::wide_integer<256, signed>;
-using wUInt256 = std::wide_integer<256, unsigned>;
+using wInt256 = wide::integer<256, signed>;
+using wUInt256 = wide::integer<256, unsigned>;
 
 static_assert(sizeof(wInt256) == 32);
 static_assert(sizeof(wUInt256) == 32);
@@ -121,12 +119,6 @@ template <> struct is_big_int { static constexpr bool value = true; };
 template <typename T>
 inline constexpr bool is_big_int_v = is_big_int<T>::value;
 
-template <typename T>
-inline std::string bigintToString(const T & x)
-{
-    return to_string(x);
-}
-
 template <typename To, typename From>
 inline To bigint_cast(const From & x [[maybe_unused]])
 {
diff --git a/base/common/wide_integer.h b/base/common/wide_integer.h
index 67d0b3f04da..2aeac072b3f 100644
--- a/base/common/wide_integer.h
+++ b/base/common/wide_integer.h
@@ -22,79 +22,87 @@
  * without express or implied warranty.
*/ -#include // CHAR_BIT -#include #include #include #include +#include + +namespace wide +{ +template +class integer; +} namespace std { -template -class wide_integer; template -struct common_type, wide_integer>; +struct common_type, wide::integer>; template -struct common_type, Arithmetic>; +struct common_type, Arithmetic>; template -struct common_type>; +struct common_type>; + +} + +namespace wide +{ template -class wide_integer +class integer { public: using base_type = uint8_t; using signed_base_type = int8_t; // ctors - wide_integer() = default; + integer() = default; template - constexpr wide_integer(T rhs) noexcept; + constexpr integer(T rhs) noexcept; template - constexpr wide_integer(std::initializer_list il) noexcept; + constexpr integer(std::initializer_list il) noexcept; // assignment template - constexpr wide_integer & operator=(const wide_integer & rhs) noexcept; + constexpr integer & operator=(const integer & rhs) noexcept; template - constexpr wide_integer & operator=(Arithmetic rhs) noexcept; + constexpr integer & operator=(Arithmetic rhs) noexcept; template - constexpr wide_integer & operator*=(const Arithmetic & rhs); + constexpr integer & operator*=(const Arithmetic & rhs); template - constexpr wide_integer & operator/=(const Arithmetic & rhs); + constexpr integer & operator/=(const Arithmetic & rhs); template - constexpr wide_integer & operator+=(const Arithmetic & rhs) noexcept(is_same::value); + constexpr integer & operator+=(const Arithmetic & rhs) noexcept(std::is_same_v); template - constexpr wide_integer & operator-=(const Arithmetic & rhs) noexcept(is_same::value); + constexpr integer & operator-=(const Arithmetic & rhs) noexcept(std::is_same_v); template - constexpr wide_integer & operator%=(const Integral & rhs); + constexpr integer & operator%=(const Integral & rhs); template - constexpr wide_integer & operator&=(const Integral & rhs) noexcept; + constexpr integer & operator&=(const Integral & rhs) noexcept; template - constexpr wide_integer & operator|=(const Integral & rhs) noexcept; + constexpr integer & operator|=(const Integral & rhs) noexcept; template - constexpr wide_integer & operator^=(const Integral & rhs) noexcept; + constexpr integer & operator^=(const Integral & rhs) noexcept; - constexpr wide_integer & operator<<=(int n); - constexpr wide_integer & operator>>=(int n) noexcept; + constexpr integer & operator<<=(int n) noexcept; + constexpr integer & operator>>=(int n) noexcept; - constexpr wide_integer & operator++() noexcept(is_same::value); - constexpr wide_integer operator++(int) noexcept(is_same::value); - constexpr wide_integer & operator--() noexcept(is_same::value); - constexpr wide_integer operator--(int) noexcept(is_same::value); + constexpr integer & operator++() noexcept(std::is_same_v); + constexpr integer operator++(int) noexcept(std::is_same_v); + constexpr integer & operator--() noexcept(std::is_same_v); + constexpr integer operator--(int) noexcept(std::is_same_v); // observers @@ -114,10 +122,10 @@ public: private: template - friend class wide_integer; + friend class integer; - friend class numeric_limits>; - friend class numeric_limits>; + friend class std::numeric_limits>; + friend class std::numeric_limits>; base_type m_arr[_impl::arr_size]; }; @@ -134,115 +142,117 @@ using __only_integer = typename std::enable_if() && IntegralC // Unary operators template -constexpr wide_integer operator~(const wide_integer & lhs) noexcept; +constexpr integer operator~(const integer & lhs) noexcept; template -constexpr wide_integer 
operator-(const wide_integer & lhs) noexcept(is_same::value); +constexpr integer operator-(const integer & lhs) noexcept(std::is_same_v); template -constexpr wide_integer operator+(const wide_integer & lhs) noexcept(is_same::value); +constexpr integer operator+(const integer & lhs) noexcept(std::is_same_v); // Binary operators template -std::common_type_t, wide_integer> constexpr -operator*(const wide_integer & lhs, const wide_integer & rhs); +std::common_type_t, integer> constexpr +operator*(const integer & lhs, const integer & rhs); template > std::common_type_t constexpr operator*(const Arithmetic & rhs, const Arithmetic2 & lhs); template -std::common_type_t, wide_integer> constexpr -operator/(const wide_integer & lhs, const wide_integer & rhs); +std::common_type_t, integer> constexpr +operator/(const integer & lhs, const integer & rhs); template > std::common_type_t constexpr operator/(const Arithmetic & rhs, const Arithmetic2 & lhs); template -std::common_type_t, wide_integer> constexpr -operator+(const wide_integer & lhs, const wide_integer & rhs); +std::common_type_t, integer> constexpr +operator+(const integer & lhs, const integer & rhs); template > std::common_type_t constexpr operator+(const Arithmetic & rhs, const Arithmetic2 & lhs); template -std::common_type_t, wide_integer> constexpr -operator-(const wide_integer & lhs, const wide_integer & rhs); +std::common_type_t, integer> constexpr +operator-(const integer & lhs, const integer & rhs); template > std::common_type_t constexpr operator-(const Arithmetic & rhs, const Arithmetic2 & lhs); template -std::common_type_t, wide_integer> constexpr -operator%(const wide_integer & lhs, const wide_integer & rhs); +std::common_type_t, integer> constexpr +operator%(const integer & lhs, const integer & rhs); template > std::common_type_t constexpr operator%(const Integral & rhs, const Integral2 & lhs); template -std::common_type_t, wide_integer> constexpr -operator&(const wide_integer & lhs, const wide_integer & rhs); +std::common_type_t, integer> constexpr +operator&(const integer & lhs, const integer & rhs); template > std::common_type_t constexpr operator&(const Integral & rhs, const Integral2 & lhs); template -std::common_type_t, wide_integer> constexpr -operator|(const wide_integer & lhs, const wide_integer & rhs); +std::common_type_t, integer> constexpr +operator|(const integer & lhs, const integer & rhs); template > std::common_type_t constexpr operator|(const Integral & rhs, const Integral2 & lhs); template -std::common_type_t, wide_integer> constexpr -operator^(const wide_integer & lhs, const wide_integer & rhs); +std::common_type_t, integer> constexpr +operator^(const integer & lhs, const integer & rhs); template > std::common_type_t constexpr operator^(const Integral & rhs, const Integral2 & lhs); // TODO: Integral template -constexpr wide_integer operator<<(const wide_integer & lhs, int n) noexcept; +constexpr integer operator<<(const integer & lhs, int n) noexcept; template -constexpr wide_integer operator>>(const wide_integer & lhs, int n) noexcept; +constexpr integer operator>>(const integer & lhs, int n) noexcept; template >> -constexpr wide_integer operator<<(const wide_integer & lhs, Int n) noexcept +constexpr integer operator<<(const integer & lhs, Int n) noexcept { return lhs << int(n); } template >> -constexpr wide_integer operator>>(const wide_integer & lhs, Int n) noexcept +constexpr integer operator>>(const integer & lhs, Int n) noexcept { return lhs >> int(n); } template -constexpr bool operator<(const wide_integer 
& lhs, const wide_integer & rhs); +constexpr bool operator<(const integer & lhs, const integer & rhs); template > constexpr bool operator<(const Arithmetic & rhs, const Arithmetic2 & lhs); template -constexpr bool operator>(const wide_integer & lhs, const wide_integer & rhs); +constexpr bool operator>(const integer & lhs, const integer & rhs); template > constexpr bool operator>(const Arithmetic & rhs, const Arithmetic2 & lhs); template -constexpr bool operator<=(const wide_integer & lhs, const wide_integer & rhs); +constexpr bool operator<=(const integer & lhs, const integer & rhs); template > constexpr bool operator<=(const Arithmetic & rhs, const Arithmetic2 & lhs); template -constexpr bool operator>=(const wide_integer & lhs, const wide_integer & rhs); +constexpr bool operator>=(const integer & lhs, const integer & rhs); template > constexpr bool operator>=(const Arithmetic & rhs, const Arithmetic2 & lhs); template -constexpr bool operator==(const wide_integer & lhs, const wide_integer & rhs); +constexpr bool operator==(const integer & lhs, const integer & rhs); template > constexpr bool operator==(const Arithmetic & rhs, const Arithmetic2 & lhs); template -constexpr bool operator!=(const wide_integer & lhs, const wide_integer & rhs); +constexpr bool operator!=(const integer & lhs, const integer & rhs); template > constexpr bool operator!=(const Arithmetic & rhs, const Arithmetic2 & lhs); -template -std::string to_string(const wide_integer & n); +} + +namespace std +{ template -struct hash>; +struct hash>; } diff --git a/base/common/wide_integer_impl.h b/base/common/wide_integer_impl.h index c77a9120a55..26bd6704bdc 100644 --- a/base/common/wide_integer_impl.h +++ b/base/common/wide_integer_impl.h @@ -1,19 +1,47 @@ /// Original is here https://github.com/cerevra/int #pragma once -#include "wide_integer.h" +#include "throwError.h" -#include -#include +#ifndef CHAR_BIT +#define CHAR_BIT 8 +#endif + +namespace wide +{ + +template +struct IsWideInteger +{ + static const constexpr bool value = false; +}; + +template +struct IsWideInteger> +{ + static const constexpr bool value = true; +}; + +template +static constexpr bool ArithmeticConcept() noexcept +{ + return std::is_arithmetic_v || IsWideInteger::value; +} + +template +static constexpr bool IntegralConcept() noexcept +{ + return std::is_integral_v || IsWideInteger::value; +} + +} namespace std { -#define CT(x) \ - std::common_type_t, std::decay_t> { x } // numeric limits template -class numeric_limits> +class numeric_limits> { public: static constexpr bool is_specialized = true; @@ -40,103 +68,84 @@ public: static constexpr bool traps = true; static constexpr bool tinyness_before = false; - static constexpr wide_integer min() noexcept + static constexpr wide::integer min() noexcept { if (is_same::value) { - using T = wide_integer; + using T = wide::integer; T res{}; - res.m_arr[T::_impl::big(0)] = std::numeric_limits::signed_base_type>::min(); + res.m_arr[T::_impl::big(0)] = std::numeric_limits::signed_base_type>::min(); return res; } return 0; } - static constexpr wide_integer max() noexcept + static constexpr wide::integer max() noexcept { - using T = wide_integer; + using T = wide::integer; T res{}; res.m_arr[T::_impl::big(0)] = is_same::value - ? std::numeric_limits::signed_base_type>::max() - : std::numeric_limits::base_type>::max(); - for (int i = 1; i < wide_integer::_impl::arr_size; ++i) + ? 
std::numeric_limits::signed_base_type>::max() + : std::numeric_limits::base_type>::max(); + for (int i = 1; i < wide::integer::_impl::arr_size; ++i) { - res.m_arr[T::_impl::big(i)] = std::numeric_limits::base_type>::max(); + res.m_arr[T::_impl::big(i)] = std::numeric_limits::base_type>::max(); } return res; } - static constexpr wide_integer lowest() noexcept { return min(); } - static constexpr wide_integer epsilon() noexcept { return 0; } - static constexpr wide_integer round_error() noexcept { return 0; } - static constexpr wide_integer infinity() noexcept { return 0; } - static constexpr wide_integer quiet_NaN() noexcept { return 0; } - static constexpr wide_integer signaling_NaN() noexcept { return 0; } - static constexpr wide_integer denorm_min() noexcept { return 0; } + static constexpr wide::integer lowest() noexcept { return min(); } + static constexpr wide::integer epsilon() noexcept { return 0; } + static constexpr wide::integer round_error() noexcept { return 0; } + static constexpr wide::integer infinity() noexcept { return 0; } + static constexpr wide::integer quiet_NaN() noexcept { return 0; } + static constexpr wide::integer signaling_NaN() noexcept { return 0; } + static constexpr wide::integer denorm_min() noexcept { return 0; } }; -template -struct IsWideInteger -{ - static const constexpr bool value = false; -}; - -template -struct IsWideInteger> -{ - static const constexpr bool value = true; -}; - -template -static constexpr bool ArithmeticConcept() noexcept -{ - return std::is_arithmetic_v || IsWideInteger::value; -} - -template -static constexpr bool IntegralConcept() noexcept -{ - return std::is_integral_v || IsWideInteger::value; -} - // type traits template -struct common_type, wide_integer> +struct common_type, wide::integer> { using type = std::conditional_t < Bits == Bits2, - wide_integer< + wide::integer< Bits, - std::conditional_t<(std::is_same::value && std::is_same::value), signed, unsigned>>, - std::conditional_t, wide_integer>>; + std::conditional_t<(std::is_same_v && std::is_same_v), signed, unsigned>>, + std::conditional_t, wide::integer>>; }; template -struct common_type, Arithmetic> +struct common_type, Arithmetic> { - static_assert(ArithmeticConcept(), ""); + static_assert(wide::ArithmeticConcept()); using type = std::conditional_t< - std::is_floating_point::value, + std::is_floating_point_v, Arithmetic, std::conditional_t< sizeof(Arithmetic) < Bits * sizeof(long), - wide_integer, + wide::integer, std::conditional_t< Bits * sizeof(long) < sizeof(Arithmetic), Arithmetic, std::conditional_t< - Bits * sizeof(long) == sizeof(Arithmetic) && (is_same::value || std::is_signed::value), + Bits * sizeof(long) == sizeof(Arithmetic) && (std::is_same_v || std::is_signed_v), Arithmetic, - wide_integer>>>>; + wide::integer>>>>; }; template -struct common_type> : std::common_type, Arithmetic> +struct common_type> : common_type, Arithmetic> { }; +} + +namespace wide +{ + template -struct wide_integer::_impl +struct integer::_impl { static_assert(Bits % CHAR_BIT == 0, "=)"); @@ -152,7 +161,7 @@ struct wide_integer::_impl static constexpr unsigned any(unsigned idx) { return idx; } template - constexpr static bool is_negative(const wide_integer & n) noexcept + constexpr static bool is_negative(const integer & n) noexcept { if constexpr (std::is_same_v) return static_cast(n.m_arr[big(0)]) < 0; @@ -161,7 +170,7 @@ struct wide_integer::_impl } template - constexpr static wide_integer make_positive(const wide_integer & n) noexcept + constexpr static integer 
make_positive(const integer & n) noexcept { return is_negative(n) ? operator_unary_minus(n) : n; } @@ -178,7 +187,7 @@ struct wide_integer::_impl } template - constexpr static void wide_integer_from_bultin(wide_integer & self, Integral rhs) noexcept + constexpr static void wide_integer_from_bultin(integer & self, Integral rhs) noexcept { auto r = _impl::to_Integral(rhs); @@ -197,7 +206,7 @@ struct wide_integer::_impl } } - constexpr static void wide_integer_from_bultin(wide_integer & self, double rhs) noexcept + constexpr static void wide_integer_from_bultin(integer & self, double rhs) noexcept { if ((rhs > 0 && rhs < std::numeric_limits::max()) || (rhs < 0 && rhs > std::numeric_limits::min())) { @@ -223,10 +232,10 @@ struct wide_integer::_impl template constexpr static void - wide_integer_from_wide_integer(wide_integer & self, const wide_integer & rhs) noexcept + wide_integer_from_wide_integer(integer & self, const integer & rhs) noexcept { // int Bits_to_copy = std::min(arr_size, rhs.arr_size); - auto rhs_arr_size = wide_integer::_impl::arr_size; + auto rhs_arr_size = integer::_impl::arr_size; int base_elems_to_copy = _impl::arr_size < rhs_arr_size ? _impl::arr_size : rhs_arr_size; for (int i = 0; i < base_elems_to_copy; ++i) { @@ -244,14 +253,14 @@ struct wide_integer::_impl return sizeof(T) * CHAR_BIT <= Bits; } - constexpr static wide_integer shift_left(const wide_integer & rhs, int n) + constexpr static integer shift_left(const integer & rhs, int n) noexcept { if (static_cast(n) >= base_bits * arr_size) return 0; if (n <= 0) return rhs; - wide_integer lhs = rhs; + integer lhs = rhs; int bit_shift = n % base_bits; unsigned n_bytes = n / base_bits; if (bit_shift) @@ -275,23 +284,19 @@ struct wide_integer::_impl return lhs; } - constexpr static wide_integer shift_left(const wide_integer & rhs, int n) + constexpr static integer shift_left(const integer & rhs, int n) noexcept { - // static_assert(is_negative(rhs), "shift left for negative lhsbers is underfined!"); - if (is_negative(rhs)) - throw std::runtime_error("shift left for negative lhsbers is underfined!"); - - return wide_integer(shift_left(wide_integer(rhs), n)); + return integer(shift_left(integer(rhs), n)); } - constexpr static wide_integer shift_right(const wide_integer & rhs, int n) noexcept + constexpr static integer shift_right(const integer & rhs, int n) noexcept { if (static_cast(n) >= base_bits * arr_size) return 0; if (n <= 0) return rhs; - wide_integer lhs = rhs; + integer lhs = rhs; int bit_shift = n % base_bits; unsigned n_bytes = n / base_bits; if (bit_shift) @@ -315,7 +320,7 @@ struct wide_integer::_impl return lhs; } - constexpr static wide_integer shift_right(const wide_integer & rhs, int n) noexcept + constexpr static integer shift_right(const integer & rhs, int n) noexcept { if (static_cast(n) >= base_bits * arr_size) return 0; @@ -324,14 +329,14 @@ struct wide_integer::_impl bool is_neg = is_negative(rhs); if (!is_neg) - return shift_right(wide_integer(rhs), n); + return shift_right(integer(rhs), n); - wide_integer lhs = rhs; + integer lhs = rhs; int bit_shift = n % base_bits; unsigned n_bytes = n / base_bits; if (bit_shift) { - lhs = shift_right(wide_integer(lhs), bit_shift); + lhs = shift_right(integer(lhs), bit_shift); lhs.m_arr[big(0)] |= std::numeric_limits::max() << (base_bits - bit_shift); } if (n_bytes) @@ -349,8 +354,8 @@ struct wide_integer::_impl } template - constexpr static wide_integer - operator_plus_T(const wide_integer & lhs, T rhs) noexcept(is_same::value) + constexpr static integer + 
operator_plus_T(const integer & lhs, T rhs) noexcept(std::is_same_v) { if (rhs < 0) return _operator_minus_T(lhs, -rhs); @@ -360,10 +365,10 @@ struct wide_integer::_impl private: template - constexpr static wide_integer - _operator_minus_T(const wide_integer & lhs, T rhs) noexcept(is_same::value) + constexpr static integer + _operator_minus_T(const integer & lhs, T rhs) noexcept(std::is_same_v) { - wide_integer res = lhs; + integer res = lhs; bool is_underflow = false; int r_idx = 0; @@ -399,10 +404,10 @@ private: } template - constexpr static wide_integer - _operator_plus_T(const wide_integer & lhs, T rhs) noexcept(is_same::value) + constexpr static integer + _operator_plus_T(const integer & lhs, T rhs) noexcept(std::is_same_v) { - wide_integer res = lhs; + integer res = lhs; bool is_overflow = false; int r_idx = 0; @@ -438,27 +443,27 @@ private: } public: - constexpr static wide_integer operator_unary_tilda(const wide_integer & lhs) noexcept + constexpr static integer operator_unary_tilda(const integer & lhs) noexcept { - wide_integer res{}; + integer res{}; for (int i = 0; i < arr_size; ++i) res.m_arr[any(i)] = ~lhs.m_arr[any(i)]; return res; } - constexpr static wide_integer - operator_unary_minus(const wide_integer & lhs) noexcept(is_same::value) + constexpr static integer + operator_unary_minus(const integer & lhs) noexcept(std::is_same_v) { return operator_plus_T(operator_unary_tilda(lhs), 1); } template - constexpr static auto operator_plus(const wide_integer & lhs, const T & rhs) noexcept(is_same::value) + constexpr static auto operator_plus(const integer & lhs, const T & rhs) noexcept(std::is_same_v) { if constexpr (should_keep_size()) { - wide_integer t = rhs; + integer t = rhs; if (is_negative(t)) return _operator_minus_wide_integer(lhs, operator_unary_minus(t)); else @@ -467,17 +472,17 @@ public: else { static_assert(T::_impl::_is_wide_integer, ""); - return std::common_type_t, wide_integer>::_impl::operator_plus( - wide_integer(lhs), rhs); + return std::common_type_t, integer>::_impl::operator_plus( + integer(lhs), rhs); } } template - constexpr static auto operator_minus(const wide_integer & lhs, const T & rhs) noexcept(is_same::value) + constexpr static auto operator_minus(const integer & lhs, const T & rhs) noexcept(std::is_same_v) { if constexpr (should_keep_size()) { - wide_integer t = rhs; + integer t = rhs; if (is_negative(t)) return _operator_plus_wide_integer(lhs, operator_unary_minus(t)); else @@ -486,16 +491,16 @@ public: else { static_assert(T::_impl::_is_wide_integer, ""); - return std::common_type_t, wide_integer>::_impl::operator_minus( - wide_integer(lhs), rhs); + return std::common_type_t, integer>::_impl::operator_minus( + integer(lhs), rhs); } } private: - constexpr static wide_integer _operator_minus_wide_integer( - const wide_integer & lhs, const wide_integer & rhs) noexcept(is_same::value) + constexpr static integer _operator_minus_wide_integer( + const integer & lhs, const integer & rhs) noexcept(std::is_same_v) { - wide_integer res = lhs; + integer res = lhs; bool is_underflow = false; for (int idx = 0; idx < arr_size; ++idx) @@ -518,10 +523,10 @@ private: return res; } - constexpr static wide_integer _operator_plus_wide_integer( - const wide_integer & lhs, const wide_integer & rhs) noexcept(is_same::value) + constexpr static integer _operator_plus_wide_integer( + const integer & lhs, const integer & rhs) noexcept(std::is_same_v) { - wide_integer res = lhs; + integer res = lhs; bool is_overflow = false; for (int idx = 0; idx < arr_size; ++idx) @@ 
-546,14 +551,14 @@ private: public: template - constexpr static auto operator_star(const wide_integer & lhs, const T & rhs) + constexpr static auto operator_star(const integer & lhs, const T & rhs) { if constexpr (should_keep_size()) { - const wide_integer a = make_positive(lhs); - wide_integer t = make_positive(wide_integer(rhs)); + const integer a = make_positive(lhs); + integer t = make_positive(integer(rhs)); - wide_integer res = 0; + integer res = 0; for (size_t i = 0; i < arr_size * base_bits; ++i) { @@ -563,7 +568,7 @@ public: t = shift_right(t, 1); } - if (is_same::value && is_negative(wide_integer(rhs)) != is_negative(lhs)) + if (std::is_same_v && is_negative(integer(rhs)) != is_negative(lhs)) res = operator_unary_minus(res); return res; @@ -571,19 +576,19 @@ public: else { static_assert(T::_impl::_is_wide_integer, ""); - return std::common_type_t, T>::_impl::operator_star(T(lhs), rhs); + return std::common_type_t, T>::_impl::operator_star(T(lhs), rhs); } } template - constexpr static bool operator_more(const wide_integer & lhs, const T & rhs) noexcept + constexpr static bool operator_more(const integer & lhs, const T & rhs) noexcept { if constexpr (should_keep_size()) { // static_assert(Signed == std::is_signed::value, // "warning: operator_more: comparison of integers of different signs"); - wide_integer t = rhs; + integer t = rhs; if (std::numeric_limits::is_signed && (is_negative(lhs) != is_negative(t))) return is_negative(t); @@ -599,19 +604,19 @@ public: else { static_assert(T::_impl::_is_wide_integer, ""); - return std::common_type_t, T>::_impl::operator_more(T(lhs), rhs); + return std::common_type_t, T>::_impl::operator_more(T(lhs), rhs); } } template - constexpr static bool operator_less(const wide_integer & lhs, const T & rhs) noexcept + constexpr static bool operator_less(const integer & lhs, const T & rhs) noexcept { if constexpr (should_keep_size()) { // static_assert(Signed == std::is_signed::value, // "warning: operator_less: comparison of integers of different signs"); - wide_integer t = rhs; + integer t = rhs; if (std::numeric_limits::is_signed && (is_negative(lhs) != is_negative(t))) return is_negative(lhs); @@ -625,16 +630,16 @@ public: else { static_assert(T::_impl::_is_wide_integer, ""); - return std::common_type_t, T>::_impl::operator_less(T(lhs), rhs); + return std::common_type_t, T>::_impl::operator_less(T(lhs), rhs); } } template - constexpr static bool operator_eq(const wide_integer & lhs, const T & rhs) noexcept + constexpr static bool operator_eq(const integer & lhs, const T & rhs) noexcept { if constexpr (should_keep_size()) { - wide_integer t = rhs; + integer t = rhs; for (int i = 0; i < arr_size; ++i) if (lhs.m_arr[any(i)] != t.m_arr[any(i)]) @@ -645,17 +650,17 @@ public: else { static_assert(T::_impl::_is_wide_integer, ""); - return std::common_type_t, T>::_impl::operator_eq(T(lhs), rhs); + return std::common_type_t, T>::_impl::operator_eq(T(lhs), rhs); } } template - constexpr static auto operator_pipe(const wide_integer & lhs, const T & rhs) noexcept + constexpr static auto operator_pipe(const integer & lhs, const T & rhs) noexcept { if constexpr (should_keep_size()) { - wide_integer t = rhs; - wide_integer res = lhs; + integer t = rhs; + integer res = lhs; for (int i = 0; i < arr_size; ++i) res.m_arr[any(i)] |= t.m_arr[any(i)]; @@ -664,17 +669,17 @@ public: else { static_assert(T::_impl::_is_wide_integer, ""); - return std::common_type_t, T>::_impl::operator_pipe(T(lhs), rhs); + return std::common_type_t, T>::_impl::operator_pipe(T(lhs), rhs); } 
} template - constexpr static auto operator_amp(const wide_integer & lhs, const T & rhs) noexcept + constexpr static auto operator_amp(const integer & lhs, const T & rhs) noexcept { if constexpr (should_keep_size()) { - wide_integer t = rhs; - wide_integer res = lhs; + integer t = rhs; + integer res = lhs; for (int i = 0; i < arr_size; ++i) res.m_arr[any(i)] &= t.m_arr[any(i)]; @@ -683,7 +688,7 @@ public: else { static_assert(T::_impl::_is_wide_integer, ""); - return std::common_type_t, T>::_impl::operator_amp(T(lhs), rhs); + return std::common_type_t, T>::_impl::operator_amp(T(lhs), rhs); } } @@ -702,7 +707,7 @@ private: } if (is_zero) - throw std::domain_error("divide by zero"); + throwError("divide by zero"); T n = lhserator; T d = denominator; @@ -733,15 +738,15 @@ private: public: template - constexpr static auto operator_slash(const wide_integer & lhs, const T & rhs) + constexpr static auto operator_slash(const integer & lhs, const T & rhs) { if constexpr (should_keep_size()) { - wide_integer o = rhs; - wide_integer quotient{}, remainder{}; + integer o = rhs; + integer quotient{}, remainder{}; divide(make_positive(lhs), make_positive(o), quotient, remainder); - if (is_same::value && is_negative(o) != is_negative(lhs)) + if (std::is_same_v && is_negative(o) != is_negative(lhs)) quotient = operator_unary_minus(quotient); return quotient; @@ -749,20 +754,20 @@ public: else { static_assert(T::_impl::_is_wide_integer, ""); - return std::common_type_t, wide_integer>::operator_slash(T(lhs), rhs); + return std::common_type_t, integer>::operator_slash(T(lhs), rhs); } } template - constexpr static auto operator_percent(const wide_integer & lhs, const T & rhs) + constexpr static auto operator_percent(const integer & lhs, const T & rhs) { if constexpr (should_keep_size()) { - wide_integer o = rhs; - wide_integer quotient{}, remainder{}; + integer o = rhs; + integer quotient{}, remainder{}; divide(make_positive(lhs), make_positive(o), quotient, remainder); - if (is_same::value && is_negative(lhs)) + if (std::is_same_v && is_negative(lhs)) remainder = operator_unary_minus(remainder); return remainder; @@ -770,18 +775,18 @@ public: else { static_assert(T::_impl::_is_wide_integer, ""); - return std::common_type_t, wide_integer>::operator_percent(T(lhs), rhs); + return std::common_type_t, integer>::operator_percent(T(lhs), rhs); } } // ^ template - constexpr static auto operator_circumflex(const wide_integer & lhs, const T & rhs) noexcept + constexpr static auto operator_circumflex(const integer & lhs, const T & rhs) noexcept { if constexpr (should_keep_size()) { - wide_integer t(rhs); - wide_integer res = lhs; + integer t(rhs); + integer res = lhs; for (int i = 0; i < arr_size; ++i) res.m_arr[any(i)] ^= t.m_arr[any(i)]; @@ -794,11 +799,11 @@ public: } } - constexpr static wide_integer from_str(const char * c) + constexpr static integer from_str(const char * c) { - wide_integer res = 0; + integer res = 0; - bool is_neg = is_same::value && *c == '-'; + bool is_neg = std::is_same_v && *c == '-'; if (is_neg) ++c; @@ -827,7 +832,7 @@ public: ++c; } else - throw std::runtime_error("invalid char from"); + throwError("invalid char from"); } } else @@ -835,7 +840,7 @@ public: while (*c) { if (*c < '0' || *c > '9') - throw std::runtime_error("invalid char from"); + throwError("invalid char from"); res = operator_star(res, 10U); res = operator_plus_T(res, *c - '0'); @@ -854,7 +859,7 @@ public: template template -constexpr wide_integer::wide_integer(T rhs) noexcept +constexpr integer::integer(T rhs) noexcept : 
m_arr{} { if constexpr (IsWideInteger::value) @@ -865,7 +870,7 @@ constexpr wide_integer::wide_integer(T rhs) noexcept template template -constexpr wide_integer::wide_integer(std::initializer_list il) noexcept +constexpr integer::integer(std::initializer_list il) noexcept : m_arr{} { if (il.size() == 1) @@ -881,7 +886,7 @@ constexpr wide_integer::wide_integer(std::initializer_list il) template template -constexpr wide_integer & wide_integer::operator=(const wide_integer & rhs) noexcept +constexpr integer & integer::operator=(const integer & rhs) noexcept { _impl::wide_integer_from_wide_integer(*this, rhs); return *this; @@ -889,7 +894,7 @@ constexpr wide_integer & wide_integer::operator=(con template template -constexpr wide_integer & wide_integer::operator=(T rhs) noexcept +constexpr integer & integer::operator=(T rhs) noexcept { _impl::wide_integer_from_bultin(*this, rhs); return *this; @@ -897,7 +902,7 @@ constexpr wide_integer & wide_integer::operator=(T r template template -constexpr wide_integer & wide_integer::operator*=(const T & rhs) +constexpr integer & integer::operator*=(const T & rhs) { *this = *this * rhs; return *this; @@ -905,7 +910,7 @@ constexpr wide_integer & wide_integer::operator*=(co template template -constexpr wide_integer & wide_integer::operator/=(const T & rhs) +constexpr integer & integer::operator/=(const T & rhs) { *this = *this / rhs; return *this; @@ -913,7 +918,7 @@ constexpr wide_integer & wide_integer::operator/=(co template template -constexpr wide_integer & wide_integer::operator+=(const T & rhs) noexcept(is_same::value) +constexpr integer & integer::operator+=(const T & rhs) noexcept(std::is_same_v) { *this = *this + rhs; return *this; @@ -921,7 +926,7 @@ constexpr wide_integer & wide_integer::operator+=(co template template -constexpr wide_integer & wide_integer::operator-=(const T & rhs) noexcept(is_same::value) +constexpr integer & integer::operator-=(const T & rhs) noexcept(std::is_same_v) { *this = *this - rhs; return *this; @@ -929,7 +934,7 @@ constexpr wide_integer & wide_integer::operator-=(co template template -constexpr wide_integer & wide_integer::operator%=(const T & rhs) +constexpr integer & integer::operator%=(const T & rhs) { *this = *this % rhs; return *this; @@ -937,7 +942,7 @@ constexpr wide_integer & wide_integer::operator%=(co template template -constexpr wide_integer & wide_integer::operator&=(const T & rhs) noexcept +constexpr integer & integer::operator&=(const T & rhs) noexcept { *this = *this & rhs; return *this; @@ -945,7 +950,7 @@ constexpr wide_integer & wide_integer::operator&=(co template template -constexpr wide_integer & wide_integer::operator|=(const T & rhs) noexcept +constexpr integer & integer::operator|=(const T & rhs) noexcept { *this = *this | rhs; return *this; @@ -953,35 +958,35 @@ constexpr wide_integer & wide_integer::operator|=(co template template -constexpr wide_integer & wide_integer::operator^=(const T & rhs) noexcept +constexpr integer & integer::operator^=(const T & rhs) noexcept { *this = *this ^ rhs; return *this; } template -constexpr wide_integer & wide_integer::operator<<=(int n) +constexpr integer & integer::operator<<=(int n) noexcept { *this = _impl::shift_left(*this, n); return *this; } template -constexpr wide_integer & wide_integer::operator>>=(int n) noexcept +constexpr integer & integer::operator>>=(int n) noexcept { *this = _impl::shift_right(*this, n); return *this; } template -constexpr wide_integer & wide_integer::operator++() noexcept(is_same::value) +constexpr integer & 
integer::operator++() noexcept(std::is_same_v) { *this = _impl::operator_plus(*this, 1); return *this; } template -constexpr wide_integer wide_integer::operator++(int) noexcept(is_same::value) +constexpr integer integer::operator++(int) noexcept(std::is_same_v) { auto tmp = *this; *this = _impl::operator_plus(*this, 1); @@ -989,14 +994,14 @@ constexpr wide_integer wide_integer::operator++(int) } template -constexpr wide_integer & wide_integer::operator--() noexcept(is_same::value) +constexpr integer & integer::operator--() noexcept(std::is_same_v) { *this = _impl::operator_minus(*this, 1); return *this; } template -constexpr wide_integer wide_integer::operator--(int) noexcept(is_same::value) +constexpr integer integer::operator--(int) noexcept(std::is_same_v) { auto tmp = *this; *this = _impl::operator_minus(*this, 1); @@ -1004,14 +1009,14 @@ constexpr wide_integer wide_integer::operator--(int) } template -constexpr wide_integer::operator bool() const noexcept +constexpr integer::operator bool() const noexcept { return !_impl::operator_eq(*this, 0); } template template -constexpr wide_integer::operator T() const noexcept +constexpr integer::operator T() const noexcept { static_assert(std::numeric_limits::is_integer, ""); T res = 0; @@ -1023,12 +1028,12 @@ constexpr wide_integer::operator T() const noexcept } template -constexpr wide_integer::operator long double() const noexcept +constexpr integer::operator long double() const noexcept { if (_impl::operator_eq(*this, 0)) return 0; - wide_integer tmp = *this; + integer tmp = *this; if (_impl::is_negative(*this)) tmp = -tmp; @@ -1048,42 +1053,45 @@ constexpr wide_integer::operator long double() const noexcept } template -constexpr wide_integer::operator double() const noexcept +constexpr integer::operator double() const noexcept { return static_cast(*this); } template -constexpr wide_integer::operator float() const noexcept +constexpr integer::operator float() const noexcept { return static_cast(*this); } // Unary operators template -constexpr wide_integer operator~(const wide_integer & lhs) noexcept +constexpr integer operator~(const integer & lhs) noexcept { - return wide_integer::_impl::operator_unary_tilda(lhs); + return integer::_impl::operator_unary_tilda(lhs); } template -constexpr wide_integer operator-(const wide_integer & lhs) noexcept(is_same::value) +constexpr integer operator-(const integer & lhs) noexcept(std::is_same_v) { - return wide_integer::_impl::operator_unary_minus(lhs); + return integer::_impl::operator_unary_minus(lhs); } template -constexpr wide_integer operator+(const wide_integer & lhs) noexcept(is_same::value) +constexpr integer operator+(const integer & lhs) noexcept(std::is_same_v) { return lhs; } +#define CT(x) \ + std::common_type_t, std::decay_t> { x } + // Binary operators template -std::common_type_t, wide_integer> constexpr -operator*(const wide_integer & lhs, const wide_integer & rhs) +std::common_type_t, integer> constexpr +operator*(const integer & lhs, const integer & rhs) { - return std::common_type_t, wide_integer>::_impl::operator_star(lhs, rhs); + return std::common_type_t, integer>::_impl::operator_star(lhs, rhs); } template @@ -1093,10 +1101,10 @@ std::common_type_t constexpr operator*(const Arithmetic } template -std::common_type_t, wide_integer> constexpr -operator/(const wide_integer & lhs, const wide_integer & rhs) +std::common_type_t, integer> constexpr +operator/(const integer & lhs, const integer & rhs) { - return std::common_type_t, wide_integer>::_impl::operator_slash(lhs, rhs); + return 
std::common_type_t, integer>::_impl::operator_slash(lhs, rhs); } template std::common_type_t constexpr operator/(const Arithmetic & lhs, const Arithmetic2 & rhs) @@ -1105,10 +1113,10 @@ std::common_type_t constexpr operator/(const Arithmetic } template -std::common_type_t, wide_integer> constexpr -operator+(const wide_integer & lhs, const wide_integer & rhs) +std::common_type_t, integer> constexpr +operator+(const integer & lhs, const integer & rhs) { - return std::common_type_t, wide_integer>::_impl::operator_plus(lhs, rhs); + return std::common_type_t, integer>::_impl::operator_plus(lhs, rhs); } template std::common_type_t constexpr operator+(const Arithmetic & lhs, const Arithmetic2 & rhs) @@ -1117,10 +1125,10 @@ std::common_type_t constexpr operator+(const Arithmetic } template -std::common_type_t, wide_integer> constexpr -operator-(const wide_integer & lhs, const wide_integer & rhs) +std::common_type_t, integer> constexpr +operator-(const integer & lhs, const integer & rhs) { - return std::common_type_t, wide_integer>::_impl::operator_minus(lhs, rhs); + return std::common_type_t, integer>::_impl::operator_minus(lhs, rhs); } template std::common_type_t constexpr operator-(const Arithmetic & lhs, const Arithmetic2 & rhs) @@ -1129,10 +1137,10 @@ std::common_type_t constexpr operator-(const Arithmetic } template -std::common_type_t, wide_integer> constexpr -operator%(const wide_integer & lhs, const wide_integer & rhs) +std::common_type_t, integer> constexpr +operator%(const integer & lhs, const integer & rhs) { - return std::common_type_t, wide_integer>::_impl::operator_percent(lhs, rhs); + return std::common_type_t, integer>::_impl::operator_percent(lhs, rhs); } template std::common_type_t constexpr operator%(const Integral & lhs, const Integral2 & rhs) @@ -1141,10 +1149,10 @@ std::common_type_t constexpr operator%(const Integral & lhs } template -std::common_type_t, wide_integer> constexpr -operator&(const wide_integer & lhs, const wide_integer & rhs) +std::common_type_t, integer> constexpr +operator&(const integer & lhs, const integer & rhs) { - return std::common_type_t, wide_integer>::_impl::operator_amp(lhs, rhs); + return std::common_type_t, integer>::_impl::operator_amp(lhs, rhs); } template std::common_type_t constexpr operator&(const Integral & lhs, const Integral2 & rhs) @@ -1153,10 +1161,10 @@ std::common_type_t constexpr operator&(const Integral & lhs } template -std::common_type_t, wide_integer> constexpr -operator|(const wide_integer & lhs, const wide_integer & rhs) +std::common_type_t, integer> constexpr +operator|(const integer & lhs, const integer & rhs) { - return std::common_type_t, wide_integer>::_impl::operator_pipe(lhs, rhs); + return std::common_type_t, integer>::_impl::operator_pipe(lhs, rhs); } template std::common_type_t constexpr operator|(const Integral & lhs, const Integral2 & rhs) @@ -1165,10 +1173,10 @@ std::common_type_t constexpr operator|(const Integral & lhs } template -std::common_type_t, wide_integer> constexpr -operator^(const wide_integer & lhs, const wide_integer & rhs) +std::common_type_t, integer> constexpr +operator^(const integer & lhs, const integer & rhs) { - return std::common_type_t, wide_integer>::_impl::operator_circumflex(lhs, rhs); + return std::common_type_t, integer>::_impl::operator_circumflex(lhs, rhs); } template std::common_type_t constexpr operator^(const Integral & lhs, const Integral2 & rhs) @@ -1177,20 +1185,20 @@ std::common_type_t constexpr operator^(const Integral & lhs } template -constexpr wide_integer operator<<(const 
wide_integer & lhs, int n) noexcept +constexpr integer operator<<(const integer & lhs, int n) noexcept { - return wide_integer::_impl::shift_left(lhs, n); + return integer::_impl::shift_left(lhs, n); } template -constexpr wide_integer operator>>(const wide_integer & lhs, int n) noexcept +constexpr integer operator>>(const integer & lhs, int n) noexcept { - return wide_integer::_impl::shift_right(lhs, n); + return integer::_impl::shift_right(lhs, n); } template -constexpr bool operator<(const wide_integer & lhs, const wide_integer & rhs) +constexpr bool operator<(const integer & lhs, const integer & rhs) { - return std::common_type_t, wide_integer>::_impl::operator_less(lhs, rhs); + return std::common_type_t, integer>::_impl::operator_less(lhs, rhs); } template constexpr bool operator<(const Arithmetic & lhs, const Arithmetic2 & rhs) @@ -1199,9 +1207,9 @@ constexpr bool operator<(const Arithmetic & lhs, const Arithmetic2 & rhs) } template -constexpr bool operator>(const wide_integer & lhs, const wide_integer & rhs) +constexpr bool operator>(const integer & lhs, const integer & rhs) { - return std::common_type_t, wide_integer>::_impl::operator_more(lhs, rhs); + return std::common_type_t, integer>::_impl::operator_more(lhs, rhs); } template constexpr bool operator>(const Arithmetic & lhs, const Arithmetic2 & rhs) @@ -1210,10 +1218,10 @@ constexpr bool operator>(const Arithmetic & lhs, const Arithmetic2 & rhs) } template -constexpr bool operator<=(const wide_integer & lhs, const wide_integer & rhs) +constexpr bool operator<=(const integer & lhs, const integer & rhs) { - return std::common_type_t, wide_integer>::_impl::operator_less(lhs, rhs) - || std::common_type_t, wide_integer>::_impl::operator_eq(lhs, rhs); + return std::common_type_t, integer>::_impl::operator_less(lhs, rhs) + || std::common_type_t, integer>::_impl::operator_eq(lhs, rhs); } template constexpr bool operator<=(const Arithmetic & lhs, const Arithmetic2 & rhs) @@ -1222,10 +1230,10 @@ constexpr bool operator<=(const Arithmetic & lhs, const Arithmetic2 & rhs) } template -constexpr bool operator>=(const wide_integer & lhs, const wide_integer & rhs) +constexpr bool operator>=(const integer & lhs, const integer & rhs) { - return std::common_type_t, wide_integer>::_impl::operator_more(lhs, rhs) - || std::common_type_t, wide_integer>::_impl::operator_eq(lhs, rhs); + return std::common_type_t, integer>::_impl::operator_more(lhs, rhs) + || std::common_type_t, integer>::_impl::operator_eq(lhs, rhs); } template constexpr bool operator>=(const Arithmetic & lhs, const Arithmetic2 & rhs) @@ -1234,9 +1242,9 @@ constexpr bool operator>=(const Arithmetic & lhs, const Arithmetic2 & rhs) } template -constexpr bool operator==(const wide_integer & lhs, const wide_integer & rhs) +constexpr bool operator==(const integer & lhs, const integer & rhs) { - return std::common_type_t, wide_integer>::_impl::operator_eq(lhs, rhs); + return std::common_type_t, integer>::_impl::operator_eq(lhs, rhs); } template constexpr bool operator==(const Arithmetic & lhs, const Arithmetic2 & rhs) @@ -1245,9 +1253,9 @@ constexpr bool operator==(const Arithmetic & lhs, const Arithmetic2 & rhs) } template -constexpr bool operator!=(const wide_integer & lhs, const wide_integer & rhs) +constexpr bool operator!=(const integer & lhs, const integer & rhs) { - return !std::common_type_t, wide_integer>::_impl::operator_eq(lhs, rhs); + return !std::common_type_t, integer>::_impl::operator_eq(lhs, rhs); } template constexpr bool operator!=(const Arithmetic & lhs, const Arithmetic2 & 
rhs)
@@ -1255,35 +1263,17 @@ constexpr bool operator!=(const Arithmetic & lhs, const Arithmetic2 & rhs)
 {
     return CT(lhs) != CT(rhs);
 }
 
-template <size_t Bits, typename Signed>
-inline std::string to_string(const wide_integer<Bits, Signed> & n)
-{
-    std::string res;
-    if (wide_integer<Bits, Signed>::_impl::operator_eq(n, 0U))
-        return "0";
+#undef CT
 
-    wide_integer<Bits, Signed> t;
-    bool is_neg = wide_integer<Bits, Signed>::_impl::is_negative(n);
-    if (is_neg)
-        t = wide_integer<Bits, Signed>::_impl::operator_unary_minus(n);
-    else
-        t = n;
-
-    while (!wide_integer<Bits, Signed>::_impl::operator_eq(t, 0U))
-    {
-        res.insert(res.begin(), '0' + char(wide_integer<Bits, Signed>::_impl::operator_percent(t, 10U)));
-        t = wide_integer<Bits, Signed>::_impl::operator_slash(t, 10U);
-    }
-
-    if (is_neg)
-        res.insert(res.begin(), '-');
-    return res;
 }
 
-template <size_t Bits, typename Signed>
-struct hash<wide_integer<Bits, Signed>>
+namespace std
 {
-    std::size_t operator()(const wide_integer<Bits, Signed> & lhs) const
+
+template <size_t Bits, typename Signed>
+struct hash<wide::integer<Bits, Signed>>
+{
+    std::size_t operator()(const wide::integer<Bits, Signed> & lhs) const
     {
         static_assert(Bits % (sizeof(size_t) * 8) == 0);
@@ -1293,9 +1283,8 @@ struct hash<wide::integer<Bits, Signed>>
 
         size_t res = 0;
         for (unsigned i = 0; i < count; ++i)
             res ^= ptr[i];
-        return hash<size_t>()(res);
+        return res;
     }
 };
-#undef CT
 }
diff --git a/base/common/wide_integer_to_string.h b/base/common/wide_integer_to_string.h
new file mode 100644
index 00000000000..9908ef4be7a
--- /dev/null
+++ b/base/common/wide_integer_to_string.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include <string>
+
+#include "wide_integer.h"
+
+namespace wide
+{
+
+template <size_t Bits, typename Signed>
+inline std::string to_string(const integer<Bits, Signed> & n)
+{
+    std::string res;
+    if (integer<Bits, Signed>::_impl::operator_eq(n, 0U))
+        return "0";
+
+    integer<Bits, Signed> t;
+    bool is_neg = integer<Bits, Signed>::_impl::is_negative(n);
+    if (is_neg)
+        t = integer<Bits, Signed>::_impl::operator_unary_minus(n);
+    else
+        t = n;
+
+    while (!integer<Bits, Signed>::_impl::operator_eq(t, 0U))
+    {
+        res.insert(res.begin(), '0' + char(integer<Bits, Signed>::_impl::operator_percent(t, 10U)));
+        t = integer<Bits, Signed>::_impl::operator_slash(t, 10U);
+    }
+
+    if (is_neg)
+        res.insert(res.begin(), '-');
+    return res;
+}
+
+}
diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake
index 32443ed78c3..7c7e9c388a0 100644
--- a/cmake/sanitize.cmake
+++ b/cmake/sanitize.cmake
@@ -36,7 +36,15 @@ if (SANITIZE)
     endif ()
 
 elseif (SANITIZE STREQUAL "thread")
-    set (TSAN_FLAGS "-fsanitize=thread -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt")
+    set (TSAN_FLAGS "-fsanitize=thread")
+    if (COMPILER_CLANG)
+        set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt")
+    else()
+        message (WARNING "TSAN suppressions were not passed to the compiler (since the compiler is not clang)")
+        message (WARNING "Use the following command to pass them manually:")
+        message (WARNING "    export TSAN_OPTIONS=\"$TSAN_OPTIONS suppressions=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt\"")
+    endif()
+
     set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${TSAN_FLAGS}")
     set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${TSAN_FLAGS}")
diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 3317bb06043..ccbadb84f27 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -10,7 +10,7 @@ stage=${stage:-}
 
 # A variable to pass additional flags to CMake.
 # Here we explicitly default it to nothing so that bash doesn't complain about
-# it being undefined. Also read it as array so that we can pass an empty list
+# it being undefined. Also read it as array so that we can pass an empty list
 # of additional variable to cmake properly, and it doesn't generate an extra
 # empty parameter.
read -ra FASTTEST_CMAKE_FLAGS <<< "${FASTTEST_CMAKE_FLAGS:-}"
@@ -127,6 +127,7 @@ ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-se
 ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/
 ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/
 ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/
+ln -s /usr/share/clickhouse-test/config/executable_dictionary.xml /etc/clickhouse-server/
 ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/
 ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/
 #ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/
diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index 364e9994ab7..32ea74193b0 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -394,12 +394,24 @@ create table query_run_metrics_denorm engine File(TSV, 'analyze/query-run-metric
     order by test, query_index, metric_names, version, query_id
     ;
 
+-- Filter out tests that don't have an even number of runs, to avoid breaking
+-- the further calculations. This may happen if there was an error during the
+-- test runs, e.g. the server died. It will be reported in test errors, so we
+-- don't have to report it again.
+create view broken_queries as
+    select test, query_index
+    from query_runs
+    group by test, query_index
+    having count(*) % 2 != 0
+    ;
+
 -- This is for statistical processing with eqmed.sql
 create table query_run_metrics_for_stats engine File(
         TSV, -- do not add header -- will parse with grep
         'analyze/query-run-metrics-for-stats.tsv')
     as select test, query_index, 0 run, version, metric_values
     from query_run_metric_arrays
+    where (test, query_index) not in broken_queries
     order by test, query_index, run, version
     ;
@@ -915,13 +927,15 @@ done
 
 function report_metrics
 {
+build_log_column_definitions
+
 rm -rf metrics ||:
 mkdir metrics
 
 clickhouse-local --query "
 create view right_async_metric_log as
     select * from file('right-async-metric-log.tsv', TSVWithNamesAndTypes,
-        'event_date Date, event_time DateTime, name String, value Float64')
+        '$(cat right-async-metric-log.tsv.columns)')
     ;
 
 -- Use the right log as time reference because it may have higher precision.
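Side note on the broken_queries guard added above: the paired statistics in eqmed.sql assume every (test, query_index) comes with an even number of runs (one per server version), so an odd count marks a pair that lost a run and must be excluded. A minimal standalone sketch of the same predicate, runnable in clickhouse-local; the toy table and its values are hypothetical, not part of the harness:

```sql
-- Toy stand-in for the harness's query_runs table: one row per recorded run.
create table toy_query_runs (test String, query_index UInt32, run UInt32) engine Memory;

-- Query 0 kept its left/right pair; query 1 lost a run (e.g. the server died mid-test).
insert into toy_query_runs values ('example.xml', 0, 0), ('example.xml', 0, 1), ('example.xml', 1, 0);

-- Same shape as the broken_queries view: an odd run count flags a broken pair.
select test, query_index
from toy_query_runs
group by test, query_index
having count(*) % 2 != 0; -- returns ('example.xml', 1)
```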
@@ -930,7 +944,7 @@ create table metrics engine File(TSV, 'metrics/metrics.tsv') as
     select name metric, r.event_time - min_time event_time, l.value as left, r.value as right
     from right_async_metric_log r
     asof join file('left-async-metric-log.tsv', TSVWithNamesAndTypes,
-        'event_date Date, event_time DateTime, name String, value Float64') l
+        '$(cat left-async-metric-log.tsv.columns)') l
     on l.name = r.name and r.event_time <= l.event_time
     order by metric, event_time
     ;
diff --git a/docker/test/performance-comparison/eqmed.sql b/docker/test/performance-comparison/eqmed.sql
index f7f8d6ac40d..139f0758798 100644
--- a/docker/test/performance-comparison/eqmed.sql
+++ b/docker/test/performance-comparison/eqmed.sql
@@ -8,7 +8,7 @@ select
 from
 (
     -- quantiles of randomization distributions
-    select quantileExactForEach(0.999)(
+    select quantileExactForEach(0.99)(
         arrayMap(x, y -> abs(x - y), metrics_by_label[1], metrics_by_label[2]) as d
         ) threshold
     ---- uncomment to see what the distribution is really like
@@ -33,7 +33,7 @@ from
     -- strip the query away before the join -- it might be several kB long;
     (select metrics, run, version from table) no_query,
     -- duplicate input measurements into many virtual runs
-    numbers(1, 100000) nn
+    numbers(1, 10000) nn
     -- for each virtual run, randomly reorder measurements
     order by virtual_run, rand()
     ) virtual_runs
diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py
index e1476d9aeb4..05e89c9e44c 100755
--- a/docker/test/performance-comparison/perf.py
+++ b/docker/test/performance-comparison/perf.py
@@ -20,7 +20,7 @@ parser = argparse.ArgumentParser(description='Run performance test.')
 parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file')
 parser.add_argument('--host', nargs='*', default=['localhost'], help="Server hostname(s). Corresponds to '--port' options.")
 parser.add_argument('--port', nargs='*', default=[9000], help="Server port(s). Corresponds to '--host' options.")
-parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS', 13)), help='Number of query runs per server. Defaults to CHPC_RUNS environment variable.')
+parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS', 7)), help='Number of query runs per server. Defaults to CHPC_RUNS environment variable.')
 parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
 parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
 parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py
index 1003a6d0e1a..e9e2ac68c1e 100755
--- a/docker/test/performance-comparison/report.py
+++ b/docker/test/performance-comparison/report.py
@@ -372,7 +372,7 @@ if args.report == 'main':
         'New, s', # 1
         'Ratio of speedup (-) or slowdown (+)', # 2
         'Relative difference (new − old) / old', # 3
-        'p < 0.001 threshold', # 4
+        'p < 0.01 threshold', # 4
         # Failed # 5
         'Test', # 6
         '#', # 7
@@ -416,7 +416,7 @@ if args.report == 'main':
         'Old, s', #0
         'New, s', #1
         'Relative difference (new - old)/old', #2
-        'p < 0.001 threshold', #3
+        'p < 0.01 threshold', #3
         # Failed #4
         'Test', #5
         '#', #6
@@ -470,12 +470,13 @@ if args.report == 'main':
     text = tableStart('Test times')
     text += tableHeader(columns)
 
-    nominal_runs = 13 # FIXME pass this as an argument
+    nominal_runs = 7 # FIXME pass this as an argument
     total_runs = (nominal_runs + 1) * 2 # one prewarm run, two servers
+    allowed_average_run_time = allowed_single_run_time + 60 / total_runs; # some allowance for fill/create queries
     attrs = ['' for c in columns]
     for r in rows:
         anchor = f'{currentTableAnchor()}.{r[0]}'
-        if float(r[6]) > 1.5 * total_runs:
+        if float(r[6]) > allowed_average_run_time * total_runs:
             # FIXME should be 15s max -- investigate parallel_insert
             slow_average_tests += 1
             attrs[6] = f'style="background: {color_bad}"'
@@ -649,7 +650,7 @@ elif args.report == 'all-queries':
         'New, s', #3
         'Ratio of speedup (-) or slowdown (+)', #4
         'Relative difference (new − old) / old', #5
-        'p < 0.001 threshold', #6
+        'p < 0.01 threshold', #6
         'Test', #7
         '#', #8
         'Query', #9
diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh
index 2ff15ca9c6a..4a9ad891883 100755
--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@@ -24,6 +24,7 @@ ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-se
 ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/
 ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/
 ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/
+ln -s /usr/share/clickhouse-test/config/executable_dictionary.xml /etc/clickhouse-server/
 ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/
 ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/
 ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/
diff --git a/docker/test/stateless_unbundled/run.sh b/docker/test/stateless_unbundled/run.sh
index 2ff15ca9c6a..4a9ad891883 100755
--- a/docker/test/stateless_unbundled/run.sh
+++ b/docker/test/stateless_unbundled/run.sh
@@ -24,6 +24,7 @@ ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-se
 ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/
 ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/
 ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/
+ln -s /usr/share/clickhouse-test/config/executable_dictionary.xml /etc/clickhouse-server/
 ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/
 ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/
 ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/
diff --git a/docker/test/stateless_with_coverage/run.sh b/docker/test/stateless_with_coverage/run.sh
index 64317ee62fd..c3ccb18659b 100755
--- a/docker/test/stateless_with_coverage/run.sh
+++ b/docker/test/stateless_with_coverage/run.sh
@@ -57,6 +57,7 @@ ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-se
 ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/
 ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/
 ln -s /usr/share/clickhouse-test/config/decimals_dictionary.xml /etc/clickhouse-server/
+ln -s /usr/share/clickhouse-test/config/executable_dictionary.xml /etc/clickhouse-server/
 ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/
 ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/
 ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/
diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index 9d3965b4a9c..bfe5b6218e4 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -10,42 +10,51 @@ results of a `SELECT`, and to perform `INSERT`s into a file-backed table.
 
 The supported formats are:
 
-| Format | Input | Output |
-|-----------------------------------------------------------------|-------|--------|
-| [TabSeparated](#tabseparated) | ✔ | ✔ |
-| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
-| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
-| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
-| [Template](#format-template) | ✔ | ✔ |
-| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ |
-| [CSV](#csv) | ✔ | ✔ |
-| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
-| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
-| [Values](#data-format-values) | ✔ | ✔ |
-| [Vertical](#vertical) | ✗ | ✔ |
-| [VerticalRaw](#verticalraw) | ✗ | ✔ |
-| [JSON](#json) | ✗ | ✔ |
-| [JSONCompact](#jsoncompact) | ✗ | ✔ |
-| [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
-| [TSKV](#tskv) | ✔ | ✔ |
-| [Pretty](#pretty) | ✗ | ✔ |
-| [PrettyCompact](#prettycompact) | ✗ | ✔ |
-| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ |
-| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
-| [PrettySpace](#prettyspace) | ✗ | ✔ |
-| [Protobuf](#protobuf) | ✔ | ✔ |
-| [Avro](#data-format-avro) | ✔ | ✔ |
-| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
-| [Parquet](#data-format-parquet) | ✔ | ✔ |
-| [Arrow](#data-format-arrow) | ✔ | ✔ |
-| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
-| [ORC](#data-format-orc) | ✔ | ✗ |
-| [RowBinary](#rowbinary) | ✔ | ✔ |
-| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
-| [Native](#native) | ✔ | ✔ |
-| [Null](#null) | ✗ | ✔ |
-| [XML](#xml) | ✗ | ✔ |
-| [CapnProto](#capnproto) | ✔ | ✗ |
+| Format | Input | Output |
+|-----------------------------------------------------------------------------------------|-------|--------|
+| [TabSeparated](#tabseparated) | ✔ | ✔ |
+| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
+| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
+| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
+| [Template](#format-template) | ✔ | ✔ |
+| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ 
| ✗ | +| [CSV](#csv) | ✔ | ✔ | +| [CSVWithNames](#csvwithnames) | ✔ | ✔ | +| [CustomSeparated](#format-customseparated) | ✔ | ✔ | +| [Values](#data-format-values) | ✔ | ✔ | +| [Vertical](#vertical) | ✗ | ✔ | +| [VerticalRaw](#verticalraw) | ✗ | ✔ | +| [JSON](#json) | ✗ | ✔ | +| [JSONString](#jsonstring) | ✗ | ✔ | +| [JSONCompact](#jsoncompact) | ✗ | ✔ | +| [JSONCompactString](#jsoncompactstring) | ✗ | ✔ | +| [JSONEachRow](#jsoneachrow) | ✔ | ✔ | +| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | +| [JSONStringEachRow](#jsonstringeachrow) | ✔ | ✔ | +| [JSONStringEachRowWithProgress](#jsonstringeachrowwithprogress) | ✗ | ✔ | +| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | +| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ | +| [JSONCompactStringEachRow](#jsoncompactstringeachrow) | ✔ | ✔ | +| [JSONCompactStringEachRowWithNamesAndTypes](#jsoncompactstringeachrowwithnamesandtypes) | ✔ | ✔ | +| [TSKV](#tskv) | ✔ | ✔ | +| [Pretty](#pretty) | ✗ | ✔ | +| [PrettyCompact](#prettycompact) | ✗ | ✔ | +| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ | +| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | +| [PrettySpace](#prettyspace) | ✗ | ✔ | +| [Protobuf](#protobuf) | ✔ | ✔ | +| [Avro](#data-format-avro) | ✔ | ✔ | +| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | +| [Parquet](#data-format-parquet) | ✔ | ✔ | +| [Arrow](#data-format-arrow) | ✔ | ✔ | +| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | +| [ORC](#data-format-orc) | ✔ | ✗ | +| [RowBinary](#rowbinary) | ✔ | ✔ | +| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | +| [Native](#native) | ✔ | ✔ | +| [Null](#null) | ✗ | ✔ | +| [XML](#xml) | ✗ | ✔ | +| [CapnProto](#capnproto) | ✔ | ✗ | You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](../operations/settings/settings.md) section. @@ -392,62 +401,41 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA "meta": [ { - "name": "SearchPhrase", + "name": "'hello'", "type": "String" }, { - "name": "c", + "name": "multiply(42, number)", "type": "UInt64" + }, + { + "name": "range(5)", + "type": "Array(UInt8)" } ], "data": [ { - "SearchPhrase": "", - "c": "8267016" + "'hello'": "hello", + "multiply(42, number)": "0", + "range(5)": [0,1,2,3,4] }, { - "SearchPhrase": "bathroom interior design", - "c": "2166" + "'hello'": "hello", + "multiply(42, number)": "42", + "range(5)": [0,1,2,3,4] }, { - "SearchPhrase": "yandex", - "c": "1655" - }, - { - "SearchPhrase": "spring 2014 fashion", - "c": "1549" - }, - { - "SearchPhrase": "freeform photos", - "c": "1480" + "'hello'": "hello", + "multiply(42, number)": "84", + "range(5)": [0,1,2,3,4] } ], - "totals": - { - "SearchPhrase": "", - "c": "8873898" - }, + "rows": 3, - "extremes": - { - "min": - { - "SearchPhrase": "", - "c": "1480" - }, - "max": - { - "SearchPhrase": "", - "c": "8267016" - } - }, - - "rows": 5, - - "rows_before_limit_at_least": 141137 + "rows_before_limit_at_least": 3 } ``` @@ -468,63 +456,165 @@ ClickHouse supports [NULL](../sql-reference/syntax.md), which is displayed as `n See also the [JSONEachRow](#jsoneachrow) format. +## JSONString {#jsonstring} + +Differs from JSON only in that data fields are output in strings, not in typed json values. 
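+The example below can be reproduced with a query like the following (an illustrative query; the unaliased expressions become the column names shown in `meta`):
+
+``` sql
+SELECT 'hello', multiply(42, number), range(5) FROM numbers(3) FORMAT JSONString
+```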
+
+Example:
+
+```json
+{
+    "meta":
+    [
+        {
+            "name": "'hello'",
+            "type": "String"
+        },
+        {
+            "name": "multiply(42, number)",
+            "type": "UInt64"
+        },
+        {
+            "name": "range(5)",
+            "type": "Array(UInt8)"
+        }
+    ],
+
+    "data":
+    [
+        {
+            "'hello'": "hello",
+            "multiply(42, number)": "0",
+            "range(5)": "[0,1,2,3,4]"
+        },
+        {
+            "'hello'": "hello",
+            "multiply(42, number)": "42",
+            "range(5)": "[0,1,2,3,4]"
+        },
+        {
+            "'hello'": "hello",
+            "multiply(42, number)": "84",
+            "range(5)": "[0,1,2,3,4]"
+        }
+    ],
+
+    "rows": 3,
+
+    "rows_before_limit_at_least": 3
+}
+```
+
 ## JSONCompact {#jsoncompact}
+## JSONCompactString {#jsoncompactstring}
 
 Differs from JSON only in that data rows are output in arrays, not in objects.
 
 Example:
 
 ``` json
+// JSONCompact
 {
     "meta":
     [
         {
-            "name": "SearchPhrase",
+            "name": "'hello'",
             "type": "String"
         },
         {
-            "name": "c",
+            "name": "multiply(42, number)",
             "type": "UInt64"
+        },
+        {
+            "name": "range(5)",
+            "type": "Array(UInt8)"
         }
     ],
 
     "data":
     [
-        ["", "8267016"],
-        ["bathroom interior design", "2166"],
-        ["yandex", "1655"],
-        ["fashion trends spring 2014", "1549"],
-        ["freeform photo", "1480"]
+        ["hello", "0", [0,1,2,3,4]],
+        ["hello", "42", [0,1,2,3,4]],
+        ["hello", "84", [0,1,2,3,4]]
     ],
 
-    "totals": ["","8873898"],
+    "rows": 3,
 
-    "extremes":
-    {
-        "min": ["","1480"],
-        "max": ["","8267016"]
-    },
-
-    "rows": 5,
-
-    "rows_before_limit_at_least": 141137
+    "rows_before_limit_at_least": 3
 }
 ```
 
-This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table).
-See also the `JSONEachRow` format.
+```json
+// JSONCompactString
+{
+    "meta":
+    [
+        {
+            "name": "'hello'",
+            "type": "String"
+        },
+        {
+            "name": "multiply(42, number)",
+            "type": "UInt64"
+        },
+        {
+            "name": "range(5)",
+            "type": "Array(UInt8)"
+        }
+    ],
-## JSONEachRow {#jsoneachrow}
+    "data":
+    [
+        ["hello", "0", "[0,1,2,3,4]"],
+        ["hello", "42", "[0,1,2,3,4]"],
+        ["hello", "84", "[0,1,2,3,4]"]
+    ],
-When using this format, ClickHouse outputs rows as separated, newline-delimited JSON objects, but the data as a whole is not valid JSON.
+    "rows": 3,
-``` json
-{"SearchPhrase":"curtain designs","count()":"1064"}
-{"SearchPhrase":"baku","count()":"1000"}
-{"SearchPhrase":"","count()":"8267016"}
+    "rows_before_limit_at_least": 3
+}
 ```
-When inserting the data, you should provide a separate JSON object for each row.
+## JSONEachRow {#jsoneachrow}
+## JSONStringEachRow {#jsonstringeachrow}
+## JSONCompactEachRow {#jsoncompacteachrow}
+## JSONCompactStringEachRow {#jsoncompactstringeachrow}
+
+When using these formats, ClickHouse outputs each row as a separate, newline-delimited JSON value; the data as a whole, however, is not valid JSON.
+
+``` json
+{"some_int":42,"some_str":"hello","some_tuple":[1,"a"]} // JSONEachRow
+[42,"hello",[1,"a"]] // JSONCompactEachRow
+["42","hello","(1,'a')"] // JSONCompactStringEachRow
+```
+
+When inserting the data, you should provide a separate JSON value for each row.
+
+## JSONEachRowWithProgress {#jsoneachrowwithprogress}
+## JSONStringEachRowWithProgress {#jsonstringeachrowwithprogress}
+
+Differs from JSONEachRow/JSONStringEachRow in that ClickHouse will also yield progress information as JSON objects.
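+For example, the output below can be produced with a query like this (illustrative):
+
+``` sql
+SELECT 'hello', multiply(42, number), range(5) FROM numbers(3) FORMAT JSONEachRowWithProgress
+```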
+ +```json +{"row":{"'hello'":"hello","multiply(42, number)":"0","range(5)":[0,1,2,3,4]}} +{"row":{"'hello'":"hello","multiply(42, number)":"42","range(5)":[0,1,2,3,4]}} +{"row":{"'hello'":"hello","multiply(42, number)":"84","range(5)":[0,1,2,3,4]}} +{"progress":{"read_rows":"3","read_bytes":"24","written_rows":"0","written_bytes":"0","total_rows_to_read":"3"}} +``` + +## JSONCompactEachRowWithNamesAndTypes {#jsoncompacteachrowwithnamesandtypes} +## JSONCompactStringEachRowWithNamesAndTypes {#jsoncompactstringeachrowwithnamesandtypes} + +Differs from JSONCompactEachRow/JSONCompactStringEachRow in that the column names and types are written as the first two rows. + +```json +["'hello'", "multiply(42, number)", "range(5)"] +["String", "UInt64", "Array(UInt8)"] +["hello", "0", [0,1,2,3,4]] +["hello", "42", [0,1,2,3,4]] +["hello", "84", [0,1,2,3,4]] +``` ### Inserting Data {#inserting-data} diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index 6b1d71e1ca6..75607cc30b0 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -6,6 +6,7 @@ Columns: - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution. - `name` ([String](../../sql-reference/data-types/string.md)) — Metric name. - `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. @@ -16,18 +17,18 @@ SELECT * FROM system.asynchronous_metric_log LIMIT 10 ``` ``` text -┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬────value─┐ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.arenas.all.pmuzzy │ 0 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.arenas.all.pdirty │ 4214 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.background_thread.run_intervals │ 0 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.background_thread.num_runs │ 0 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.retained │ 17657856 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.mapped │ 71471104 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.resident │ 61538304 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.metadata │ 6199264 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.allocated │ 38074336 │ -│ 2020-06-22 │ 2020-06-22 06:57:30 │ jemalloc.epoch │ 2 │ -└────────────┴─────────────────────┴──────────────────────────────────────────┴──────────┘ +┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬─────value─┐ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 
2020-09-05 15:56:30.025227 │ jemalloc.resident                        │ 260931584 │
+│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata                        │  12079488 │
+│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.allocated                       │ 133756128 │
+└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴───────────┘
 ```

 **See Also**

diff --git a/docs/en/operations/system-tables/merges.md b/docs/en/operations/system-tables/merges.md
index fb98a2b9e34..3e712e2962c 100644
--- a/docs/en/operations/system-tables/merges.md
+++ b/docs/en/operations/system-tables/merges.md
@@ -10,12 +10,16 @@ Columns:
 - `progress` (Float64) — The percentage of completed work from 0 to 1.
 - `num_parts` (UInt64) — The number of pieces to be merged.
 - `result_part_name` (String) — The name of the part that will be formed as the result of merging.
-- `is_mutation` (UInt8) - 1 if this process is a part mutation.
+- `is_mutation` (UInt8) — 1 if this process is a part mutation.
 - `total_size_bytes_compressed` (UInt64) — The total size of the compressed data in the merged chunks.
 - `total_size_marks` (UInt64) — The total number of marks in the merged parts.
 - `bytes_read_uncompressed` (UInt64) — Number of bytes read, uncompressed.
 - `rows_read` (UInt64) — Number of rows read.
 - `bytes_written_uncompressed` (UInt64) — Number of bytes written, uncompressed.
 - `rows_written` (UInt64) — Number of rows written.
+- `memory_usage` (UInt64) — Memory consumption of the merge process.
+- `thread_id` (UInt64) — Thread ID of the merge process.
+- `merge_type` — The type of the current merge. Empty if it's a mutation.
+- `merge_algorithm` — The algorithm used in the current merge. Empty if it's a mutation.

 [Original article](https://clickhouse.tech/docs/en/operations/system_tables/merges) 

diff --git a/docs/en/operations/system-tables/metric_log.md b/docs/en/operations/system-tables/metric_log.md
index 9ccf61291d2..063fe81923b 100644
--- a/docs/en/operations/system-tables/metric_log.md
+++ b/docs/en/operations/system-tables/metric_log.md
@@ -23,28 +23,28 @@ SELECT * FROM system.metric_log LIMIT 1 FORMAT Vertical;
 ``` text
 Row 1:
 ──────
-event_date:                                            2020-02-18
-event_time:                                            2020-02-18 07:15:33
-milliseconds:                                          554
-ProfileEvent_Query:                                    0
-ProfileEvent_SelectQuery:                              0
-ProfileEvent_InsertQuery:                              0
-ProfileEvent_FileOpen:                                 0
-ProfileEvent_Seek:                                     0
-ProfileEvent_ReadBufferFromFileDescriptorRead:         1
-ProfileEvent_ReadBufferFromFileDescriptorReadFailed:   0
-ProfileEvent_ReadBufferFromFileDescriptorReadBytes:    0
-ProfileEvent_WriteBufferFromFileDescriptorWrite:       1
-ProfileEvent_WriteBufferFromFileDescriptorWriteFailed: 0
-ProfileEvent_WriteBufferFromFileDescriptorWriteBytes:  56
+event_date:                                            2020-09-05
+event_time:                                            2020-09-05 16:22:33
+event_time_microseconds:                               2020-09-05 16:22:33.196807
+milliseconds:                                          196
+ProfileEvent_Query:                                    0
+ProfileEvent_SelectQuery:                              0
+ProfileEvent_InsertQuery:                              0
+ProfileEvent_FailedQuery:                              0
+ProfileEvent_FailedSelectQuery:                        0
 ...
-CurrentMetric_Query:                                   0
-CurrentMetric_Merge:                                   0
-CurrentMetric_PartMutation:                            0
-CurrentMetric_ReplicatedFetch:                         0
-CurrentMetric_ReplicatedSend:                          0
-CurrentMetric_ReplicatedChecks:                        0
 ...
+CurrentMetric_Revision:                                54439
+CurrentMetric_VersionInteger:                          20009001
+CurrentMetric_RWLockWaitingReaders:                    0
+CurrentMetric_RWLockWaitingWriters:                    0
+CurrentMetric_RWLockActiveReaders:                     0
+CurrentMetric_RWLockActiveWriters:                     0
+CurrentMetric_GlobalThread:                            74
+CurrentMetric_GlobalThreadActive:                      26
+CurrentMetric_LocalThread:                             0
+CurrentMetric_LocalThreadActive:                       0
+CurrentMetric_DistributedFilesToInsert:                0
 ```

 **See also**

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 05247b6db7d..1c059e9f97b 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -515,6 +515,29 @@ SELECT
 └────────────────┴────────────┘
 ```

+## formatReadableQuantity(x) {#formatreadablequantityx}
+
+Accepts a number. Returns a rounded number with a suffix (thousand, million, billion, etc.) as a string.
+
+It is useful for making big numbers readable for humans.
+
+Example:
+
+``` sql
+SELECT
+    arrayJoin([1024, 1234 * 1000, (4567 * 1000) * 1000, 98765432101234]) AS number,
+    formatReadableQuantity(number) AS number_for_humans
+```
+
+``` text
+┌─────────number─┬─number_for_humans─┐
+│           1024 │ 1.02 thousand     │
+│        1234000 │ 1.23 million      │
+│     4567000000 │ 4.57 billion      │
+│ 98765432101234 │ 98.77 trillion    │
+└────────────────┴───────────────────┘
+```
+
 ## least(a, b) {#leasta-b}

 Returns the smallest value from a and b.

diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md
index 343f45135eb..f826b810d23 100644
--- a/docs/en/sql-reference/functions/tuple-map-functions.md
+++ b/docs/en/sql-reference/functions/tuple-map-functions.md
@@ -46,3 +46,25 @@ SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt3
 │ ([1,2],[-1,0]) │ Tuple(Array(UInt8), Array(Int64)) │
 └────────────────┴───────────────────────────────────┘
 ```

+## mapPopulateSeries {#function-mappopulateseries}
+
+Syntax: `mapPopulateSeries(keys : Array(), values : Array()[, max : ])`
+
+Generates a map where the keys are a series of numbers from the minimum key to the maximum key (or to the `max` argument, if it is specified) taken from the `keys` array with a step size of one,
+and the corresponding values are taken from the `values` array. If no value is specified for a key, the default value is used in the resulting map.
+For repeated keys, only the first value (in order of appearance) gets associated with the key.
+
+The number of elements in `keys` and `values` must be the same for each row.
+
+Returns a tuple of two arrays: the keys in sorted order, and the values corresponding to those keys.
+
+``` sql
+SELECT mapPopulateSeries([1,2,4], [11,22,44], 5) AS res, toTypeName(res) AS type;
+```
+
+``` text
+┌─res──────────────────────────┬─type──────────────────────────────┐
+│ ([1,2,3,4,5],[11,22,0,44,0]) │ Tuple(Array(UInt8), Array(UInt8)) │
+└──────────────────────────────┴───────────────────────────────────┘
+```

diff --git a/docs/es/operations/backup.md b/docs/es/operations/backup.md
index f1e5b3d3e09..a6297070663 100644
--- a/docs/es/operations/backup.md
+++ b/docs/es/operations/backup.md
@@ -1,20 +1,18 @@
 ---
-machine_translated: true
-machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
 toc_priority: 49
 toc_title: Copia de seguridad de datos
 ---

 # Copia de seguridad de datos {#data-backup}

-Mientras [replicación](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [no puede simplemente eliminar tablas con un motor similar a MergeTree que contenga más de 50 Gb de datos](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Sin embargo, estas garantías no cubren todos los casos posibles y pueden eludirse.
+Mientras que la [replicación](../engines/table-engines/mergetree-family/replication.md) proporciona protección contra fallos de hardware, no protege de errores humanos: el borrado accidental de datos, eliminar la tabla equivocada o una tabla en el clúster equivocado, y bugs de software que dan como resultado un procesado incorrecto de los datos o la corrupción de los datos. En muchos casos, errores como estos afectarán a todas las réplicas. ClickHouse dispone de salvaguardas para prevenir algunos tipos de errores — por ejemplo, por defecto [no se puede simplemente eliminar tablas con un motor similar a MergeTree que contenga más de 50 Gb de datos](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Sin embargo, estas salvaguardas no cubren todos los casos posibles y pueden eludirse.

 Para mitigar eficazmente los posibles errores humanos, debe preparar cuidadosamente una estrategia para realizar copias de seguridad y restaurar sus datos **previamente**.

-Cada empresa tiene diferentes recursos disponibles y requisitos comerciales, por lo que no existe una solución universal para las copias de seguridad y restauraciones de ClickHouse que se adapten a cada situación. Lo que funciona para un gigabyte de datos probablemente no funcionará para decenas de petabytes. Hay una variedad de posibles enfoques con sus propios pros y contras, que se discutirán a continuación. Es una buena idea utilizar varios enfoques en lugar de solo uno para compensar sus diversas deficiencias.
+Cada empresa tiene diferentes recursos disponibles y requisitos comerciales, por lo que no existe una solución universal para las copias de seguridad y restauraciones de ClickHouse que se adapten a cada situación. Lo que funciona para un gigabyte de datos probablemente no funcionará para decenas de petabytes. Hay una variedad de posibles enfoques con sus propios pros y contras, que se discutirán a continuación.
Es una buena idea utilizar varios enfoques en lugar de uno solo para compensar sus diversas deficiencias.

 !!! note "Nota"
-    Tenga en cuenta que si realizó una copia de seguridad de algo y nunca intentó restaurarlo, es probable que la restauración no funcione correctamente cuando realmente la necesite (o al menos tomará más tiempo de lo que las empresas pueden tolerar). Por lo tanto, cualquiera que sea el enfoque de copia de seguridad que elija, asegúrese de automatizar el proceso de restauración también y practicarlo en un clúster de ClickHouse de repuesto regularmente.
+    Tenga en cuenta que si realizó una copia de seguridad de algo y nunca intentó restaurarlo, es probable que la restauración no funcione correctamente cuando realmente la necesite (o al menos tomará más tiempo de lo que las empresas pueden tolerar). Por lo tanto, cualquiera que sea el enfoque de copia de seguridad que elija, asegúrese de automatizar el proceso de restauración también y ponerlo en práctica en un clúster de ClickHouse de repuesto regularmente.

 ## Duplicar datos de origen en otro lugar {#duplicating-source-data-somewhere-else}

@@ -32,7 +30,7 @@ Para volúmenes de datos más pequeños, un simple `INSERT INTO ... SELECT ...`

 ## Manipulaciones con piezas {#manipulations-with-parts}

-ClickHouse permite usar el `ALTER TABLE ... FREEZE PARTITION ...` consulta para crear una copia local de particiones de tabla. Esto se implementa utilizando enlaces duros al `/var/lib/clickhouse/shadow/` carpeta, por lo que generalmente no consume espacio adicional en disco para datos antiguos. Las copias creadas de archivos no son manejadas por el servidor ClickHouse, por lo que puede dejarlas allí: tendrá una copia de seguridad simple que no requiere ningún sistema externo adicional, pero seguirá siendo propenso a problemas de hardware. Por esta razón, es mejor copiarlos de forma remota en otra ubicación y luego eliminar las copias locales. Los sistemas de archivos distribuidos y los almacenes de objetos siguen siendo una buena opción para esto, pero los servidores de archivos conectados normales con una capacidad lo suficientemente grande podrían funcionar también (en este caso, la transferencia ocurrirá a través del sistema de archivos de red o tal vez [rsync](https://en.wikipedia.org/wiki/Rsync)).
+ClickHouse permite usar la consulta `ALTER TABLE ... FREEZE PARTITION ...` para crear una copia local de particiones de tabla (vea el ejemplo más abajo). Esto se implementa utilizando enlaces duros a la carpeta `/var/lib/clickhouse/shadow/`, por lo que generalmente no consume espacio adicional en disco para datos antiguos. Las copias creadas de archivos no son manejadas por el servidor ClickHouse, por lo que puede dejarlas allí: tendrá una copia de seguridad simple que no requiere ningún sistema externo adicional, pero seguirá siendo propenso a problemas de hardware. Por esta razón, es mejor copiarlos de forma remota en otra ubicación y luego eliminar las copias locales. Los sistemas de archivos distribuidos y los almacenes de objetos siguen siendo una buena opción para esto, pero los servidores de archivos conectados normales con una capacidad lo suficientemente grande podrían funcionar también (en este caso, la transferencia ocurrirá a través del sistema de archivos de red o tal vez [rsync](https://en.wikipedia.org/wiki/Rsync)).

 Para obtener más información sobre las consultas relacionadas con las manipulaciones de particiones, consulte [Documentación de ALTER](../sql-reference/statements/alter.md#alter_manipulations-with-partitions).
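+Por ejemplo (una consulta ilustrativa; los nombres de la base de datos, la tabla y la partición son hipotéticos):
+
+``` sql
+ALTER TABLE mydb.visits FREEZE PARTITION 201902
+```
+
+La copia resultante aparecerá bajo `/var/lib/clickhouse/shadow/`.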
diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index 054f75e8da8..04bca115974 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -28,6 +28,8 @@ ClickHouse может принимать (`INSERT`) и отдавать (`SELECT
 | [PrettySpace](#prettyspace)                               | ✗     | ✔      |
 | [Protobuf](#protobuf)                                     | ✔     | ✔      |
 | [Parquet](#data-format-parquet)                           | ✔     | ✔      |
+| [Arrow](#data-format-arrow)                               | ✔     | ✔      |
+| [ArrowStream](#data-format-arrow-stream)                  | ✔     | ✔      |
 | [ORC](#data-format-orc)                                   | ✔     | ✗      |
 | [RowBinary](#rowbinary)                                   | ✔     | ✔      |
 | [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔     | ✔      |
@@ -947,6 +949,12 @@ ClickHouse пишет и читает сообщения `Protocol Buffers` в

 ## Avro {#data-format-avro}

+[Apache Avro](https://avro.apache.org/) — это ориентированный на строки фреймворк для сериализации данных. Разработан в рамках проекта Apache Hadoop.
+
+В ClickHouse формат Avro поддерживает чтение и запись [файлов данных Avro](https://avro.apache.org/docs/current/spec.html#Object+Container+Files).
+
+См. также [логические типы Avro](https://avro.apache.org/docs/current/spec.html#Logical+Types).
+
 ## AvroConfluent {#data-format-avro-confluent}

 Для формата `AvroConfluent` ClickHouse поддерживает декодирование сообщений `Avro` с одним объектом. Такие сообщения используются с [Kafka](http://kafka.apache.org/) и реестром схем [Confluent](https://docs.confluent.io/current/schema-registry/index.html).
@@ -996,7 +1004,7 @@ SELECT * FROM topic1_stream;

 ## Parquet {#data-format-parquet}

-[Apache Parquet](http://parquet.apache.org/) — формат поколоночного хранения данных, который распространён в экосистеме Hadoop. Для формата `Parquet` ClickHouse поддерживает операции чтения и записи.
+[Apache Parquet](https://parquet.apache.org/) — формат поколоночного хранения данных, который распространён в экосистеме Hadoop. Для формата `Parquet` ClickHouse поддерживает операции чтения и записи.

@@ -1042,6 +1050,16 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_
 Для обмена данными с экосистемой Hadoop можно использовать движки таблиц [HDFS](../engines/table-engines/integrations/hdfs.md).

+## Arrow {#data-format-arrow}
+
+[Apache Arrow](https://arrow.apache.org/) поставляется с двумя встроенными поколоночными форматами хранения. ClickHouse поддерживает операции чтения и записи для этих форматов.
+
+`Arrow` — это формат Apache Arrow "file mode". Он предназначен для произвольного доступа к данным в памяти.
+
+## ArrowStream {#data-format-arrow-stream}
+
+`ArrowStream` — это формат Apache Arrow "stream mode". Он предназначен для обработки потоков данных в памяти.
+
 ## ORC {#data-format-orc}

 [Apache ORC](https://orc.apache.org/) - это column-oriented формат данных, распространённый в экосистеме Hadoop. Вы можете только вставлять данные этого формата в ClickHouse.

diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md
index 468e15e7d57..7b9dacf21cd 100644
--- a/docs/ru/sql-reference/functions/other-functions.md
+++ b/docs/ru/sql-reference/functions/other-functions.md
@@ -508,6 +508,29 @@ SELECT
 └────────────────┴────────────┘
 ```

+## formatReadableQuantity(x) {#formatreadablequantityx}
+
+Принимает число. Возвращает округленное число с суффиксом (thousand, million, billion и т.д.) в виде строки.
+
+Облегчает визуальное восприятие больших чисел человеком.
+
+Пример:
+
+``` sql
+SELECT
+    arrayJoin([1024, 1234 * 1000, (4567 * 1000) * 1000, 98765432101234]) AS number,
+    formatReadableQuantity(number) AS number_for_humans
+```
+
+``` text
+┌─────────number─┬─number_for_humans─┐
+│           1024 │ 1.02 thousand     │
+│        1234000 │ 1.23 million      │
+│     4567000000 │ 4.57 billion      │
+│ 98765432101234 │ 98.77 trillion    │
+└────────────────┴───────────────────┘
+```
+
 ## least(a, b) {#leasta-b}

 Возвращает наименьшее значение из a и b.

diff --git a/docs/ru/sql-reference/functions/random-functions.md b/docs/ru/sql-reference/functions/random-functions.md
index b425505b69d..4aaaef5cb5d 100644
--- a/docs/ru/sql-reference/functions/random-functions.md
+++ b/docs/ru/sql-reference/functions/random-functions.md
@@ -55,4 +55,50 @@ FROM numbers(3)
 └────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘
 ```

+# Случайные функции для работы со строками {#random-functions-for-working-with-strings}
+
+## randomString {#random-string}
+
+## randomFixedString {#random-fixed-string}
+
+## randomPrintableASCII {#random-printable-ascii}
+
+## randomStringUTF8 {#random-string-utf8}
+
+## fuzzBits {#fuzzbits}
+
+**Синтаксис**
+
+``` sql
+fuzzBits(s, prob)
+```
+
+Инвертирует каждый бит `s` с вероятностью `prob`.
+
+**Параметры**
+
+- `s` — `String` или `FixedString`.
+- `prob` — константа `Float32/64`.
+
+**Возвращаемое значение**
+
+Измененная случайным образом строка с тем же типом, что и `s`.
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT fuzzBits(materialize('abacaba'), 0.1)
+FROM numbers(3)
+```
+
+Результат:
+
+``` text
+┌─fuzzBits(materialize('abacaba'), 0.1)─┐
+│ abaaaja                               │
+│ a*cjab+                               │
+│ aeca2A                                │
+└───────────────────────────────────────┘
+```
+
 [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/random_functions/)

diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md
index 41ded78055c..c7d74a9d881 100644
--- a/docs/ru/sql-reference/functions/type-conversion-functions.md
+++ b/docs/ru/sql-reference/functions/type-conversion-functions.md
@@ -513,4 +513,95 @@ SELECT parseDateTimeBestEffort('10 20:19')
 - [toDate](#todate)
 - [toDateTime](#todatetime)

+## toUnixTimestamp64Milli
+## toUnixTimestamp64Micro
+## toUnixTimestamp64Nano
+
+Преобразует значение `DateTime64` в значение `Int64` с фиксированной точностью менее одной секунды.
+Входное значение округляется соответствующим образом вверх или вниз в зависимости от его точности. Обратите внимание, что возвращаемое значение - это временная метка в UTC, а не в часовом поясе `DateTime64`.
+
+**Синтаксис**
+
+``` sql
+toUnixTimestamp64Milli(value)
+```
+
+**Параметры**
+
+- `value` — значение `DateTime64` с любой точностью.
+
+**Возвращаемое значение**
+
+- Значение `value`, преобразованное в тип данных `Int64`.
+
+**Примеры**
+
+Запрос:
+
+``` sql
+WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
+SELECT toUnixTimestamp64Milli(dt64)
+```
+
+Ответ:
+
+``` text
+┌─toUnixTimestamp64Milli(dt64)─┐
+│                1568650812345 │
+└──────────────────────────────┘
+```
+
+Запрос:
+
+``` sql
+WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
+SELECT toUnixTimestamp64Nano(dt64)
+```
+
+Ответ:
+
+``` text
+┌─toUnixTimestamp64Nano(dt64)─┐
+│         1568650812345678000 │
+└─────────────────────────────┘
+```
+
+## fromUnixTimestamp64Milli
+## fromUnixTimestamp64Micro
+## fromUnixTimestamp64Nano
+
+Преобразует значение `Int64` в значение `DateTime64` с фиксированной точностью менее одной секунды и дополнительным часовым поясом. Входное значение округляется соответствующим образом вверх или вниз в зависимости от его точности. Обратите внимание, что входное значение обрабатывается как метка времени UTC, а не метка времени в заданном (или неявном) часовом поясе.
+
+**Синтаксис**
+
+``` sql
+fromUnixTimestamp64Milli(value [, timezone])
+```
+
+**Параметры**
+
+- `value` — значение типа `Int64` с любой точностью.
+- `timezone` — (не обязательный параметр) часовой пояс в формате `String` для возвращаемого результата.
+
+**Возвращаемое значение**
+
+- Значение `value`, преобразованное в тип данных `DateTime64`.
+
+**Пример**
+
+Запрос:
+
+``` sql
+WITH CAST(1234567891011, 'Int64') AS i64
+SELECT fromUnixTimestamp64Milli(i64, 'UTC')
+```
+
+Ответ:
+
+``` text
+┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐
+│              2009-02-13 23:31:31.011 │
+└──────────────────────────────────────┘
+```
+
 [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/type_conversion_functions/)

diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md
index 36a7a3c51e2..caa3d04659e 100644
--- a/docs/ru/sql-reference/statements/create/view.md
+++ b/docs/ru/sql-reference/statements/create/view.md
@@ -5,13 +5,15 @@ toc_title: Представление

 # CREATE VIEW {#create-view}

-``` sql
-CREATE [MATERIALIZED] VIEW [IF NOT EXISTS] [db.]table_name [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
-```
-
 Создаёт представление. Представления бывают двух видов - обычные и материализованные (MATERIALIZED).

-Обычные представления не хранят никаких данных, а всего лишь производят чтение из другой таблицы. То есть, обычное представление - не более чем сохранённый запрос. При чтении из представления, этот сохранённый запрос, используется в качестве подзапроса в секции FROM.
+## Обычные представления {#normal}
+
+``` sql
+CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] AS SELECT ...
+```
+
+Обычные представления не хранят никаких данных, они выполняют чтение из другой таблицы при каждом обращении. То есть обычное представление - не более чем сохранённый запрос. При чтении из представления этот сохранённый запрос используется как подзапрос в секции [FROM](../../../sql-reference/statements/select/from.md).

 Для примера, пусть вы создали представление:

@@ -31,15 +33,24 @@ SELECT a, b, c FROM view
 SELECT a, b, c FROM (SELECT ...)
 ```

-Материализованные (MATERIALIZED) представления хранят данные, преобразованные соответствующим запросом SELECT.
+## Материализованные представления {#materialized}

-При создании материализованного представления без использования `TO [db].[table]`, нужно обязательно указать ENGINE - движок таблицы для хранения данных.
+``` sql
+CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
+```
+
+Материализованные (MATERIALIZED) представления хранят данные, преобразованные соответствующим запросом [SELECT](../../../sql-reference/statements/select/index.md).
+
+При создании материализованного представления без использования `TO [db].[table]`, нужно обязательно указать `ENGINE` - движок таблицы для хранения данных.
+
+При создании материализованного представления с использованием `TO [db].[table]`, нельзя указывать `POPULATE`.

 Материализованное представление устроено следующим образом: при вставке данных в таблицу, указанную в SELECT-е, кусок вставляемых данных преобразуется этим запросом SELECT, и полученный результат вставляется в представление.

-Если указано POPULATE, то при создании представления, в него будут вставлены имеющиеся данные таблицы, как если бы был сделан запрос `CREATE TABLE ... AS SELECT ...` . Иначе, представление будет содержать только данные, вставляемые в таблицу после создания представления. Не рекомендуется использовать POPULATE, так как вставляемые в таблицу данные во время создания представления, не попадут в него.
+!!! important "Важно"
+    Материализованные представления в ClickHouse больше похожи на `after insert` триггеры. Если в запросе материализованного представления есть агрегирование, оно применяется только к вставляемому блоку записей. Любые изменения существующих данных исходной таблицы (например, обновление, удаление, удаление раздела и т.д.) не изменяют материализованное представление.
+
+Если указано `POPULATE`, то при создании представления, в него будут вставлены имеющиеся данные таблицы, как если бы был сделан запрос `CREATE TABLE ... AS SELECT ...` . Иначе, представление будет содержать только данные, вставляемые в таблицу после создания представления. Не рекомендуется использовать POPULATE, так как вставляемые в таблицу данные во время создания представления, не попадут в него.

 Запрос `SELECT` может содержать `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`… Следует иметь ввиду, что соответствующие преобразования будут выполняться независимо, на каждый блок вставляемых данных. Например, при наличии `GROUP BY`, данные будут агрегироваться при вставке, но только в рамках одной пачки вставляемых данных. Далее, данные не будут доагрегированы. Исключение - использование ENGINE, производящего агрегацию данных самостоятельно, например, `SummingMergeTree`.

@@ -50,4 +61,4 @@
 Отсутствует отдельный запрос для удаления представлений. Чтобы удалить представление, следует использовать `DROP TABLE`.

 [Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/view)
-
\ No newline at end of file
+

diff --git a/docs/ru/sql-reference/statements/drop.md b/docs/ru/sql-reference/statements/drop.md
index 4bfd53b1d47..22e553cfdac 100644
--- a/docs/ru/sql-reference/statements/drop.md
+++ b/docs/ru/sql-reference/statements/drop.md
@@ -5,18 +5,35 @@ toc_title: DROP

 # DROP {#drop}

-Запрос имеет два вида: `DROP DATABASE` и `DROP TABLE`.
+Удаляет существующий объект.
+Если указано `IF EXISTS` - не выдавать ошибку, если объекта не существует.
+
+## DROP DATABASE {#drop-database}

 ``` sql
 DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster]
 ```

+Удаляет все таблицы в базе данных db, затем удаляет саму базу данных db.
+
+
+## DROP TABLE {#drop-table}
+
 ``` sql
 DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
 ```

 Удаляет таблицу.
-Если указано `IF EXISTS` - не выдавать ошибку, если таблица не существует или база данных не существует.
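+Например (гипотетический пример; имена базы данных, таблицы и кластера выбраны произвольно):
+
+``` sql
+DROP TABLE IF EXISTS mydb.old_table ON CLUSTER test_cluster
+```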
+ + +## DROP DICTIONARY {#drop-dictionary} + +``` sql +DROP DICTIONARY [IF EXISTS] [db.]name +``` + +Удаляет словарь. + ## DROP USER {#drop-user-statement} @@ -41,6 +58,7 @@ DROP USER [IF EXISTS] name [,...] [ON CLUSTER cluster_name] DROP ROLE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] ``` + ## DROP ROW POLICY {#drop-row-policy-statement} Удаляет политику доступа к строкам. @@ -80,5 +98,13 @@ DROP [SETTINGS] PROFILE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] ``` +## DROP VIEW {#drop-view} -[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/drop/) \ No newline at end of file +``` sql +DROP VIEW [IF EXISTS] [db.]name [ON CLUSTER cluster] +``` + +Удаляет представление. Представления могут быть удалены и командой `DROP TABLE`, но команда `DROP VIEW` проверяет, что `[db.]name` является представлением. + + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/drop/) diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 89220251cda..ae4a72ef62a 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -16,6 +16,7 @@ option (ENABLE_CLICKHOUSE_COMPRESSOR "Enable clickhouse-compressor" ${ENABLE_CLI option (ENABLE_CLICKHOUSE_COPIER "Enable clickhouse-copier" ${ENABLE_CLICKHOUSE_ALL}) option (ENABLE_CLICKHOUSE_FORMAT "Enable clickhouse-format" ${ENABLE_CLICKHOUSE_ALL}) option (ENABLE_CLICKHOUSE_OBFUSCATOR "Enable clickhouse-obfuscator" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_GIT_IMPORT "Enable clickhouse-git-import" ${ENABLE_CLICKHOUSE_ALL}) option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "Enable clickhouse-odbc-bridge" ${ENABLE_CLICKHOUSE_ALL}) if (CLICKHOUSE_SPLIT_BINARY) @@ -91,21 +92,22 @@ add_subdirectory (copier) add_subdirectory (format) add_subdirectory (obfuscator) add_subdirectory (install) +add_subdirectory (git-import) if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) add_subdirectory (odbc-bridge) endif () if (CLICKHOUSE_ONE_SHARED) - add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) - target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK}) - target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE}) + add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_GIT_IMPORT_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) + target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_COPIER_LINK} 
${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_GIT_IMPORT_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK}) + target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_GIT_IMPORT_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE}) set_target_properties(clickhouse-lib PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR} VERSION ${VERSION_SO} OUTPUT_NAME clickhouse DEBUG_POSTFIX "") install (TARGETS clickhouse-lib LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse) endif() if (CLICKHOUSE_SPLIT_BINARY) - set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-obfuscator clickhouse-copier) + set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-obfuscator clickhouse-git-import clickhouse-copier) if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-odbc-bridge) @@ -149,6 +151,9 @@ else () if (ENABLE_CLICKHOUSE_OBFUSCATOR) clickhouse_target_link_split_lib(clickhouse obfuscator) endif () + if (ENABLE_CLICKHOUSE_GIT_IMPORT) + clickhouse_target_link_split_lib(clickhouse git-import) + endif () if (ENABLE_CLICKHOUSE_INSTALL) clickhouse_target_link_split_lib(clickhouse install) endif () @@ -199,6 +204,11 @@ else () install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-obfuscator DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-obfuscator) endif () + if (ENABLE_CLICKHOUSE_GIT_IMPORT) + add_custom_target (clickhouse-git-import ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-git-import DEPENDS clickhouse) + install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-git-import DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + list(APPEND CLICKHOUSE_BUNDLE clickhouse-git-import) + endif () if(ENABLE_CLICKHOUSE_ODBC_BRIDGE) list(APPEND CLICKHOUSE_BUNDLE clickhouse-odbc-bridge) endif() diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index c9701950dc5..139a5b367e4 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -866,6 +866,8 @@ private: // will exit. The ping() would be the best match here, but it's // private, probably for a good reason that the protocol doesn't allow // pings at any possible moment. + // Don't forget to reset the default database which might have changed. + connection->setDefaultDatabase(""); connection->forceConnected(connection_parameters.timeouts); if (text.size() > 4 * 1024) @@ -900,74 +902,127 @@ private: return processMultiQuery(text); } - bool processMultiQuery(const String & text) + bool processMultiQuery(const String & all_queries_text) { const bool test_mode = config().has("testmode"); { /// disable logs if expects errors - TestHint test_hint(test_mode, text); + TestHint test_hint(test_mode, all_queries_text); if (test_hint.clientError() || test_hint.serverError()) processTextAsSingleQuery("SET send_logs_level = 'none'"); } /// Several queries separated by ';'. /// INSERT data is ended by the end of line, not ';'. 
+ /// An exception is VALUES format where we also support semicolon in + /// addition to end of line. - const char * begin = text.data(); - const char * end = begin + text.size(); + const char * this_query_begin = all_queries_text.data(); + const char * all_queries_end = all_queries_text.data() + all_queries_text.size(); - while (begin < end) + while (this_query_begin < all_queries_end) { - const char * pos = begin; - ASTPtr orig_ast = parseQuery(pos, end, true); + // Use the token iterator to skip any whitespace, semicolons and + // comments at the beginning of the query. An example from regression + // tests: + // insert into table t values ('invalid'); -- { serverError 469 } + // select 1 + // Here the test hint comment gets parsed as a part of second query. + // We parse the `INSERT VALUES` up to the semicolon, and the rest + // looks like a two-line query: + // -- { serverError 469 } + // select 1 + // and we expect it to fail with error 469, but this hint is actually + // for the previous query. Test hints should go after the query, so + // we can fix this by skipping leading comments. Token iterator skips + // comments and whitespace by itself, so we only have to check for + // semicolons. + // The code block is to limit visibility of `tokens` because we have + // another such variable further down the code, and get warnings for + // that. + { + Tokens tokens(this_query_begin, all_queries_end); + IParser::Pos token_iterator(tokens, + context.getSettingsRef().max_parser_depth); + while (token_iterator->type == TokenType::Semicolon + && token_iterator.isValid()) + { + ++token_iterator; + } + this_query_begin = token_iterator->begin; + if (this_query_begin >= all_queries_end) + { + break; + } + } - if (!orig_ast) + // Try to parse the query. + const char * this_query_end = this_query_begin; + parsed_query = parseQuery(this_query_end, all_queries_end, true); + + if (!parsed_query) { if (ignore_error) { - Tokens tokens(begin, end); + Tokens tokens(this_query_begin, all_queries_end); IParser::Pos token_iterator(tokens, context.getSettingsRef().max_parser_depth); while (token_iterator->type != TokenType::Semicolon && token_iterator.isValid()) ++token_iterator; - begin = token_iterator->end; + this_query_begin = token_iterator->end; continue; } return true; } - auto * insert = orig_ast->as(); - - if (insert && insert->data) + // INSERT queries may have the inserted data in the query text + // that follow the query itself, e.g. "insert into t format CSV 1;2". + // They need special handling. First of all, here we find where the + // inserted data ends. In multy-query mode, it is delimited by a + // newline. + // The VALUES format needs even more handling -- we also allow the + // data to be delimited by semicolon. This case is handled later by + // the format parser itself. + auto * insert_ast = parsed_query->as(); + if (insert_ast && insert_ast->data) { - pos = find_first_symbols<'\n'>(insert->data, end); - insert->end = pos; + this_query_end = find_first_symbols<'\n'>(insert_ast->data, all_queries_end); + insert_ast->end = this_query_end; + query_to_send = all_queries_text.substr( + this_query_begin - all_queries_text.data(), + insert_ast->data - this_query_begin); + } + else + { + query_to_send = all_queries_text.substr( + this_query_begin - all_queries_text.data(), + this_query_end - this_query_begin); } - String str = text.substr(begin - text.data(), pos - begin); + // full_query is the query + inline INSERT data. 
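+            // (query_to_send, in contrast, stops at the beginning of the
+            // inline data; for such queries the data itself is sent
+            // separately later, by sendData().)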
+ full_query = all_queries_text.substr( + this_query_begin - all_queries_text.data(), + this_query_end - this_query_begin); - begin = pos; - while (isWhitespaceASCII(*begin) || *begin == ';') - ++begin; - - TestHint test_hint(test_mode, str); + // Look for the hint in the text of query + insert data, if any. + // e.g. insert into t format CSV 'a' -- { serverError 123 }. + TestHint test_hint(test_mode, full_query); expected_client_error = test_hint.clientError(); expected_server_error = test_hint.serverError(); try { - auto ast_to_process = orig_ast; - if (insert && insert->data) + processParsedSingleQuery(); + + if (insert_ast && insert_ast->data) { - ast_to_process = nullptr; - processTextAsSingleQuery(str); - } - else - { - parsed_query = ast_to_process; - full_query = str; - query_to_send = str; - processParsedSingleQuery(); + // For VALUES format: use the end of inline data as reported + // by the format parser (it is saved in sendData()). This + // allows us to handle queries like: + // insert into t values (1); select 1 + //, where the inline data is delimited by semicolon and not + // by a newline. + this_query_end = parsed_query->as()->end; } } catch (...) @@ -975,7 +1030,7 @@ private: last_exception_received_from_server = std::make_unique(getCurrentExceptionMessage(true), getCurrentExceptionCode()); actual_client_error = last_exception_received_from_server->code(); if (!ignore_error && (!actual_client_error || actual_client_error != expected_client_error)) - std::cerr << "Error on processing query: " << str << std::endl << last_exception_received_from_server->message(); + std::cerr << "Error on processing query: " << full_query << std::endl << last_exception_received_from_server->message(); received_exception_from_server = true; } @@ -989,6 +1044,8 @@ private: else return false; } + + this_query_begin = this_query_end; } return true; @@ -1103,7 +1160,9 @@ private: { last_exception_received_from_server = std::make_unique(getCurrentExceptionMessage(true), getCurrentExceptionCode()); received_exception_from_server = true; - std::cerr << "Error on processing query: " << ast_to_process->formatForErrorMessage() << std::endl << last_exception_received_from_server->message(); + fmt::print(stderr, "Error on processing query '{}': {}\n", + ast_to_process->formatForErrorMessage(), + last_exception_received_from_server->message()); } if (!connection->isConnected()) @@ -1411,7 +1470,7 @@ private: void sendData(Block & sample, const ColumnsDescription & columns_description) { /// If INSERT data must be sent. - const auto * parsed_insert_query = parsed_query->as(); + auto * parsed_insert_query = parsed_query->as(); if (!parsed_insert_query) return; @@ -1420,6 +1479,9 @@ private: /// Send data contained in the query. ReadBufferFromMemory data_in(parsed_insert_query->data, parsed_insert_query->end - parsed_insert_query->data); sendDataFrom(data_in, sample, columns_description); + // Remember where the data ended. We use this info later to determine + // where the next query begins. 
+ parsed_insert_query->end = data_in.buffer().begin() + data_in.count(); } else if (!is_interactive) { diff --git a/programs/config_tools.h.in b/programs/config_tools.h.in index 11386aca60e..7cb5a6d883a 100644 --- a/programs/config_tools.h.in +++ b/programs/config_tools.h.in @@ -12,5 +12,6 @@ #cmakedefine01 ENABLE_CLICKHOUSE_COMPRESSOR #cmakedefine01 ENABLE_CLICKHOUSE_FORMAT #cmakedefine01 ENABLE_CLICKHOUSE_OBFUSCATOR +#cmakedefine01 ENABLE_CLICKHOUSE_GIT_IMPORT #cmakedefine01 ENABLE_CLICKHOUSE_INSTALL #cmakedefine01 ENABLE_CLICKHOUSE_ODBC_BRIDGE diff --git a/programs/git-import/CMakeLists.txt b/programs/git-import/CMakeLists.txt new file mode 100644 index 00000000000..279bb35a272 --- /dev/null +++ b/programs/git-import/CMakeLists.txt @@ -0,0 +1,10 @@ +set (CLICKHOUSE_GIT_IMPORT_SOURCES git-import.cpp) + +set (CLICKHOUSE_GIT_IMPORT_LINK + PRIVATE + boost::program_options + dbms +) + +clickhouse_program_add(git-import) + diff --git a/programs/git-import/clickhouse-git-import.cpp b/programs/git-import/clickhouse-git-import.cpp new file mode 100644 index 00000000000..cfa06306604 --- /dev/null +++ b/programs/git-import/clickhouse-git-import.cpp @@ -0,0 +1,2 @@ +int mainEntryClickHouseGitImport(int argc, char ** argv); +int main(int argc_, char ** argv_) { return mainEntryClickHouseGitImport(argc_, argv_); } diff --git a/programs/git-import/git-import.cpp b/programs/git-import/git-import.cpp new file mode 100644 index 00000000000..7cdd77b4b7c --- /dev/null +++ b/programs/git-import/git-import.cpp @@ -0,0 +1,1235 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +static constexpr auto documentation = R"( +A tool to extract information from Git repository for analytics. + +It dumps the data for the following tables: +- commits - commits with statistics; +- file_changes - files changed in every commit with the info about the change and statistics; +- line_changes - every changed line in every changed file in every commit with full info about the line and the information about previous change of this line. + +The largest and the most important table is "line_changes". 
+
+It allows answering questions like:
+- list the files with the maximum number of authors;
+- show me the oldest lines of code in the repository;
+- show me the files with the longest history;
+- list an author's favorite files;
+- list the largest files with the lowest number of authors;
+- on what weekday code has the highest chance to stay in the repository;
+- the distribution of code age across the repository;
+- files sorted by average code age;
+- quickly show a file with (rough) blame info;
+- the distribution of commits and lines of code by time, by weekday, and by author, including for specific subdirectories;
+- the history of every subdirectory, file, or line of a file: the number of changes (lines and commits) across time, and how the number of contributors changed across time;
+- list the files with the most modifications;
+- list the files that were rewritten the most times or by the most authors;
+- the percentage of code removed by other authors, per author;
+- the matrix of authors that shows which authors tend to rewrite the code of other authors;
+- the worst time to write code, in the sense that the code has the highest chance of being rewritten;
+- the average time before code is rewritten, and the median (the half-life of code decay);
+- the change of the comments/code percentage over time, by author, and by location;
+- who tends to write more tests / C++ code / comments.
+
+The data is intended for analytical purposes. It can be imprecise for many reasons, but it should be good enough for its purpose.
+
+The data is not intended to provide any conclusions for managers; it is especially contraindicated for any kind of "performance review". Instead, you can spend multiple days looking at various interesting statistics.
+
+Run this tool inside your git repository. It will create .tsv files that can be loaded into ClickHouse (or into another DBMS if you dare).
+
+The tool can process fairly large repositories in a reasonable time.
+It has been tested on:
+- ClickHouse: 31 seconds; 3 million rows;
+- LLVM: 8 minutes; 62 million rows;
+- Linux: 12 minutes; 85 million rows;
+- Chromium: 67 minutes; 343 million rows;
+(the numbers as of Sep 2020)
+
+
+Prepare the database by executing the following queries:
+
+DROP DATABASE IF EXISTS git;
+CREATE DATABASE git;
+
+CREATE TABLE git.commits
+(
+    hash String,
+    author LowCardinality(String),
+    time DateTime,
+    message String,
+    files_added UInt32,
+    files_deleted UInt32,
+    files_renamed UInt32,
+    files_modified UInt32,
+    lines_added UInt32,
+    lines_deleted UInt32,
+    hunks_added UInt32,
+    hunks_removed UInt32,
+    hunks_changed UInt32
+) ENGINE = MergeTree ORDER BY time;
+
+CREATE TABLE git.file_changes
+(
+    change_type Enum('Add' = 1, 'Delete' = 2, 'Modify' = 3, 'Rename' = 4, 'Copy' = 5, 'Type' = 6),
+    path LowCardinality(String),
+    old_path LowCardinality(String),
+    file_extension LowCardinality(String),
+    lines_added UInt32,
+    lines_deleted UInt32,
+    hunks_added UInt32,
+    hunks_removed UInt32,
+    hunks_changed UInt32,
+
+    commit_hash String,
+    author LowCardinality(String),
+    time DateTime,
+    commit_message String,
+    commit_files_added UInt32,
+    commit_files_deleted UInt32,
+    commit_files_renamed UInt32,
+    commit_files_modified UInt32,
+    commit_lines_added UInt32,
+    commit_lines_deleted UInt32,
+    commit_hunks_added UInt32,
+    commit_hunks_removed UInt32,
+    commit_hunks_changed UInt32
+) ENGINE = MergeTree ORDER BY time;
+
+CREATE TABLE git.line_changes
+(
+    sign Int8,
+    line_number_old UInt32,
+    line_number_new UInt32,
+    hunk_num UInt32,
+    hunk_start_line_number_old UInt32,
+    hunk_start_line_number_new UInt32,
+    hunk_lines_added UInt32,
+    hunk_lines_deleted UInt32,
+    hunk_context LowCardinality(String),
+    line LowCardinality(String),
+    indent UInt8,
+    line_type Enum('Empty' = 0, 'Comment' = 1, 'Punct' = 2, 'Code' = 3),
+
+    prev_commit_hash String,
+    prev_author LowCardinality(String),
+    prev_time DateTime,
+
+    file_change_type Enum('Add' = 1, 'Delete' = 2, 'Modify' = 3, 'Rename' = 4, 'Copy' = 5, 'Type' = 6),
+    path LowCardinality(String),
+    old_path LowCardinality(String),
+    file_extension LowCardinality(String),
+    file_lines_added UInt32,
+    file_lines_deleted UInt32,
+    file_hunks_added UInt32,
+    file_hunks_removed UInt32,
+    file_hunks_changed UInt32,
+
+    commit_hash String,
+    author LowCardinality(String),
+    time DateTime,
+    commit_message String,
+    commit_files_added UInt32,
+    commit_files_deleted UInt32,
+    commit_files_renamed UInt32,
+    commit_files_modified UInt32,
+    commit_lines_added UInt32,
+    commit_lines_deleted UInt32,
+    commit_hunks_added UInt32,
+    commit_hunks_removed UInt32,
+    commit_hunks_changed UInt32
+) ENGINE = MergeTree ORDER BY time;
+
+Run the tool.
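+
+For example, after the data is inserted with the commands below, a query like the
+following (an illustrative sketch; the aggregate and the LIMIT are arbitrary choices,
+only the columns defined above are used) lists the files with the most distinct authors:
+
+SELECT path, uniqExact(author) AS authors
+FROM git.file_changes
+GROUP BY path
+ORDER BY authors DESC
+LIMIT 20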
+ +Then insert the data with the following commands: + +clickhouse-client --query "INSERT INTO git.commits FORMAT TSV" < commits.tsv +clickhouse-client --query "INSERT INTO git.file_changes FORMAT TSV" < file_changes.tsv +clickhouse-client --query "INSERT INTO git.line_changes FORMAT TSV" < line_changes.tsv + +)"; + +namespace po = boost::program_options; + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; +} + + +struct Commit +{ + std::string hash; + std::string author; + LocalDateTime time{}; + std::string message; + uint32_t files_added{}; + uint32_t files_deleted{}; + uint32_t files_renamed{}; + uint32_t files_modified{}; + uint32_t lines_added{}; + uint32_t lines_deleted{}; + uint32_t hunks_added{}; + uint32_t hunks_removed{}; + uint32_t hunks_changed{}; + + void writeTextWithoutNewline(WriteBuffer & out) const + { + writeText(hash, out); + writeChar('\t', out); + writeText(author, out); + writeChar('\t', out); + writeText(time, out); + writeChar('\t', out); + writeText(message, out); + writeChar('\t', out); + writeText(files_added, out); + writeChar('\t', out); + writeText(files_deleted, out); + writeChar('\t', out); + writeText(files_renamed, out); + writeChar('\t', out); + writeText(files_modified, out); + writeChar('\t', out); + writeText(lines_added, out); + writeChar('\t', out); + writeText(lines_deleted, out); + writeChar('\t', out); + writeText(hunks_added, out); + writeChar('\t', out); + writeText(hunks_removed, out); + writeChar('\t', out); + writeText(hunks_changed, out); + } +}; + + +enum class FileChangeType +{ + Add, + Delete, + Modify, + Rename, + Copy, + Type, +}; + +void writeText(FileChangeType type, WriteBuffer & out) +{ + switch (type) + { + case FileChangeType::Add: writeString("Add", out); break; + case FileChangeType::Delete: writeString("Delete", out); break; + case FileChangeType::Modify: writeString("Modify", out); break; + case FileChangeType::Rename: writeString("Rename", out); break; + case FileChangeType::Copy: writeString("Copy", out); break; + case FileChangeType::Type: writeString("Type", out); break; + } +} + +struct FileChange +{ + FileChangeType change_type{}; + std::string path; + std::string old_path; + std::string file_extension; + uint32_t lines_added{}; + uint32_t lines_deleted{}; + uint32_t hunks_added{}; + uint32_t hunks_removed{}; + uint32_t hunks_changed{}; + + void writeTextWithoutNewline(WriteBuffer & out) const + { + writeText(change_type, out); + writeChar('\t', out); + writeText(path, out); + writeChar('\t', out); + writeText(old_path, out); + writeChar('\t', out); + writeText(file_extension, out); + writeChar('\t', out); + writeText(lines_added, out); + writeChar('\t', out); + writeText(lines_deleted, out); + writeChar('\t', out); + writeText(hunks_added, out); + writeChar('\t', out); + writeText(hunks_removed, out); + writeChar('\t', out); + writeText(hunks_changed, out); + } +}; + + +enum class LineType +{ + Empty, + Comment, + Punct, + Code, +}; + +void writeText(LineType type, WriteBuffer & out) +{ + switch (type) + { + case LineType::Empty: writeString("Empty", out); break; + case LineType::Comment: writeString("Comment", out); break; + case LineType::Punct: writeString("Punct", out); break; + case LineType::Code: writeString("Code", out); break; + } +} + +struct LineChange +{ + int8_t sign{}; /// 1 if added, -1 if deleted + uint32_t line_number_old{}; + uint32_t line_number_new{}; + uint32_t hunk_num{}; /// ordinal number of hunk in diff, starting with 0 + uint32_t hunk_start_line_number_old{}; + 
uint32_t hunk_start_line_number_new{};
+    uint32_t hunk_lines_added{};
+    uint32_t hunk_lines_deleted{};
+    std::string hunk_context; /// The context (like a line with the function name) as it is calculated by git
+    std::string line; /// Line content without leading whitespaces
+    uint8_t indent{}; /// The number of leading whitespaces (a tab counts as 4)
+    LineType line_type{};
+    /// Information from the history (blame).
+    std::string prev_commit_hash;
+    std::string prev_author;
+    LocalDateTime prev_time{};
+
+    /** Classify the line as empty / code / comment / single punctuation char.
+      * Very rough and mostly suitable for our C++ style.
+      */
+    void setLineInfo(std::string full_line)
+    {
+        uint32_t num_spaces = 0;
+
+        const char * pos = full_line.data();
+        const char * end = pos + full_line.size();
+
+        while (pos < end)
+        {
+            if (*pos == ' ')
+                ++num_spaces;
+            else if (*pos == '\t')
+                num_spaces += 4;
+            else
+                break;
+            ++pos;
+        }
+
+        indent = std::min(255U, num_spaces);
+        line.assign(pos, end);
+
+        if (pos == end)
+        {
+            line_type = LineType::Empty;
+        }
+        else if (pos + 1 < end
+            && ((pos[0] == '/' && (pos[1] == '/' || pos[1] == '*'))
+                || (pos[0] == '*' && pos[1] == ' ') /// This is not precise.
+                || (pos[0] == '#' && pos[1] == ' ')))
+        {
+            line_type = LineType::Comment;
+        }
+        else
+        {
+            while (pos < end)
+            {
+                if (isAlphaNumericASCII(*pos))
+                {
+                    line_type = LineType::Code;
+                    break;
+                }
+                ++pos;
+            }
+            if (pos == end)
+                line_type = LineType::Punct;
+        }
+    }
+
+    void writeTextWithoutNewline(WriteBuffer & out) const
+    {
+        writeText(sign, out);
+        writeChar('\t', out);
+        writeText(line_number_old, out);
+        writeChar('\t', out);
+        writeText(line_number_new, out);
+        writeChar('\t', out);
+        writeText(hunk_num, out);
+        writeChar('\t', out);
+        writeText(hunk_start_line_number_old, out);
+        writeChar('\t', out);
+        writeText(hunk_start_line_number_new, out);
+        writeChar('\t', out);
+        writeText(hunk_lines_added, out);
+        writeChar('\t', out);
+        writeText(hunk_lines_deleted, out);
+        writeChar('\t', out);
+        writeText(hunk_context, out);
+        writeChar('\t', out);
+        writeText(line, out);
+        writeChar('\t', out);
+        writeText(indent, out);
+        writeChar('\t', out);
+        writeText(line_type, out);
+        writeChar('\t', out);
+        writeText(prev_commit_hash, out);
+        writeChar('\t', out);
+        writeText(prev_author, out);
+        writeChar('\t', out);
+        writeText(prev_time, out);
+    }
+};
+
+using LineChanges = std::vector;
+
+struct FileDiff
+{
+    explicit FileDiff(FileChange file_change_) : file_change(file_change_) {}
+
+    FileChange file_change;
+    LineChanges line_changes;
+};
+
+using CommitDiff = std::map;
+
+
+/** Parsing helpers */
+
+void skipUntilWhitespace(ReadBuffer & buf)
+{
+    while (!buf.eof())
+    {
+        char * next_pos = find_first_symbols<'\t', '\n', ' '>(buf.position(), buf.buffer().end());
+        buf.position() = next_pos;
+
+        if (!buf.hasPendingData())
+            continue;
+
+        if (*buf.position() == '\t' || *buf.position() == '\n' || *buf.position() == ' ')
+            return;
+    }
+}
+
+void skipUntilNextLine(ReadBuffer & buf)
+{
+    while (!buf.eof())
+    {
+        char * next_pos = find_first_symbols<'\n'>(buf.position(), buf.buffer().end());
+        buf.position() = next_pos;
+
+        if (!buf.hasPendingData())
+            continue;
+
+        if (*buf.position() == '\n')
+        {
+            ++buf.position();
+            return;
+        }
+    }
+}
+
+void readStringUntilNextLine(std::string & s, ReadBuffer & buf)
+{
+    s.clear();
+    while (!buf.eof())
+    {
+        char * next_pos = find_first_symbols<'\n'>(buf.position(), buf.buffer().end());
+        s.append(buf.position(), next_pos - buf.position());
+        buf.position() = next_pos;
+
+        if (!buf.hasPendingData())
+            continue;
+
+        if (*buf.position() == '\n')
+        {
+            ++buf.position();
+            return;
+        }
+    }
+}
+
+
+/** Writes the resulting tables to files that can be imported to ClickHouse.
+  */
+struct ResultWriter
+{
+    WriteBufferFromFile commits{"commits.tsv"};
+    WriteBufferFromFile file_changes{"file_changes.tsv"};
+    WriteBufferFromFile line_changes{"line_changes.tsv"};
+
+    void appendCommit(const Commit & commit, const CommitDiff & files)
+    {
+        /// commits table
+        {
+            auto & out = commits;
+
+            commit.writeTextWithoutNewline(out);
+            writeChar('\n', out);
+        }
+
+        for (const auto & elem : files)
+        {
+            const FileChange & file_change = elem.second.file_change;
+
+            /// file_changes table
+            {
+                auto & out = file_changes;
+
+                file_change.writeTextWithoutNewline(out);
+                writeChar('\t', out);
+                commit.writeTextWithoutNewline(out);
+                writeChar('\n', out);
+            }
+
+            /// line_changes table
+            for (const auto & line_change : elem.second.line_changes)
+            {
+                auto & out = line_changes;
+
+                line_change.writeTextWithoutNewline(out);
+                writeChar('\t', out);
+                file_change.writeTextWithoutNewline(out);
+                writeChar('\t', out);
+                commit.writeTextWithoutNewline(out);
+                writeChar('\n', out);
+            }
+        }
+    }
+};
+
+
+/** See description in "main".
+  */
+struct Options
+{
+    bool skip_commits_without_parents = true;
+    bool skip_commits_with_duplicate_diffs = true;
+    size_t threads = 1;
+    std::optional skip_paths;
+    std::optional skip_commits_with_messages;
+    std::unordered_set skip_commits;
+    std::optional diff_size_limit;
+    std::string stop_after_commit;
+
+    explicit Options(const po::variables_map & options)
+    {
+        skip_commits_without_parents = options["skip-commits-without-parents"].as();
+        skip_commits_with_duplicate_diffs = options["skip-commits-with-duplicate-diffs"].as();
+        threads = options["threads"].as();
+        if (options.count("skip-paths"))
+        {
+            skip_paths.emplace(options["skip-paths"].as());
+        }
+        if (options.count("skip-commits-with-messages"))
+        {
+            skip_commits_with_messages.emplace(options["skip-commits-with-messages"].as());
+        }
+        if (options.count("skip-commit"))
+        {
+            auto vec = options["skip-commit"].as>();
+            skip_commits.insert(vec.begin(), vec.end());
+        }
+        if (options.count("diff-size-limit"))
+        {
+            diff_size_limit = options["diff-size-limit"].as();
+        }
+        if (options.count("stop-after-commit"))
+        {
+            stop_after_commit = options["stop-after-commit"].as();
+        }
+    }
+};
+
+
+/** Rough snapshot of the repository, calculated by applying diffs. It is used to calculate blame info.
+  * Represented by a list of lines. For every line it contains information about the commit that modified this line last.
+  *
+  * Note that there are many cases when this info may become incorrect.
+  * The first reason is that git history is non-linear, but we form this snapshot by applying commit diffs in some order
+  * that cannot give us correct results even theoretically.
+  * The second reason is that we don't process merge commits. But merge commits may contain differences for conflict resolution.
+  *
+  * We expect the information to be mostly correct for the purpose of analytics,
+  * so it can provide the expected "blame" info for most of the lines.
+  */
+struct FileBlame
+{
+    using Lines = std::list;
+    Lines lines;
+
+    /// We walk through this list adding or removing lines.
+    Lines::iterator it;
+    size_t current_idx = 1;
+
+    FileBlame()
+    {
+        it = lines.begin();
+    }
+
+    /// This is important when a file was copied or renamed.
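+    /// (Copy assignment rebinds the iterator to our own fresh copy of the list;
+    /// a copied iterator would still point into the source object's list.)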
+ FileBlame & operator=(const FileBlame & rhs) + { + lines = rhs.lines; + it = lines.begin(); + current_idx = 1; + return *this; + } + + FileBlame(const FileBlame & rhs) + { + *this = rhs; + } + + /// Move iterator to requested line or stop at the end. + void walk(uint32_t num) + { + while (current_idx < num && it != lines.end()) + { + ++current_idx; + ++it; + } + while (current_idx > num) + { + --current_idx; + --it; + } + } + + const Commit * find(uint32_t num) + { + walk(num); + +// std::cerr << "current_idx: " << current_idx << ", num: " << num << "\n"; + + if (current_idx == num && it != lines.end()) + return &*it; + return {}; + } + + void addLine(uint32_t num, Commit commit) + { + walk(num); + + /// If the inserted line is over the end of file, we insert empty lines before it. + while (it == lines.end() && current_idx < num) + { + lines.emplace_back(); + ++current_idx; + } + + it = lines.insert(it, commit); + } + + void removeLine(uint32_t num) + { +// std::cerr << "Removing line " << num << ", current_idx: " << current_idx << "\n"; + + walk(num); + + if (current_idx == num && it != lines.end()) + it = lines.erase(it); + } +}; + +/// All files with their blame info. When file is renamed, we also rename it in snapshot. +using Snapshot = std::map; + + +/** Enrich the line changes data with the history info from the snapshot + * - the author, time and commit of the previous change to every found line (blame). + * And update the snapshot. + */ +void updateSnapshot(Snapshot & snapshot, const Commit & commit, CommitDiff & file_changes) +{ + /// Renames and copies. + for (auto & elem : file_changes) + { + auto & file = elem.second.file_change; + if (file.path != file.old_path) + snapshot[file.path] = snapshot[file.old_path]; + } + + for (auto & elem : file_changes) + { +// std::cerr << elem.first << "\n"; + + FileBlame & file_snapshot = snapshot[elem.first]; + std::unordered_map deleted_lines; + + /// Obtain blame info from previous state of the snapshot + + for (auto & line_change : elem.second.line_changes) + { + if (line_change.sign == -1) + { + if (const Commit * prev_commit = file_snapshot.find(line_change.line_number_old); + prev_commit && prev_commit->time <= commit.time) + { + line_change.prev_commit_hash = prev_commit->hash; + line_change.prev_author = prev_commit->author; + line_change.prev_time = prev_commit->time; + deleted_lines[line_change.line_number_old] = *prev_commit; + } + else + { + // std::cerr << "Did not find line " << line_change.line_number_old << " from file " << elem.first << ": " << line_change.line << "\n"; + } + } + else if (line_change.sign == 1) + { + uint32_t this_line_in_prev_commit = line_change.hunk_start_line_number_old + + (line_change.line_number_new - line_change.hunk_start_line_number_new); + + if (deleted_lines.count(this_line_in_prev_commit)) + { + const auto & prev_commit = deleted_lines[this_line_in_prev_commit]; + if (prev_commit.time <= commit.time) + { + line_change.prev_commit_hash = prev_commit.hash; + line_change.prev_author = prev_commit.author; + line_change.prev_time = prev_commit.time; + } + } + } + } + + /// Update the snapshot + + for (const auto & line_change : elem.second.line_changes) + { + if (line_change.sign == -1) + { + file_snapshot.removeLine(line_change.line_number_new); + } + else if (line_change.sign == 1) + { + file_snapshot.addLine(line_change.line_number_new, commit); + } + } + } +} + + +/** Deduplication of commits with identical diffs. 
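+ * Duplicate diffs are usually the result of cherry-picks or merges after rebase
+ * (see the --skip-commits-with-duplicate-diffs option below); the hash covers the
+ * change types, paths and line contents, which is enough to identify them.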
+ */ +using DiffHashes = std::unordered_set; + +UInt128 diffHash(const CommitDiff & file_changes) +{ + SipHash hasher; + + for (const auto & elem : file_changes) + { + hasher.update(elem.second.file_change.change_type); + hasher.update(elem.second.file_change.old_path.size()); + hasher.update(elem.second.file_change.old_path); + hasher.update(elem.second.file_change.path.size()); + hasher.update(elem.second.file_change.path); + + hasher.update(elem.second.line_changes.size()); + for (const auto & line_change : elem.second.line_changes) + { + hasher.update(line_change.sign); + hasher.update(line_change.line_number_old); + hasher.update(line_change.line_number_new); + hasher.update(line_change.indent); + hasher.update(line_change.line.size()); + hasher.update(line_change.line); + } + } + + UInt128 hash_of_diff; + hasher.get128(hash_of_diff.low, hash_of_diff.high); + + return hash_of_diff; +} + + +/** File changes in form + * :100644 100644 b90fe6bb94 3ffe4c380f M src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp + * :100644 100644 828dedf6b5 828dedf6b5 R100 dbms/src/Functions/GeoUtils.h dbms/src/Functions/PolygonUtils.h + * according to the output of 'git show --raw' + */ +void processFileChanges( + ReadBuffer & in, + const Options & options, + Commit & commit, + CommitDiff & file_changes) +{ + while (checkChar(':', in)) + { + FileChange file_change; + + /// We don't care about file mode and content hashes. + for (size_t i = 0; i < 4; ++i) + { + skipUntilWhitespace(in); + skipWhitespaceIfAny(in); + } + + char change_type; + readChar(change_type, in); + + /// For rename and copy there is a number called "score". We ignore it. + int score; + + switch (change_type) + { + case 'A': + file_change.change_type = FileChangeType::Add; + ++commit.files_added; + break; + case 'D': + file_change.change_type = FileChangeType::Delete; + ++commit.files_deleted; + break; + case 'M': + file_change.change_type = FileChangeType::Modify; + ++commit.files_modified; + break; + case 'R': + file_change.change_type = FileChangeType::Rename; + ++commit.files_renamed; + readText(score, in); + break; + case 'C': + file_change.change_type = FileChangeType::Copy; + readText(score, in); + break; + case 'T': + file_change.change_type = FileChangeType::Type; + break; + default: + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected file change type: {}", change_type); + } + + skipWhitespaceIfAny(in); + + if (change_type == 'R' || change_type == 'C') + { + readText(file_change.old_path, in); + skipWhitespaceIfAny(in); + readText(file_change.path, in); + } + else + { + readText(file_change.path, in); + } + + file_change.file_extension = std::filesystem::path(file_change.path).extension(); + /// It gives us extension in form of '.cpp'. There is a reason for it but we remove initial dot for simplicity. + if (!file_change.file_extension.empty() && file_change.file_extension.front() == '.') + file_change.file_extension = file_change.file_extension.substr(1, std::string::npos); + + assertChar('\n', in); + + if (!(options.skip_paths && re2::RE2::PartialMatch(file_change.path, *options.skip_paths))) + { + file_changes.emplace( + file_change.path, + FileDiff(file_change)); + } + } +} + + +/** Process the list of diffs for every file from the result of "git show". 
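+ * The expected input is the zero-context unified diff produced by
+ * "git show --patch --unified=0" (see gitShow below).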
+ * Caveats:
+ * - changes in binary files can be ignored;
+ * - if the content of a line begins with '+' or '-', it will be skipped;
+ *   this means that if you store diffs in your repository and "git show" displays a diff-of-diff for you,
+ *   it won't be processed correctly;
+ * - we expect some specific format of the diff, but it may actually depend on the git config;
+ * - non-ASCII file names are not processed correctly (they will not be found and will be ignored).
+ */
+void processDiffs(
+    ReadBuffer & in,
+    std::optional size_limit,
+    Commit & commit,
+    CommitDiff & file_changes)
+{
+    std::string old_file_path;
+    std::string new_file_path;
+    FileDiff * file_change_and_line_changes = nullptr;
+    LineChange line_change;
+
+    /// Diffs for every file, in the form:
+    /// --- a/src/Storages/StorageReplicatedMergeTree.cpp
+    /// +++ b/src/Storages/StorageReplicatedMergeTree.cpp
+    /// @@ -1387,2 +1387 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry)
+    /// -            table_lock, entry.create_time, reserved_space, entry.deduplicate,
+    /// -            entry.force_ttl);
+    /// +            table_lock, entry.create_time, reserved_space, entry.deduplicate);
+
+    size_t diff_size = 0;
+    while (!in.eof())
+    {
+        if (checkString("@@ ", in))
+        {
+            if (!file_change_and_line_changes)
+            {
+                auto file_name = new_file_path.empty() ? old_file_path : new_file_path;
+                auto it = file_changes.find(file_name);
+                if (file_changes.end() != it)
+                    file_change_and_line_changes = &it->second;
+            }
+
+            if (file_change_and_line_changes)
+            {
+                uint32_t old_lines = 1;
+                uint32_t new_lines = 1;
+
+                assertChar('-', in);
+                readText(line_change.hunk_start_line_number_old, in);
+                if (checkChar(',', in))
+                    readText(old_lines, in);
+
+                assertString(" +", in);
+                readText(line_change.hunk_start_line_number_new, in);
+                if (checkChar(',', in))
+                    readText(new_lines, in);
+
+                /// This is needed to simplify the logic of updating the snapshot:
+                /// when all lines are removed, we can treat it as a repeated removal of the line with number 1.
+                if (line_change.hunk_start_line_number_new == 0)
+                    line_change.hunk_start_line_number_new = 1;
+
+                assertString(" @@", in);
+                if (checkChar(' ', in))
+                    readStringUntilNextLine(line_change.hunk_context, in);
+                else
+                    assertChar('\n', in);
+
+                line_change.hunk_lines_added = new_lines;
+                line_change.hunk_lines_deleted = old_lines;
+
+                ++line_change.hunk_num;
+                line_change.line_number_old = line_change.hunk_start_line_number_old;
+                line_change.line_number_new = line_change.hunk_start_line_number_new;
+
+                if (old_lines && new_lines)
+                {
+                    ++commit.hunks_changed;
+                    ++file_change_and_line_changes->file_change.hunks_changed;
+                }
+                else if (old_lines)
+                {
+                    ++commit.hunks_removed;
+                    ++file_change_and_line_changes->file_change.hunks_removed;
+                }
+                else if (new_lines)
+                {
+                    ++commit.hunks_added;
+                    ++file_change_and_line_changes->file_change.hunks_added;
+                }
+            }
+        }
+        else if (checkChar('-', in))
+        {
+            if (checkString("-- ", in))
+            {
+                if (checkString("a/", in))
+                {
+                    readStringUntilNextLine(old_file_path, in);
+                    line_change = LineChange{};
+                    file_change_and_line_changes = nullptr;
+                }
+                else if (checkString("/dev/null", in))
+                {
+                    old_file_path.clear();
+                    assertChar('\n', in);
+                    line_change = LineChange{};
+                    file_change_and_line_changes = nullptr;
+                }
+                else
+                    skipUntilNextLine(in); /// Actually it can be a line of the diff itself. Skip it for simplicity.
+ } + else + { + ++diff_size; + if (file_change_and_line_changes) + { + ++commit.lines_deleted; + ++file_change_and_line_changes->file_change.lines_deleted; + + line_change.sign = -1; + readStringUntilNextLine(line_change.line, in); + line_change.setLineInfo(line_change.line); + + file_change_and_line_changes->line_changes.push_back(line_change); + ++line_change.line_number_old; + } + } + } + else if (checkChar('+', in)) + { + if (checkString("++ ", in)) + { + if (checkString("b/", in)) + { + readStringUntilNextLine(new_file_path, in); + line_change = LineChange{}; + file_change_and_line_changes = nullptr; + } + else if (checkString("/dev/null", in)) + { + new_file_path.clear(); + assertChar('\n', in); + line_change = LineChange{}; + file_change_and_line_changes = nullptr; + } + else + skipUntilNextLine(in); /// Actually it can be the line in diff. Skip it for simplicity. + } + else + { + ++diff_size; + if (file_change_and_line_changes) + { + ++commit.lines_added; + ++file_change_and_line_changes->file_change.lines_added; + + line_change.sign = 1; + readStringUntilNextLine(line_change.line, in); + line_change.setLineInfo(line_change.line); + + file_change_and_line_changes->line_changes.push_back(line_change); + ++line_change.line_number_new; + } + } + } + else + { + /// Unknown lines are ignored. + skipUntilNextLine(in); + } + + if (size_limit && diff_size > *size_limit) + { + return; + } + } +} + + +/** Process the "git show" result for a single commit. Append the result to tables. + */ +void processCommit( + ReadBuffer & in, + const Options & options, + size_t commit_num, + size_t total_commits, + std::string hash, + Snapshot & snapshot, + DiffHashes & diff_hashes, + ResultWriter & result) +{ + Commit commit; + commit.hash = hash; + + time_t commit_time; + readText(commit_time, in); + commit.time = commit_time; + assertChar('\0', in); + readNullTerminated(commit.author, in); + std::string parent_hash; + readNullTerminated(parent_hash, in); + readNullTerminated(commit.message, in); + + if (options.skip_commits_with_messages && re2::RE2::PartialMatch(commit.message, *options.skip_commits_with_messages)) + return; + + std::string message_to_print = commit.message; + std::replace_if(message_to_print.begin(), message_to_print.end(), [](char c){ return std::iscntrl(c); }, ' '); + + std::cerr << fmt::format("{}% {} {} {}\n", + commit_num * 100 / total_commits, toString(commit.time), hash, message_to_print); + + if (options.skip_commits_without_parents && commit_num != 0 && parent_hash.empty()) + { + std::cerr << "Warning: skipping commit without parents\n"; + return; + } + + if (!in.eof()) + assertChar('\n', in); + + CommitDiff file_changes; + processFileChanges(in, options, commit, file_changes); + + if (!in.eof()) + { + assertChar('\n', in); + processDiffs(in, commit_num != 0 ? options.diff_size_limit : std::nullopt, commit, file_changes); + } + + /// Skip commits with too large diffs. + if (options.diff_size_limit && commit_num != 0 && commit.lines_added + commit.lines_deleted > *options.diff_size_limit) + return; + + /// Calculate hash of diff and skip duplicates + if (options.skip_commits_with_duplicate_diffs && !diff_hashes.insert(diffHash(file_changes)).second) + return; + + /// Update snapshot and blame info + updateSnapshot(snapshot, commit, file_changes); + + /// Write the result + result.appendCommit(commit, file_changes); +} + + +/** Runs child process and allows to read the result. + * Multiple processes can be run for parallel processing. 
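+ * (processLog below keeps up to `threads` git subprocesses in flight, round-robin:
+ * while one commit is being parsed, git is already preparing the next ones.)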
+ */
+auto gitShow(const std::string & hash)
+{
+    std::string command = fmt::format(
+        "git show --raw --pretty='format:%ct%x00%aN%x00%P%x00%s%x00' --patch --unified=0 {}",
+        hash);
+
+    return ShellCommand::execute(command);
+}
+
+
+/** Obtain the list of commits and process them.
+  */
+void processLog(const Options & options)
+{
+    ResultWriter result;
+
+    std::string command = "git log --reverse --no-merges --pretty=%H";
+    fmt::print("{}\n", command);
+    auto git_log = ShellCommand::execute(command);
+
+    /// Collect hashes in memory. This is inefficient but allows us to display beautiful progress.
+    /// The number of commits is on the order of single millions for the largest repositories,
+    /// so we don't care about the potential waste of ~100 MB of memory.
+
+    std::vector hashes;
+
+    auto & in = git_log->out;
+    while (!in.eof())
+    {
+        std::string hash;
+        readString(hash, in);
+        assertChar('\n', in);
+
+        if (!options.skip_commits.count(hash))
+            hashes.emplace_back(std::move(hash));
+    }
+
+    size_t num_commits = hashes.size();
+    fmt::print("Total {} commits to process.\n", num_commits);
+
+    /// Will run multiple processes in parallel
+    size_t num_threads = options.threads;
+    if (num_threads == 0)
+        throw Exception("num-threads cannot be zero", ErrorCodes::INCORRECT_DATA);
+
+    std::vector> show_commands(num_threads);
+    for (size_t i = 0; i < num_commits && i < num_threads; ++i)
+        show_commands[i] = gitShow(hashes[i]);
+
+    Snapshot snapshot;
+    DiffHashes diff_hashes;
+
+    for (size_t i = 0; i < num_commits; ++i)
+    {
+        processCommit(show_commands[i % num_threads]->out, options, i, num_commits, hashes[i], snapshot, diff_hashes, result);
+
+        if (!options.stop_after_commit.empty() && hashes[i] == options.stop_after_commit)
+            break;
+
+        if (i + num_threads < num_commits)
+            show_commands[i % num_threads] = gitShow(hashes[i + num_threads]);
+    }
+}
+
+
+}
+
+int mainEntryClickHouseGitImport(int argc, char ** argv)
+try
+{
+    using namespace DB;
+
+    po::options_description desc("Allowed options", getTerminalWidth());
+    desc.add_options()
+        ("help,h", "produce help message")
+        ("skip-commits-without-parents", po::value()->default_value(true),
+            "Skip commits without parents (except the initial commit)."
+            " These commits are usually erroneous but they can make sense in very rare cases.")
+        ("skip-commits-with-duplicate-diffs", po::value()->default_value(true),
+            "Skip commits with duplicate diffs."
+            " These commits are usually the results of cherry-pick or merge after rebase.")
+        ("skip-commit", po::value>(),
+            "Skip the commit with the specified hash. The option can be specified multiple times.")
+        ("skip-paths", po::value(),
+            "Skip paths that match a regular expression (re2 syntax).")
+        ("skip-commits-with-messages", po::value(),
+            "Skip commits whose messages match a regular expression (re2 syntax).")
+        ("diff-size-limit", po::value()->default_value(100000),
+            "Skip commits whose diff size (number of added + removed lines) is larger than the specified threshold.
Does not apply for initial commit.") + ("stop-after-commit", po::value(), + "Stop processing after specified commit hash.") + ("threads", po::value()->default_value(std::thread::hardware_concurrency()), + "Number of concurrent git subprocesses to spawn") + ; + + po::variables_map options; + po::store(boost::program_options::parse_command_line(argc, argv, desc), options); + + if (options.count("help")) + { + std::cout << documentation << '\n' + << "Usage: " << argv[0] << '\n' + << desc << '\n' + << "\nExample:\n" + << "\nclickhouse git-import --skip-paths 'generated\\.cpp|^(contrib|docs?|website|libs/(libcityhash|liblz4|libdivide|libvectorclass|libdouble-conversion|libcpuid|libzstd|libfarmhash|libmetrohash|libpoco|libwidechar_width))/' --skip-commits-with-messages '^Merge branch '\n"; + return 1; + } + + processLog(Options(options)); + return 0; +} +catch (...) +{ + std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; + throw; +} diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 7b7ab149447..bd60fbb63ba 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -205,6 +205,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) "clickhouse-benchmark", "clickhouse-copier", "clickhouse-obfuscator", + "clickhouse-git-import", "clickhouse-compressor", "clickhouse-format", "clickhouse-extract-from-config" diff --git a/programs/main.cpp b/programs/main.cpp index 3df5f9f683b..b91bd732f21 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -46,6 +46,9 @@ int mainEntryClickHouseClusterCopier(int argc, char ** argv); #if ENABLE_CLICKHOUSE_OBFUSCATOR int mainEntryClickHouseObfuscator(int argc, char ** argv); #endif +#if ENABLE_CLICKHOUSE_GIT_IMPORT +int mainEntryClickHouseGitImport(int argc, char ** argv); +#endif #if ENABLE_CLICKHOUSE_INSTALL int mainEntryClickHouseInstall(int argc, char ** argv); int mainEntryClickHouseStart(int argc, char ** argv); @@ -91,6 +94,9 @@ std::pair clickhouse_applications[] = #if ENABLE_CLICKHOUSE_OBFUSCATOR {"obfuscator", mainEntryClickHouseObfuscator}, #endif +#if ENABLE_CLICKHOUSE_GIT_IMPORT + {"git-import", mainEntryClickHouseGitImport}, +#endif #if ENABLE_CLICKHOUSE_INSTALL {"install", mainEntryClickHouseInstall}, {"start", mainEntryClickHouseStart}, diff --git a/programs/server/config.xml b/programs/server/config.xml index af01e880dc2..3d7ebf0cd96 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -212,8 +212,17 @@ /var/lib/clickhouse/user_files/ - - /var/lib/clickhouse/access/ + + + + + users.xml + + + + /var/lib/clickhouse/access/ + + @@ -256,9 +265,6 @@ --> - - users.xml - default diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControlManager.cpp index 1fa26c85354..41137867213 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -181,6 +181,15 @@ void AccessControlManager::addUsersConfigStorage( const String & preprocessed_dir_, const zkutil::GetZooKeeper & get_zookeeper_function_) { + auto storages = getStoragesPtr(); + for (const auto & storage : *storages) + { + if (auto users_config_storage = typeid_cast>(storage)) + { + if (users_config_storage->getStoragePath() == users_config_path_) + return; + } + } auto check_setting_name_function = [this](const std::string_view & setting_name) { checkSettingNameIsAllowed(setting_name); }; auto new_storage = std::make_shared(storage_name_, check_setting_name_function); new_storage->load(users_config_path_, include_from_path_, preprocessed_dir_, get_zookeeper_function_); 
@@ -210,17 +219,36 @@ void AccessControlManager::startPeriodicReloadingUsersConfigs() void AccessControlManager::addDiskStorage(const String & directory_, bool readonly_) { - addStorage(std::make_shared(directory_, readonly_)); + addDiskStorage(DiskAccessStorage::STORAGE_TYPE, directory_, readonly_); } void AccessControlManager::addDiskStorage(const String & storage_name_, const String & directory_, bool readonly_) { + auto storages = getStoragesPtr(); + for (const auto & storage : *storages) + { + if (auto disk_storage = typeid_cast>(storage)) + { + if (disk_storage->isStoragePathEqual(directory_)) + { + if (readonly_) + disk_storage->setReadOnly(readonly_); + return; + } + } + } addStorage(std::make_shared(storage_name_, directory_, readonly_)); } void AccessControlManager::addMemoryStorage(const String & storage_name_) { + auto storages = getStoragesPtr(); + for (const auto & storage : *storages) + { + if (auto memory_storage = typeid_cast>(storage)) + return; + } addStorage(std::make_shared(storage_name_)); } diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index fc80859885d..9965e54df7e 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -218,6 +218,16 @@ namespace } + /// Converts a path to an absolute path and append it with a separator. + String makeDirectoryPathCanonical(const String & directory_path) + { + auto canonical_directory_path = std::filesystem::weakly_canonical(directory_path); + if (canonical_directory_path.has_filename()) + canonical_directory_path += std::filesystem::path::preferred_separator; + return canonical_directory_path; + } + + /// Calculates the path to a file named .sql for saving an access entity. String getEntityFilePath(const String & directory_path, const UUID & id) { @@ -298,22 +308,17 @@ DiskAccessStorage::DiskAccessStorage(const String & directory_path_, bool readon { } - DiskAccessStorage::DiskAccessStorage(const String & storage_name_, const String & directory_path_, bool readonly_) : IAccessStorage(storage_name_) { - auto canonical_directory_path = std::filesystem::weakly_canonical(directory_path_); - if (canonical_directory_path.has_filename()) - canonical_directory_path += std::filesystem::path::preferred_separator; + directory_path = makeDirectoryPathCanonical(directory_path_); + readonly = readonly_; std::error_code create_dir_error_code; - std::filesystem::create_directories(canonical_directory_path, create_dir_error_code); + std::filesystem::create_directories(directory_path, create_dir_error_code); - if (!std::filesystem::exists(canonical_directory_path) || !std::filesystem::is_directory(canonical_directory_path) || create_dir_error_code) - throw Exception("Couldn't create directory " + canonical_directory_path.string() + " reason: '" + create_dir_error_code.message() + "'", ErrorCodes::DIRECTORY_DOESNT_EXIST); - - directory_path = canonical_directory_path; - readonly = readonly_; + if (!std::filesystem::exists(directory_path) || !std::filesystem::is_directory(directory_path) || create_dir_error_code) + throw Exception("Couldn't create directory " + directory_path + " reason: '" + create_dir_error_code.message() + "'", ErrorCodes::DIRECTORY_DOESNT_EXIST); bool should_rebuild_lists = std::filesystem::exists(getNeedRebuildListsMarkFilePath(directory_path)); if (!should_rebuild_lists) @@ -337,6 +342,12 @@ DiskAccessStorage::~DiskAccessStorage() } +bool DiskAccessStorage::isStoragePathEqual(const String & directory_path_) const +{ + return getStoragePath() == 
makeDirectoryPathCanonical(directory_path_); +} + + void DiskAccessStorage::clear() { entries_by_id.clear(); @@ -426,33 +437,41 @@ bool DiskAccessStorage::writeLists() void DiskAccessStorage::scheduleWriteLists(EntityType type) { if (failed_to_write_lists) - return; + return; /// We don't try to write list files after the first fail. + /// The next restart of the server will invoke rebuilding of the list files. - bool already_scheduled = !types_of_lists_to_write.empty(); types_of_lists_to_write.insert(type); - if (already_scheduled) - return; + if (lists_writing_thread_is_waiting) + return; /// If the lists' writing thread is still waiting we can update `types_of_lists_to_write` easily, + /// without restarting that thread. + + if (lists_writing_thread.joinable()) + lists_writing_thread.join(); /// Create the 'need_rebuild_lists.mark' file. /// This file will be used later to find out if writing lists is successful or not. std::ofstream{getNeedRebuildListsMarkFilePath(directory_path)}; - startListsWritingThread(); + lists_writing_thread = ThreadFromGlobalPool{&DiskAccessStorage::listsWritingThreadFunc, this}; + lists_writing_thread_is_waiting = true; } -void DiskAccessStorage::startListsWritingThread() +void DiskAccessStorage::listsWritingThreadFunc() { - if (lists_writing_thread.joinable()) + std::unique_lock lock{mutex}; + { - if (!lists_writing_thread_exited) - return; - lists_writing_thread.detach(); + /// It's better not to write the lists files too often, that's why we need + /// the following timeout. + const auto timeout = std::chrono::minutes(1); + SCOPE_EXIT({ lists_writing_thread_is_waiting = false; }); + if (lists_writing_thread_should_exit.wait_for(lock, timeout) != std::cv_status::timeout) + return; /// The destructor requires us to exit. } - lists_writing_thread_exited = false; - lists_writing_thread = ThreadFromGlobalPool{&DiskAccessStorage::listsWritingThreadFunc, this}; + writeLists(); } @@ -466,21 +485,6 @@ void DiskAccessStorage::stopListsWritingThread() } -void DiskAccessStorage::listsWritingThreadFunc() -{ - std::unique_lock lock{mutex}; - SCOPE_EXIT({ lists_writing_thread_exited = true; }); - - /// It's better not to write the lists files too often, that's why we need - /// the following timeout. - const auto timeout = std::chrono::minutes(1); - if (lists_writing_thread_should_exit.wait_for(lock, timeout) != std::cv_status::timeout) - return; /// The destructor requires us to exit. - - writeLists(); -} - - /// Reads and parses all the ".sql" files from a specified directory /// and then saves the files "users.list", "roles.list", etc. to the same directory. 
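/// (Invoked at startup if the 'need_rebuild_lists.mark' file created by scheduleWriteLists
/// is still present, i.e. a previous attempt to write the list files did not complete.)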
bool DiskAccessStorage::rebuildLists() diff --git a/src/Access/DiskAccessStorage.h b/src/Access/DiskAccessStorage.h index 11eb1c3b1ad..f6bef078aba 100644 --- a/src/Access/DiskAccessStorage.h +++ b/src/Access/DiskAccessStorage.h @@ -18,7 +18,11 @@ public: ~DiskAccessStorage() override; const char * getStorageType() const override { return STORAGE_TYPE; } + String getStoragePath() const override { return directory_path; } + bool isStoragePathEqual(const String & directory_path_) const; + + void setReadOnly(bool readonly_) { readonly = readonly_; } bool isStorageReadOnly() const override { return readonly; } private: @@ -42,9 +46,8 @@ private: void scheduleWriteLists(EntityType type); bool rebuildLists(); - void startListsWritingThread(); - void stopListsWritingThread(); void listsWritingThreadFunc(); + void stopListsWritingThread(); void insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, Notifications & notifications); void removeNoLock(const UUID & id, Notifications & notifications); @@ -67,14 +70,14 @@ private: void prepareNotifications(const UUID & id, const Entry & entry, bool remove, Notifications & notifications) const; String directory_path; - bool readonly; + std::atomic readonly; std::unordered_map entries_by_id; std::unordered_map entries_by_name_and_type[static_cast(EntityType::MAX)]; boost::container::flat_set types_of_lists_to_write; bool failed_to_write_lists = false; /// Whether writing of the list files has been failed since the recent restart of the server. ThreadFromGlobalPool lists_writing_thread; /// List files are written in a separate thread. std::condition_variable lists_writing_thread_should_exit; /// Signals `lists_writing_thread` to exit. - std::atomic lists_writing_thread_exited = false; + bool lists_writing_thread_is_waiting = false; mutable std::list handlers_by_type[static_cast(EntityType::MAX)]; mutable std::mutex mutex; }; diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 1090de556a0..55ab67d6214 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB @@ -130,7 +131,7 @@ public: void insertFrom(const IColumn & src, size_t n) override { - data.push_back(static_cast(src).getData()[n]); + data.push_back(assert_cast(src).getData()[n]); } void insertData(const char * pos, size_t) override @@ -205,14 +206,14 @@ public: /// This method implemented in header because it could be possibly devirtualized. 
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override { - return CompareHelper::compare(data[n], static_cast(rhs_).data[m], nan_direction_hint); + return CompareHelper::compare(data[n], assert_cast(rhs_).data[m], nan_direction_hint); } void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override { - return this->template doCompareColumn(static_cast(rhs), rhs_row_num, row_indexes, + return this->template doCompareColumn(assert_cast(rhs), rhs_row_num, row_indexes, compare_results, direction, nan_direction_hint); } diff --git a/src/Columns/ya.make b/src/Columns/ya.make index 78c0e1b992d..910c479c2a9 100644 --- a/src/Columns/ya.make +++ b/src/Columns/ya.make @@ -2,8 +2,6 @@ LIBRARY() ADDINCL( - contrib/libs/icu/common - contrib/libs/icu/i18n contrib/libs/pdqsort ) diff --git a/src/Common/Macros.cpp b/src/Common/Macros.cpp index 7b5a896015b..a4981fa5be3 100644 --- a/src/Common/Macros.cpp +++ b/src/Common/Macros.cpp @@ -68,8 +68,14 @@ String Macros::expand(const String & s, res += database_name; else if (macro_name == "table" && !table_name.empty()) res += table_name; - else if (macro_name == "uuid" && uuid != UUIDHelpers::Nil) + else if (macro_name == "uuid") + { + if (uuid == UUIDHelpers::Nil) + throw Exception("Macro 'uuid' and empty arguments of ReplicatedMergeTree " + "are supported only for ON CLUSTER queries with Atomic database engine", + ErrorCodes::SYNTAX_ERROR); res += toString(uuid); + } else throw Exception("No macro '" + macro_name + "' in config while processing substitutions in '" + s + "' at '" diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 9d073cf8dd8..5d51fc9f301 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -22,6 +23,10 @@ namespace DB } } +namespace ProfileEvents +{ + extern const Event QueryMemoryLimitExceeded; +} static constexpr size_t log_peak_memory_usage_every = 1ULL << 30; @@ -104,6 +109,7 @@ void MemoryTracker::alloc(Int64 size) /// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc auto untrack_lock = blocker.cancel(); // NOLINT + ProfileEvents::increment(ProfileEvents::QueryMemoryLimitExceeded); std::stringstream message; message << "Memory tracker"; if (const auto * description = description_ptr.load(std::memory_order_relaxed)) @@ -136,6 +142,7 @@ void MemoryTracker::alloc(Int64 size) /// Prevent recursion. 
Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc auto no_track = blocker.cancel(); // NOLINT + ProfileEvents::increment(ProfileEvents::QueryMemoryLimitExceeded); std::stringstream message; message << "Memory limit"; if (const auto * description = description_ptr.load(std::memory_order_relaxed)) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 475e073d253..486cb7e1a6e 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -233,6 +233,7 @@ M(S3WriteRequestsErrors, "Number of non-throttling errors in POST, DELETE, PUT and PATCH requests to S3 storage.") \ M(S3WriteRequestsThrottling, "Number of 429 and 503 errors in POST, DELETE, PUT and PATCH requests to S3 storage.") \ M(S3WriteRequestsRedirects, "Number of redirects in POST, DELETE, PUT and PATCH requests to S3 storage.") \ + M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \ namespace ProfileEvents diff --git a/src/Common/ShellCommand.cpp b/src/Common/ShellCommand.cpp index 53ab2301a0a..bbb8801f190 100644 --- a/src/Common/ShellCommand.cpp +++ b/src/Common/ShellCommand.cpp @@ -57,7 +57,16 @@ ShellCommand::~ShellCommand() LOG_WARNING(getLogger(), "Cannot kill shell command pid {} errno '{}'", pid, errnoToString(retcode)); } else if (!wait_called) - tryWait(); + { + try + { + tryWait(); + } + catch (...) + { + tryLogCurrentException(getLogger()); + } + } } void ShellCommand::logCommand(const char * filename, char * const argv[]) @@ -74,7 +83,8 @@ void ShellCommand::logCommand(const char * filename, char * const argv[]) LOG_TRACE(ShellCommand::getLogger(), "Will start shell command '{}' with arguments {}", filename, args.str()); } -std::unique_ptr ShellCommand::executeImpl(const char * filename, char * const argv[], bool pipe_stdin_only, bool terminate_in_destructor) +std::unique_ptr ShellCommand::executeImpl( + const char * filename, char * const argv[], bool pipe_stdin_only, bool terminate_in_destructor) { logCommand(filename, argv); @@ -130,7 +140,8 @@ std::unique_ptr ShellCommand::executeImpl(const char * filename, c _exit(int(ReturnCodes::CANNOT_EXEC)); } - std::unique_ptr res(new ShellCommand(pid, pipe_stdin.fds_rw[1], pipe_stdout.fds_rw[0], pipe_stderr.fds_rw[0], terminate_in_destructor)); + std::unique_ptr res(new ShellCommand( + pid, pipe_stdin.fds_rw[1], pipe_stdout.fds_rw[0], pipe_stderr.fds_rw[0], terminate_in_destructor)); LOG_TRACE(getLogger(), "Started shell command '{}' with pid {}", filename, pid); @@ -143,7 +154,8 @@ std::unique_ptr ShellCommand::executeImpl(const char * filename, c } -std::unique_ptr ShellCommand::execute(const std::string & command, bool pipe_stdin_only, bool terminate_in_destructor) +std::unique_ptr ShellCommand::execute( + const std::string & command, bool pipe_stdin_only, bool terminate_in_destructor) { /// Arguments in non-constant chunks of memory (as required for `execv`). /// Moreover, their copying must be done before calling `vfork`, so after `vfork` do a minimum of things. 
@@ -157,7 +169,8 @@ std::unique_ptr ShellCommand::execute(const std::string & command, } -std::unique_ptr ShellCommand::executeDirect(const std::string & path, const std::vector & arguments, bool terminate_in_destructor) +std::unique_ptr ShellCommand::executeDirect( + const std::string & path, const std::vector & arguments, bool terminate_in_destructor) { size_t argv_sum_size = path.size() + 1; for (const auto & arg : arguments) @@ -186,6 +199,10 @@ int ShellCommand::tryWait() { wait_called = true; + in.close(); + out.close(); + err.close(); + LOG_TRACE(getLogger(), "Will wait for shell command pid {}", pid); int status = 0; diff --git a/src/Common/tests/CMakeLists.txt b/src/Common/tests/CMakeLists.txt index f6c232cdd22..8de9424e044 100644 --- a/src/Common/tests/CMakeLists.txt +++ b/src/Common/tests/CMakeLists.txt @@ -84,3 +84,6 @@ target_link_libraries (procfs_metrics_provider_perf PRIVATE clickhouse_common_io add_executable (average average.cpp) target_link_libraries (average PRIVATE clickhouse_common_io) + +add_executable (shell_command_inout shell_command_inout.cpp) +target_link_libraries (shell_command_inout PRIVATE clickhouse_common_io) diff --git a/src/Common/tests/shell_command_inout.cpp b/src/Common/tests/shell_command_inout.cpp new file mode 100644 index 00000000000..615700cd042 --- /dev/null +++ b/src/Common/tests/shell_command_inout.cpp @@ -0,0 +1,47 @@ +#include + +#include +#include + +#include +#include +#include + +/** This example shows how we can proxy stdin to ShellCommand and obtain stdout in streaming fashion. */ + +int main(int argc, char ** argv) +try +{ + using namespace DB; + + if (argc < 2) + { + std::cerr << "Usage: shell_command_inout 'command...' < in > out\n"; + return 1; + } + + auto command = ShellCommand::execute(argv[1]); + + ReadBufferFromFileDescriptor in(STDIN_FILENO); + WriteBufferFromFileDescriptor out(STDOUT_FILENO); + WriteBufferFromFileDescriptor err(STDERR_FILENO); + + /// Background thread sends data and foreground thread receives result. + + std::thread thread([&] + { + copyData(in, command->in); + command->in.close(); + }); + + copyData(command->out, out); + copyData(command->err, err); + + thread.join(); + return 0; +} +catch (...) +{ + std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; + throw; +} diff --git a/src/Core/MySQL/IMySQLReadPacket.cpp b/src/Core/MySQL/IMySQLReadPacket.cpp index 8fc8855c8a4..5f6bbc7bceb 100644 --- a/src/Core/MySQL/IMySQLReadPacket.cpp +++ b/src/Core/MySQL/IMySQLReadPacket.cpp @@ -50,21 +50,22 @@ uint64_t readLengthEncodedNumber(ReadBuffer & buffer) uint64_t buf = 0; buffer.readStrict(c); auto cc = static_cast(c); - if (cc < 0xfc) + switch (cc) { - return cc; - } - else if (cc < 0xfd) - { - buffer.readStrict(reinterpret_cast(&buf), 2); - } - else if (cc < 0xfe) - { - buffer.readStrict(reinterpret_cast(&buf), 3); - } - else - { - buffer.readStrict(reinterpret_cast(&buf), 8); + /// NULL + case 0xfb: + break; + case 0xfc: + buffer.readStrict(reinterpret_cast(&buf), 2); + break; + case 0xfd: + buffer.readStrict(reinterpret_cast(&buf), 3); + break; + case 0xfe: + buffer.readStrict(reinterpret_cast(&buf), 8); + break; + default: + return cc; } return buf; } diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp index 42d077260f8..e7f113ba7af 100644 --- a/src/Core/MySQL/MySQLReplication.cpp +++ b/src/Core/MySQL/MySQLReplication.cpp @@ -171,7 +171,7 @@ namespace MySQLReplication /// Ignore MySQL 8.0 optional metadata fields. 
 /// https://mysqlhighavailability.com/more-metadata-is-written-into-binary-log/
-        payload.ignore(payload.available() - CHECKSUM_CRC32_SIGNATURE_LENGTH);
+        payload.ignoreAll();
     }

     /// Types that are not used in the binlog event:
@@ -221,6 +221,7 @@
             }
             case MYSQL_TYPE_NEWDECIMAL:
             case MYSQL_TYPE_STRING: {
+                /// Big-Endian
                 auto b0 = UInt16(meta[pos] << 8);
                 auto b1 = UInt8(meta[pos + 1]);
                 column_meta.emplace_back(UInt16(b0 + b1));
@@ -231,6 +232,7 @@
             case MYSQL_TYPE_BIT:
             case MYSQL_TYPE_VARCHAR:
             case MYSQL_TYPE_VAR_STRING: {
+                /// Little-Endian
                 auto b0 = UInt8(meta[pos]);
                 auto b1 = UInt16(meta[pos + 1] << 8);
                 column_meta.emplace_back(UInt16(b0 + b1));
@@ -911,7 +913,7 @@
                     break;
                 }
             }
-            payload.tryIgnore(CHECKSUM_CRC32_SIGNATURE_LENGTH);
+            payload.ignoreAll();
         }
     }
diff --git a/src/Core/tests/mysql_protocol.cpp b/src/Core/tests/mysql_protocol.cpp
index acae8603c40..6cad095fc85 100644
--- a/src/Core/tests/mysql_protocol.cpp
+++ b/src/Core/tests/mysql_protocol.cpp
@@ -283,6 +283,7 @@
     }

     {
+        /// mysql_protocol --host=172.17.0.3 --user=root --password=123 --db=sbtest
         try
         {
             boost::program_options::options_description desc("Allowed options");
diff --git a/src/DataTypes/DataTypeNullable.cpp b/src/DataTypes/DataTypeNullable.cpp
index 67acf89ef42..9c738da9f6a 100644
--- a/src/DataTypes/DataTypeNullable.cpp
+++ b/src/DataTypes/DataTypeNullable.cpp
@@ -308,16 +308,30 @@ ReturnType DataTypeNullable::deserializeTextQuoted(IColumn & column, ReadBuffer
     const DataTypePtr & nested_data_type)
 {
     return safeDeserialize(column, *nested_data_type,
-        [&istr] { return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr); },
+        [&istr]
+        {
+            return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr);
+        },
         [&nested_data_type, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsTextQuoted(nested, istr, settings); });
 }

 void DataTypeNullable::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
 {
-    safeDeserialize(column, *nested_data_type,
-        [&istr] { return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr); },
-        [this, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsWholeText(nested, istr, settings); });
+    deserializeWholeText(column, istr, settings, nested_data_type);
+}
+
+template
+ReturnType DataTypeNullable::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings,
+    const DataTypePtr & nested_data_type)
+{
+    return safeDeserialize(column, *nested_data_type,
+        [&istr]
+        {
+            return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr)
+                || checkStringByFirstCharacterAndAssertTheRest("ᴺᵁᴸᴸ", istr);
+        },
+        [&nested_data_type, &istr, &settings] (IColumn & nested) { nested_data_type->deserializeAsWholeText(nested, istr, settings); });
 }

@@ -544,6 +558,7 @@ DataTypePtr removeNullable(const DataTypePtr & type)
 }

+template bool DataTypeNullable::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested);
 template bool DataTypeNullable::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested);
 template bool DataTypeNullable::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &, const DataTypePtr & nested);
 template bool DataTypeNullable::deserializeTextCSV(IColumn & column,
ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested); diff --git a/src/DataTypes/DataTypeNullable.h b/src/DataTypes/DataTypeNullable.h index 22d403da6c4..587eecdf32e 100644 --- a/src/DataTypes/DataTypeNullable.h +++ b/src/DataTypes/DataTypeNullable.h @@ -103,6 +103,8 @@ public: /// If ReturnType is bool, check for NULL and deserialize value into non-nullable column (and return true) or insert default value of nested type (and return false) /// If ReturnType is void, deserialize Nullable(T) template + static ReturnType deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested); + template static ReturnType deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested); template static ReturnType deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &, const DataTypePtr & nested); diff --git a/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp index 851ea351876..465a7cb912a 100644 --- a/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp @@ -195,6 +195,7 @@ void MaterializeMySQLSyncThread::synchronization(const String & mysql_version) } catch (...) { + client.disconnect(); tryLogCurrentException(log); getDatabase(database_name).setException(std::current_exception()); } @@ -206,6 +207,7 @@ void MaterializeMySQLSyncThread::stopSynchronization() { sync_quit = true; background_thread_pool->join(); + client.disconnect(); } } diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp index 918cf0732ab..cc250727261 100644 --- a/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/src/Dictionaries/ExecutableDictionarySource.cpp @@ -1,12 +1,13 @@ #include "ExecutableDictionarySource.h" -#include -#include +#include #include #include #include #include #include +#include +#include #include #include #include @@ -16,6 +17,7 @@ #include "DictionaryStructure.h" #include "registerDictionaries.h" + namespace DB { static const UInt64 max_block_size = 8192; @@ -31,15 +33,23 @@ namespace /// Owns ShellCommand and calls wait for it. 
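    /// (Its readSuffix also drains the command's stderr and logs it, so that
    /// diagnostics printed by the external script are not silently lost.)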
diff --git a/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp
index 851ea351876..465a7cb912a 100644
--- a/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp
+++ b/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp
@@ -195,6 +195,7 @@ void MaterializeMySQLSyncThread::synchronization(const String & mysql_version)
         }
         catch (...)
         {
+            client.disconnect();
             tryLogCurrentException(log);
             getDatabase(database_name).setException(std::current_exception());
         }
@@ -206,6 +207,7 @@ void MaterializeMySQLSyncThread::stopSynchronization()
     {
         sync_quit = true;
         background_thread_pool->join();
+        client.disconnect();
     }
 }
diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp
index 918cf0732ab..cc250727261 100644
--- a/src/Dictionaries/ExecutableDictionarySource.cpp
+++ b/src/Dictionaries/ExecutableDictionarySource.cpp
@@ -1,12 +1,13 @@
 #include "ExecutableDictionarySource.h"

-#include
-#include
+#include
 #include
 #include
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -16,6 +17,7 @@
 #include "DictionaryStructure.h"
 #include "registerDictionaries.h"

+
 namespace DB
 {
 static const UInt64 max_block_size = 8192;
@@ -31,15 +33,23 @@
 namespace
 {
     /// Owns the ShellCommand and calls wait() for it.
    class ShellCommandOwningBlockInputStream : public OwningBlockInputStream<ShellCommand>
    {
+    private:
+        Poco::Logger * log;
    public:
-        ShellCommandOwningBlockInputStream(const BlockInputStreamPtr & impl, std::unique_ptr<ShellCommand> own_)
-            : OwningBlockInputStream(std::move(impl), std::move(own_))
+        ShellCommandOwningBlockInputStream(Poco::Logger * log_, const BlockInputStreamPtr & impl, std::unique_ptr<ShellCommand> command_)
+            : OwningBlockInputStream(std::move(impl), std::move(command_)), log(log_)
        {
        }

        void readSuffix() override
        {
            OwningBlockInputStream::readSuffix();
+
+            std::string err;
+            readStringUntilEOF(err, own->err);
+            if (!err.empty())
+                LOG_ERROR(log, "Having stderr: {}", err);
+
            own->wait();
        }
    };

@@ -80,7 +90,7 @@ BlockInputStreamPtr ExecutableDictionarySource::loadAll()
    LOG_TRACE(log, "loadAll {}", toString());
    auto process = ShellCommand::execute(command);
    auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size);
-    return std::make_shared<ShellCommandOwningBlockInputStream>(input_stream, std::move(process));
+    return std::make_shared<ShellCommandOwningBlockInputStream>(log, input_stream, std::move(process));
 }

 BlockInputStreamPtr ExecutableDictionarySource::loadUpdatedAll()
@@ -95,67 +105,73 @@ BlockInputStreamPtr ExecutableDictionarySource::loadUpdatedAll()
    LOG_TRACE(log, "loadUpdatedAll {}", command_with_update_field);
    auto process = ShellCommand::execute(command_with_update_field);
    auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size);
-    return std::make_shared<ShellCommandOwningBlockInputStream>(input_stream, std::move(process));
+    return std::make_shared<ShellCommandOwningBlockInputStream>(log, input_stream, std::move(process));
 }

 namespace
 {
-    /** A stream, that also runs and waits for background thread
-      * (that will feed data into pipe to be read from the other side of the pipe).
+    /** A stream that runs a child process, sends data to its stdin from a background thread,
+      * and receives data from its stdout.
      */
    class BlockInputStreamWithBackgroundThread final : public IBlockInputStream
    {
    public:
        BlockInputStreamWithBackgroundThread(
-            const BlockInputStreamPtr & stream_, std::unique_ptr<ShellCommand> && command_, std::packaged_task<void()> && task_)
-            : stream{stream_}, command{std::move(command_)}, task(std::move(task_)), thread([this]
-            {
-                task();
-                command->in.close();
-            })
+            const Context & context,
+            const std::string & format,
+            const Block & sample_block,
+            const std::string & command_str,
+            Poco::Logger * log_,
+            std::function<void(WriteBufferFromFile &)> && send_data_)
+            : log(log_),
+            command(ShellCommand::execute(command_str)),
+            send_data(std::move(send_data_)),
+            thread([this] { send_data(command->in); })
        {
-            children.push_back(stream);
+            stream = context.getInputFormat(format, command->out, sample_block, max_block_size);
        }

        ~BlockInputStreamWithBackgroundThread() override
        {
            if (thread.joinable())
-            {
-                try
-                {
-                    readSuffix();
-                }
-                catch (...)
-                {
-                    tryLogCurrentException(__PRETTY_FUNCTION__);
-                }
-            }
+                thread.join();
        }

-        Block getHeader() const override { return stream->getHeader(); }
+        Block getHeader() const override
+        {
+            return stream->getHeader();
+        }

    private:
-        Block readImpl() override { return stream->read(); }
+        Block readImpl() override
+        {
+            return stream->read();
+        }
+
+        void readPrefix() override
+        {
+            stream->readPrefix();
+        }

        void readSuffix() override
        {
-            IBlockInputStream::readSuffix();
-            if (!wait_called)
-            {
-                wait_called = true;
-                command->wait();
-            }
-            thread.join();
-            /// To rethrow an exception, if any.
-            task.get_future().get();
+            stream->readSuffix();
+
+            std::string err;
+            readStringUntilEOF(err, command->err);
+            if (!err.empty())
+                LOG_ERROR(log, "Having stderr: {}", err);
+
+            command->wait();
        }

        String getName() const override { return "WithBackgroundThread"; }

+        Poco::Logger * log;
        BlockInputStreamPtr stream;
        std::unique_ptr<ShellCommand> command;
-        std::packaged_task<void()> task;
+        std::function<void(WriteBufferFromFile &)> send_data;
        ThreadFromGlobalPool thread;
-        bool wait_called = false;
    };
 }

@@ -164,28 +180,29 @@ namespace

 BlockInputStreamPtr ExecutableDictionarySource::loadIds(const std::vector<UInt64> & ids)
 {
    LOG_TRACE(log, "loadIds {} size = {}", toString(), ids.size());
-    auto process = ShellCommand::execute(command);
-
-    auto output_stream = context.getOutputFormat(format, process->in, sample_block);
-    auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size);
    return std::make_shared<BlockInputStreamWithBackgroundThread>(
-        input_stream, std::move(process), std::packaged_task<void()>([output_stream, &ids]() mutable { formatIDs(output_stream, ids); }));
+        context, format, sample_block, command, log,
+        [&ids, this](WriteBufferFromFile & out) mutable
+        {
+            auto output_stream = context.getOutputFormat(format, out, sample_block);
+            formatIDs(output_stream, ids);
+            out.close();
+        });
 }

 BlockInputStreamPtr ExecutableDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
 {
    LOG_TRACE(log, "loadKeys {} size = {}", toString(), requested_rows.size());
-    auto process = ShellCommand::execute(command);
-
-    auto output_stream = context.getOutputFormat(format, process->in, sample_block);
-    auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size);
    return std::make_shared<BlockInputStreamWithBackgroundThread>(
-        input_stream, std::move(process), std::packaged_task<void()>([output_stream, key_columns, &requested_rows, this]() mutable
+        context, format, sample_block, command, log,
+        [key_columns, &requested_rows, this](WriteBufferFromFile & out) mutable
        {
+            auto output_stream = context.getOutputFormat(format, out, sample_block);
            formatKeys(dict_struct, output_stream, key_columns, requested_rows);
-        }));
+            out.close();
+        });
 }

 bool ExecutableDictionarySource::isModified() const
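The rewritten BlockInputStreamWithBackgroundThread now owns the whole pipeline: it spawns the process itself, a background thread pumps the request into the child's stdin via send_data (which closes the buffer so the child sees EOF), blocks are read back from the child's stdout, and readSuffix() drains stderr for logging before command->wait(). A stripped-down POSIX sketch of the same pattern (illustrative only, using plain pipes and `wc -l` in place of the ShellCommand/IBlockInputStream machinery):

#include <cstdio>
#include <string>
#include <thread>
#include <sys/wait.h>
#include <unistd.h>

int main()
{
    int in_pipe[2];  /// parent writes -> child stdin
    int out_pipe[2]; /// child stdout -> parent reads
    if (pipe(in_pipe) != 0 || pipe(out_pipe) != 0)
        return 1;

    pid_t pid = fork();
    if (pid == 0) /// child: wire the pipes to stdin/stdout and exec
    {
        dup2(in_pipe[0], STDIN_FILENO);
        dup2(out_pipe[1], STDOUT_FILENO);
        close(in_pipe[1]);
        close(out_pipe[0]);
        execlp("wc", "wc", "-l", static_cast<char *>(nullptr));
        _exit(127);
    }

    close(in_pipe[0]);
    close(out_pipe[1]);

    /// Plays the role of `send_data`: write the request, then close the write
    /// end so the child sees EOF (what out.close() does in the lambdas above).
    std::thread sender([fd = in_pipe[1]]
    {
        const std::string rows = "1\n2\n3\n";
        ssize_t written = write(fd, rows.data(), rows.size());
        (void)written;
        close(fd);
    });

    char buf[256];
    ssize_t n;
    while ((n = read(out_pipe[0], buf, sizeof(buf))) > 0) /// the stdout side
        fwrite(buf, 1, static_cast<size_t>(n), stdout);

    sender.join();            /// like the destructor joining `thread`
    close(out_pipe[0]);
    waitpid(pid, nullptr, 0); /// like command->wait() in readSuffix()
    return 0;
}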
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index a1065b2c452..522149d3cfd 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -324,13 +324,86 @@ void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegm
    target = std::move(file_segmentation_engine);
 }

+/// File Segmentation Engines for parallel reading
+
+void registerFileSegmentationEngineTabSeparated(FormatFactory & factory);
+void registerFileSegmentationEngineCSV(FormatFactory & factory);
+void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory);
+void registerFileSegmentationEngineRegexp(FormatFactory & factory);
+void registerFileSegmentationEngineJSONAsString(FormatFactory & factory);
+
+/// Formats for both input/output.
+
+void registerInputFormatNative(FormatFactory & factory);
+void registerOutputFormatNative(FormatFactory & factory);
+
+void registerInputFormatProcessorNative(FormatFactory & factory);
+void registerOutputFormatProcessorNative(FormatFactory & factory);
+void registerInputFormatProcessorRowBinary(FormatFactory & factory);
+void registerOutputFormatProcessorRowBinary(FormatFactory & factory);
+void registerInputFormatProcessorTabSeparated(FormatFactory & factory);
+void registerOutputFormatProcessorTabSeparated(FormatFactory & factory);
+void registerInputFormatProcessorValues(FormatFactory & factory);
+void registerOutputFormatProcessorValues(FormatFactory & factory);
+void registerInputFormatProcessorCSV(FormatFactory & factory);
+void registerOutputFormatProcessorCSV(FormatFactory & factory);
+void registerInputFormatProcessorTSKV(FormatFactory & factory);
+void registerOutputFormatProcessorTSKV(FormatFactory & factory);
+void registerInputFormatProcessorJSONEachRow(FormatFactory & factory);
+void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory);
+void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory);
+void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory);
+void registerInputFormatProcessorProtobuf(FormatFactory & factory);
+void registerOutputFormatProcessorProtobuf(FormatFactory & factory);
+void registerInputFormatProcessorTemplate(FormatFactory & factory);
+void registerOutputFormatProcessorTemplate(FormatFactory & factory);
+void registerInputFormatProcessorMsgPack(FormatFactory & factory);
+void registerOutputFormatProcessorMsgPack(FormatFactory & factory);
+void registerInputFormatProcessorORC(FormatFactory & factory);
+void registerOutputFormatProcessorORC(FormatFactory & factory);
+void registerInputFormatProcessorParquet(FormatFactory & factory);
+void registerOutputFormatProcessorParquet(FormatFactory & factory);
+void registerInputFormatProcessorArrow(FormatFactory & factory);
+void registerOutputFormatProcessorArrow(FormatFactory & factory);
+void registerInputFormatProcessorAvro(FormatFactory & factory);
+void registerOutputFormatProcessorAvro(FormatFactory & factory);
+
+/// Output only (presentational) formats.
+
+void registerOutputFormatNull(FormatFactory & factory);
+
+void registerOutputFormatProcessorPretty(FormatFactory & factory);
+void registerOutputFormatProcessorPrettyCompact(FormatFactory & factory);
+void registerOutputFormatProcessorPrettySpace(FormatFactory & factory);
+void registerOutputFormatProcessorVertical(FormatFactory & factory);
+void registerOutputFormatProcessorJSON(FormatFactory & factory);
+void registerOutputFormatProcessorJSONCompact(FormatFactory & factory);
+void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factory);
+void registerOutputFormatProcessorXML(FormatFactory & factory);
+void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory);
+void registerOutputFormatProcessorNull(FormatFactory & factory);
+void registerOutputFormatProcessorMySQLWire(FormatFactory & factory);
+void registerOutputFormatProcessorMarkdown(FormatFactory & factory);
+void registerOutputFormatProcessorPostgreSQLWire(FormatFactory & factory);
+
+/// Input only formats.
+
+void registerInputFormatProcessorRegexp(FormatFactory & factory);
+void registerInputFormatProcessorJSONAsString(FormatFactory & factory);
+void registerInputFormatProcessorLineAsString(FormatFactory & factory);
+void registerInputFormatProcessorCapnProto(FormatFactory & factory);
+
 FormatFactory::FormatFactory()
 {
+    registerFileSegmentationEngineTabSeparated(*this);
+    registerFileSegmentationEngineCSV(*this);
+    registerFileSegmentationEngineJSONEachRow(*this);
+    registerFileSegmentationEngineRegexp(*this);
+    registerFileSegmentationEngineJSONAsString(*this);
+
    registerInputFormatNative(*this);
    registerOutputFormatNative(*this);

-    registerOutputFormatProcessorJSONEachRowWithProgress(*this);
-
    registerInputFormatProcessorNative(*this);
    registerOutputFormatProcessorNative(*this);
    registerInputFormatProcessorRowBinary(*this);
@@ -349,8 +422,11 @@ FormatFactory::FormatFactory()
    registerOutputFormatProcessorJSONCompactEachRow(*this);
    registerInputFormatProcessorProtobuf(*this);
    registerOutputFormatProcessorProtobuf(*this);
+    registerInputFormatProcessorTemplate(*this);
+    registerOutputFormatProcessorTemplate(*this);
+    registerInputFormatProcessorMsgPack(*this);
+    registerOutputFormatProcessorMsgPack(*this);
 #if !defined(ARCADIA_BUILD)
-    registerInputFormatProcessorCapnProto(*this);
    registerInputFormatProcessorORC(*this);
    registerOutputFormatProcessorORC(*this);
    registerInputFormatProcessorParquet(*this);
@@ -360,18 +436,6 @@ FormatFactory::FormatFactory()
    registerInputFormatProcessorAvro(*this);
    registerOutputFormatProcessorAvro(*this);
 #endif
-    registerInputFormatProcessorTemplate(*this);
-    registerOutputFormatProcessorTemplate(*this);
-    registerInputFormatProcessorRegexp(*this);
-    registerInputFormatProcessorMsgPack(*this);
-    registerOutputFormatProcessorMsgPack(*this);
-    registerInputFormatProcessorJSONAsString(*this);
-
-    registerFileSegmentationEngineTabSeparated(*this);
-    registerFileSegmentationEngineCSV(*this);
-    registerFileSegmentationEngineJSONEachRow(*this);
-    registerFileSegmentationEngineRegexp(*this);
-    registerFileSegmentationEngineJSONAsString(*this);

    registerOutputFormatNull(*this);

@@ -381,12 +445,20 @@ FormatFactory::FormatFactory()
    registerOutputFormatProcessorVertical(*this);
    registerOutputFormatProcessorJSON(*this);
    registerOutputFormatProcessorJSONCompact(*this);
+    registerOutputFormatProcessorJSONEachRowWithProgress(*this);
    registerOutputFormatProcessorXML(*this);
    registerOutputFormatProcessorODBCDriver2(*this);
    registerOutputFormatProcessorNull(*this);
    registerOutputFormatProcessorMySQLWire(*this);
    registerOutputFormatProcessorMarkdown(*this);
    registerOutputFormatProcessorPostgreSQLWire(*this);
+
+    registerInputFormatProcessorRegexp(*this);
+    registerInputFormatProcessorJSONAsString(*this);
+    registerInputFormatProcessorLineAsString(*this);
+#if !defined(ARCADIA_BUILD)
+    registerInputFormatProcessorCapnProto(*this);
+#endif
 }

 FormatFactory & FormatFactory::instance()
diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h
index f0d2b7826a0..54bff1eefc6 100644
--- a/src/Formats/FormatFactory.h
+++ b/src/Formats/FormatFactory.h
@@ -141,73 +141,4 @@ private:
    const Creators & getCreators(const String & name) const;
 };

-/// Formats for both input/output.
-
-void registerInputFormatNative(FormatFactory & factory);
-void registerOutputFormatNative(FormatFactory & factory);
-
-void registerInputFormatProcessorNative(FormatFactory & factory);
-void registerOutputFormatProcessorNative(FormatFactory & factory);
-void registerInputFormatProcessorRowBinary(FormatFactory & factory);
-void registerOutputFormatProcessorRowBinary(FormatFactory & factory);
-void registerInputFormatProcessorTabSeparated(FormatFactory & factory);
-void registerOutputFormatProcessorTabSeparated(FormatFactory & factory);
-void registerInputFormatProcessorValues(FormatFactory & factory);
-void registerOutputFormatProcessorValues(FormatFactory & factory);
-void registerInputFormatProcessorCSV(FormatFactory & factory);
-void registerOutputFormatProcessorCSV(FormatFactory & factory);
-void registerInputFormatProcessorTSKV(FormatFactory & factory);
-void registerOutputFormatProcessorTSKV(FormatFactory & factory);
-void registerInputFormatProcessorJSONEachRow(FormatFactory & factory);
-void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory);
-void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory);
-void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory);
-void registerInputFormatProcessorParquet(FormatFactory & factory);
-void registerOutputFormatProcessorParquet(FormatFactory & factory);
-void registerInputFormatProcessorArrow(FormatFactory & factory);
-void registerOutputFormatProcessorArrow(FormatFactory & factory);
-void registerInputFormatProcessorProtobuf(FormatFactory & factory);
-void registerOutputFormatProcessorProtobuf(FormatFactory & factory);
-void registerInputFormatProcessorAvro(FormatFactory & factory);
-void registerOutputFormatProcessorAvro(FormatFactory & factory);
-void registerInputFormatProcessorTemplate(FormatFactory & factory);
-void registerOutputFormatProcessorTemplate(FormatFactory & factory);
-void registerInputFormatProcessorMsgPack(FormatFactory & factory);
-void registerOutputFormatProcessorMsgPack(FormatFactory & factory);
-void registerInputFormatProcessorORC(FormatFactory & factory);
-void registerOutputFormatProcessorORC(FormatFactory & factory);
-
-
-/// File Segmentation Engines for parallel reading
-
-void registerFileSegmentationEngineTabSeparated(FormatFactory & factory);
-void registerFileSegmentationEngineCSV(FormatFactory & factory);
-void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory);
-void registerFileSegmentationEngineRegexp(FormatFactory & factory);
-void registerFileSegmentationEngineJSONAsString(FormatFactory & factory);
-
-/// Output only (presentational) formats.
-
-void registerOutputFormatNull(FormatFactory & factory);
-
-void registerOutputFormatProcessorPretty(FormatFactory & factory);
-void registerOutputFormatProcessorPrettyCompact(FormatFactory & factory);
-void registerOutputFormatProcessorPrettySpace(FormatFactory & factory);
-void registerOutputFormatProcessorPrettyASCII(FormatFactory & factory);
-void registerOutputFormatProcessorVertical(FormatFactory & factory);
-void registerOutputFormatProcessorJSON(FormatFactory & factory);
-void registerOutputFormatProcessorJSONCompact(FormatFactory & factory);
-void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factory);
-void registerOutputFormatProcessorXML(FormatFactory & factory);
-void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory);
-void registerOutputFormatProcessorNull(FormatFactory & factory);
-void registerOutputFormatProcessorMySQLWire(FormatFactory & factory);
-void registerOutputFormatProcessorMarkdown(FormatFactory & factory);
-void registerOutputFormatProcessorPostgreSQLWire(FormatFactory & factory);
-
-/// Input only formats.
-void registerInputFormatProcessorCapnProto(FormatFactory & factory);
-void registerInputFormatProcessorRegexp(FormatFactory & factory);
-void registerInputFormatProcessorJSONAsString(FormatFactory & factory);
-
 }
diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt
index 78caabb6941..0a99a034a33 100644
--- a/src/Functions/CMakeLists.txt
+++ b/src/Functions/CMakeLists.txt
@@ -53,8 +53,28 @@ endif()

 target_include_directories(clickhouse_functions SYSTEM PRIVATE ${SPARSEHASH_INCLUDE_DIR})

-# Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size.
-target_compile_options(clickhouse_functions PRIVATE "-g0")
+if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE"
+    OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
+    OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
+    set (STRIP_DSF_DEFAULT ON)
+else()
+    set (STRIP_DSF_DEFAULT OFF)
+endif()
+
+
+option(STRIP_DEBUG_SYMBOLS_FUNCTIONS
+    "Do not generate debugger info for ClickHouse functions.
+     Provides faster linking and lower binary size.
+     The tradeoff is the inability to debug some source files with e.g. gdb
+     (empty stack frames and no local variables)."
+    ${STRIP_DSF_DEFAULT})
+
+if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
+    message(WARNING "Not generating debugger info for ClickHouse functions")
+    target_compile_options(clickhouse_functions PRIVATE "-g0")
+else()
+    message(STATUS "Generating debugger info for ClickHouse functions")
+endif()

 if (USE_ICU)
    target_link_libraries (clickhouse_functions PRIVATE ${ICU_LIBRARIES})
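With this change, debug info for clickhouse_functions is stripped by default only in Release-like builds (via the STRIP_DSF_DEFAULT computed above), and the behaviour can be overridden explicitly, e.g. by passing -DSTRIP_DEBUG_SYMBOLS_FUNCTIONS=OFF on the cmake command line to keep debuggable stack frames in these files at the cost of link time and binary size.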
diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h
index 15b6ea6ca5d..ca0cc876035 100644
--- a/src/Functions/FunctionBinaryArithmetic.h
+++ b/src/Functions/FunctionBinaryArithmetic.h
@@ -561,6 +561,8 @@ public:
    template