diff --git a/.github/ISSUE_TEMPLATE/40_bug-report.md b/.github/ISSUE_TEMPLATE/40_bug-report.md index 4dfd19266d0..97137366189 100644 --- a/.github/ISSUE_TEMPLATE/40_bug-report.md +++ b/.github/ISSUE_TEMPLATE/40_bug-report.md @@ -7,7 +7,7 @@ assignees: '' --- -(you don't have to strictly follow this form) +You have to provide the following information whenever possible. **Describe the bug** A clear and concise description of what does not work as it is supposed to. diff --git a/.gitignore b/.gitignore index d33dbf0600d..1db6e0a78c9 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ /docs/zh/single.md /docs/ja/single.md /docs/fa/single.md +/docs/en/development/cmake-in-clickhouse.md # callgrind files callgrind.out.* diff --git a/CMakeLists.txt b/CMakeLists.txt index ebda42224cc..a3c0f0fe863 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -167,9 +167,10 @@ endif () # If turned `ON`, assumes the user has either the system GTest library or the bundled one. option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON) +option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF) -if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") - # Only for Linux, x86_64. +if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") + # Only for Linux, x86_64 or aarch64. option(GLIBC_COMPATIBILITY "Enable compatibility with older glibc libraries." ON) elseif(GLIBC_COMPATIBILITY) message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration") @@ -499,6 +500,7 @@ include (cmake/find/krb5.cmake) include (cmake/find/libgsasl.cmake) include (cmake/find/cyrus-sasl.cmake) include (cmake/find/rdkafka.cmake) +include (cmake/find/libuv.cmake) # for amqpcpp and cassandra include (cmake/find/amqpcpp.cmake) include (cmake/find/capnp.cmake) include (cmake/find/llvm.cmake) diff --git a/base/bridge/IBridge.cpp b/base/bridge/IBridge.cpp index b1f71315fef..b2ec53158b1 100644 --- a/base/bridge/IBridge.cpp +++ b/base/bridge/IBridge.cpp @@ -159,17 +159,12 @@ void IBridge::initialize(Application & self) if (port > 0xFFFF) throw Exception("Out of range 'http-port': " + std::to_string(port), ErrorCodes::ARGUMENT_OUT_OF_BOUND); - http_timeout = config().getUInt("http-timeout", DEFAULT_HTTP_READ_BUFFER_TIMEOUT); + http_timeout = config().getUInt64("http-timeout", DEFAULT_HTTP_READ_BUFFER_TIMEOUT); max_server_connections = config().getUInt("max-server-connections", 1024); - keep_alive_timeout = config().getUInt("keep-alive-timeout", 10); + keep_alive_timeout = config().getUInt64("keep-alive-timeout", 10); initializeTerminationAndSignalProcessing(); -#if USE_ODBC - if (bridgeName() == "ODBCBridge") - Poco::Data::ODBC::Connector::registerConnector(); -#endif - ServerApplication::initialize(self); // NOLINT } diff --git a/base/common/DecomposedFloat.h b/base/common/DecomposedFloat.h new file mode 100644 index 00000000000..078ba823c15 --- /dev/null +++ b/base/common/DecomposedFloat.h @@ -0,0 +1,216 @@ +#pragma once + +#include +#include +#include +#include + + +/// Allows checking the internals of an IEEE-754 floating point number.
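/// Illustrative sketch (not part of the original patch; assumes IEEE-754 binary32):
/// how the decomposition below plays out for one concrete value.
///
///     float x = 1.5f;
///     uint32_t bits;
///     memcpy(&bits, &x, sizeof(x));             /// safe type-punning, as in the constructor below
///     uint32_t sign     = bits >> 31;           /// 0 -> positive
///     uint32_t exponent = (bits >> 23) & 0xFF;  /// 127; the bias is 127, so normalized_exponent() == 0
///     uint32_t mantissa = bits & 0x7FFFFF;      /// 0x400000, encoding the fractional 0.5
///
/// DecomposedFloat32(1.5f) exposes exactly these fields through is_negative(), exponent() and mantissa().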
+ +template struct FloatTraits; + +template <> +struct FloatTraits +{ + using UInt = uint32_t; + static constexpr size_t bits = 32; + static constexpr size_t exponent_bits = 8; + static constexpr size_t mantissa_bits = bits - exponent_bits - 1; +}; + +template <> +struct FloatTraits +{ + using UInt = uint64_t; + static constexpr size_t bits = 64; + static constexpr size_t exponent_bits = 11; + static constexpr size_t mantissa_bits = bits - exponent_bits - 1; +}; + + +/// x = sign * (2 ^ normalized_exponent) * (1 + mantissa * 2 ^ -mantissa_bits) +/// x = sign * (2 ^ normalized_exponent + mantissa * 2 ^ (normalized_exponent - mantissa_bits)) +template +struct DecomposedFloat +{ + using Traits = FloatTraits; + + DecomposedFloat(T x) + { + memcpy(&x_uint, &x, sizeof(x)); + } + + typename Traits::UInt x_uint; + + bool is_negative() const + { + return x_uint >> (Traits::bits - 1); + } + + /// Returns 0 for both +0. and -0. + int sign() const + { + return (exponent() == 0 && mantissa() == 0) + ? 0 + : (is_negative() + ? -1 + : 1); + } + + uint16_t exponent() const + { + return (x_uint >> (Traits::mantissa_bits)) & (((1ull << (Traits::exponent_bits + 1)) - 1) >> 1); + } + + int16_t normalized_exponent() const + { + return int16_t(exponent()) - ((1ull << (Traits::exponent_bits - 1)) - 1); + } + + uint64_t mantissa() const + { + return x_uint & ((1ull << Traits::mantissa_bits) - 1); + } + + int64_t mantissa_with_sign() const + { + return is_negative() ? -mantissa() : mantissa(); + } + + /// NOTE: floating point instructions could probably do this better. + bool is_integer_in_representable_range() const + { + return x_uint == 0 + || (normalized_exponent() >= 0 /// The number is not less than one + /// The number is inside the range where every integer has exact representation in float + && normalized_exponent() <= static_cast(Traits::mantissa_bits) + /// After multiplying by 2^exp, the fractional part becomes zero, which means the number is an integer + && ((mantissa() & ((1ULL << (Traits::mantissa_bits - normalized_exponent())) - 1)) == 0)); + } + + + /// Compare float with integer of arbitrary width (both signed and unsigned are supported). Assuming two's complement arithmetic. + /// Infinities are compared correctly. NaNs are treated similarly to infinities, so they can be less than all numbers. + /// (note that we need total order) + template + int compare(Int rhs) + { + if (rhs == 0) + return sign(); + + /// Different signs + if (is_negative() && rhs > 0) + return -1; + if (!is_negative() && rhs < 0) + return 1; + + /// Fractional number with magnitude less than one + if (normalized_exponent() < 0) + { + if (!is_negative()) + return rhs > 0 ? -1 : 1; + else + return rhs >= 0 ? -1 : 1; + } + + /// The case of the most negative integer + if constexpr (is_signed_v) + { + if (rhs == std::numeric_limits::lowest()) + { + assert(is_negative()); + + if (normalized_exponent() < static_cast(8 * sizeof(Int) - is_signed_v)) + return 1; + if (normalized_exponent() > static_cast(8 * sizeof(Int) - is_signed_v)) + return -1; + + if (mantissa() == 0) + return 0; + else + return -1; + } + } + + /// Too large number: abs(float) > abs(rhs). Also the case with infinities and NaN. + if (normalized_exponent() >= static_cast(8 * sizeof(Int) - is_signed_v)) + return is_negative() ? -1 : 1; + + using UInt = make_unsigned_t; + UInt uint_rhs = rhs < 0 ? -rhs : rhs; + + /// Smaller octave: abs(rhs) < abs(float) + if (uint_rhs < (static_cast(1) << normalized_exponent())) + return is_negative() ? -1 : 1; + + /// Larger octave: abs(rhs) > abs(float) + if (normalized_exponent() + 1 < static_cast(8 * sizeof(Int) - is_signed_v) + && uint_rhs >= (static_cast(1) << (normalized_exponent() + 1))) + return is_negative() ? 1 : -1; + + /// The same octave + /// uint_rhs == 2 ^ normalized_exponent + mantissa * 2 ^ (normalized_exponent - mantissa_bits) + + bool large_and_always_integer = normalized_exponent() >= static_cast(Traits::mantissa_bits); + + typename Traits::UInt a = large_and_always_integer + ? mantissa() << (normalized_exponent() - Traits::mantissa_bits) + : mantissa() >> (Traits::mantissa_bits - normalized_exponent()); + + typename Traits::UInt b = uint_rhs - (static_cast(1) << normalized_exponent()); + + if (a < b) + return is_negative() ? 1 : -1; + if (a > b) + return is_negative() ? -1 : 1; + + /// If the float has no fractional part, the numbers are equal. + if (large_and_always_integer || (mantissa() & ((1ULL << (Traits::mantissa_bits - normalized_exponent())) - 1)) == 0) + return 0; + else + /// If the float has a fractional part, its abs value is larger. + return is_negative() ? -1 : 1; + } + + + template + bool equals(Int rhs) + { + return compare(rhs) == 0; + } + + template + bool notEquals(Int rhs) + { + return compare(rhs) != 0; + } + + template + bool less(Int rhs) + { + return compare(rhs) < 0; + } + + template + bool greater(Int rhs) + { + return compare(rhs) > 0; + } + + template + bool lessOrEquals(Int rhs) + { + return compare(rhs) <= 0; + } + + template + bool greaterOrEquals(Int rhs) + { + return compare(rhs) >= 0; + } +}; + + +using DecomposedFloat64 = DecomposedFloat; +using DecomposedFloat32 = DecomposedFloat; diff --git a/base/common/ReplxxLineReader.cpp b/base/common/ReplxxLineReader.cpp index fcd1610e589..7893e56d751 100644 --- a/base/common/ReplxxLineReader.cpp +++ b/base/common/ReplxxLineReader.cpp @@ -91,6 +91,10 @@ ReplxxLineReader::ReplxxLineReader( /// it is also bound to M-p/M-n).
rx.bind_key(Replxx::KEY::meta('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMPLETE_NEXT, code); }); rx.bind_key(Replxx::KEY::meta('P'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMPLETE_PREVIOUS, code); }); + /// By default M-BACKSPACE is KILL_TO_WHITESPACE_ON_LEFT, while in readline it is backward-kill-word + rx.bind_key(Replxx::KEY::meta(Replxx::KEY::BACKSPACE), [this](char32_t code) { return rx.invoke(Replxx::ACTION::KILL_TO_BEGINING_OF_WORD, code); }); + /// By default C-w is KILL_TO_BEGINING_OF_WORD, while in readline it is unix-word-rubout + rx.bind_key(Replxx::KEY::control('W'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::KILL_TO_WHITESPACE_ON_LEFT, code); }); rx.bind_key(Replxx::KEY::meta('E'), [this](char32_t) { openEditor(); return Replxx::ACTION_RESULT::CONTINUE; }); } diff --git a/base/common/arithmeticOverflow.h b/base/common/arithmeticOverflow.h index c170d214636..175e75a62f4 100644 --- a/base/common/arithmeticOverflow.h +++ b/base/common/arithmeticOverflow.h @@ -56,27 +56,33 @@ namespace common } template <> - inline bool addOverflow(__int128 x, __int128 y, __int128 & res) + inline bool addOverflow(Int128 x, Int128 y, Int128 & res) { - static constexpr __int128 min_int128 = minInt128(); - static constexpr __int128 max_int128 = maxInt128(); res = addIgnoreOverflow(x, y); - return (y > 0 && x > max_int128 - y) || (y < 0 && x < min_int128 - y); + return (y > 0 && x > std::numeric_limits::max() - y) || + (y < 0 && x < std::numeric_limits::min() - y); } template <> - inline bool addOverflow(wInt256 x, wInt256 y, wInt256 & res) + inline bool addOverflow(UInt128 x, UInt128 y, UInt128 & res) { res = addIgnoreOverflow(x, y); - return (y > 0 && x > std::numeric_limits::max() - y) || - (y < 0 && x < std::numeric_limits::min() - y); + return x > std::numeric_limits::max() - y; } template <> - inline bool addOverflow(wUInt256 x, wUInt256 y, wUInt256 & res) + inline bool addOverflow(Int256 x, Int256 y, Int256 & res) { res = addIgnoreOverflow(x, y); - return x > std::numeric_limits::max() - y; + return (y > 0 && x > std::numeric_limits::max() - y) || + (y < 0 && x < std::numeric_limits::min() - y); + } + + template <> + inline bool addOverflow(UInt256 x, UInt256 y, UInt256 & res) + { + res = addIgnoreOverflow(x, y); + return x > std::numeric_limits::max() - y; } template @@ -104,24 +110,30 @@ namespace common } template <> - inline bool subOverflow(__int128 x, __int128 y, __int128 & res) + inline bool subOverflow(Int128 x, Int128 y, Int128 & res) { - static constexpr __int128 min_int128 = minInt128(); - static constexpr __int128 max_int128 = maxInt128(); res = subIgnoreOverflow(x, y); - return (y < 0 && x > max_int128 + y) || (y > 0 && x < min_int128 + y); + return (y < 0 && x > std::numeric_limits::max() + y) || + (y > 0 && x < std::numeric_limits::min() + y); } template <> - inline bool subOverflow(wInt256 x, wInt256 y, wInt256 & res) + inline bool subOverflow(UInt128 x, UInt128 y, UInt128 & res) { res = subIgnoreOverflow(x, y); - return (y < 0 && x > std::numeric_limits::max() + y) || - (y > 0 && x < std::numeric_limits::min() + y); + return x < y; } template <> - inline bool subOverflow(wUInt256 x, wUInt256 y, wUInt256 & res) + inline bool subOverflow(Int256 x, Int256 y, Int256 & res) + { + res = subIgnoreOverflow(x, y); + return (y < 0 && x > std::numeric_limits::max() + y) || + (y > 0 && x < std::numeric_limits::min() + y); + } + + template <> + inline bool subOverflow(UInt256 x, UInt256 y, UInt256 & res) { res = subIgnoreOverflow(x, 
y); return x < y; @@ -151,36 +163,33 @@ namespace common return __builtin_smulll_overflow(x, y, &res); } + /// Overflow check is not implemented for big integers. + template <> - inline bool mulOverflow(__int128 x, __int128 y, __int128 & res) + inline bool mulOverflow(Int128 x, Int128 y, Int128 & res) { res = mulIgnoreOverflow(x, y); - if (!x || !y) - return false; - - unsigned __int128 a = (x > 0) ? x : -x; - unsigned __int128 b = (y > 0) ? y : -y; - return mulIgnoreOverflow(a, b) / b != a; + return false; } template <> - inline bool mulOverflow(wInt256 x, wInt256 y, wInt256 & res) + inline bool mulOverflow(Int256 x, Int256 y, Int256 & res) { res = mulIgnoreOverflow(x, y); - if (!x || !y) - return false; - - wInt256 a = (x > 0) ? x : -x; - wInt256 b = (y > 0) ? y : -y; - return mulIgnoreOverflow(a, b) / b != a; + return false; } template <> - inline bool mulOverflow(wUInt256 x, wUInt256 y, wUInt256 & res) + inline bool mulOverflow(UInt128 x, UInt128 y, UInt128 & res) { res = mulIgnoreOverflow(x, y); - if (!x || !y) - return false; - return res / y != x; + return false; + } + + template <> + inline bool mulOverflow(UInt256 x, UInt256 y, UInt256 & res) + { + res = mulIgnoreOverflow(x, y); + return false; } } diff --git a/base/common/extended_types.h b/base/common/extended_types.h index 2ae70c0f432..79209568ef5 100644 --- a/base/common/extended_types.h +++ b/base/common/extended_types.h @@ -5,16 +5,14 @@ #include #include -using Int128 = __int128; -using wInt256 = wide::integer<256, signed>; -using wUInt256 = wide::integer<256, unsigned>; +using Int128 = wide::integer<128, signed>; +using UInt128 = wide::integer<128, unsigned>; +using Int256 = wide::integer<256, signed>; +using UInt256 = wide::integer<256, unsigned>; -static_assert(sizeof(wInt256) == 32); -static_assert(sizeof(wUInt256) == 32); - -static constexpr __int128 minInt128() { return static_cast(1) << 127; } -static constexpr __int128 maxInt128() { return (static_cast(1) << 127) - 1; } +static_assert(sizeof(Int256) == 32); +static_assert(sizeof(UInt256) == 32); /// The standard library type traits, such as std::is_arithmetic, with one exception /// (std::common_type), are "set in stone". Attempting to specialize them causes undefined behavior. 
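/// Minimal sketch of the pattern used below (Int256Stub is a hypothetical stand-in for
/// wide::integer<256, signed>): each project-local trait defaults to the std:: answer and is
/// specialized for the wide types, because specializing std::is_signed itself is undefined behavior.
///
///     struct Int256Stub {};
///     template <typename T> struct is_signed { static constexpr bool value = std::is_signed_v<T>; };
///     template <> struct is_signed<Int256Stub> { static constexpr bool value = true; };
///     template <typename T> inline constexpr bool is_signed_v = is_signed<T>::value;
///
///     static_assert(is_signed_v<Int256Stub>);        /// the project trait sees the wide type
///     static_assert(!std::is_signed_v<Int256Stub>);  /// the std trait is left untouched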
@@ -26,7 +24,7 @@ struct is_signed }; template <> struct is_signed { static constexpr bool value = true; }; -template <> struct is_signed { static constexpr bool value = true; }; +template <> struct is_signed { static constexpr bool value = true; }; template inline constexpr bool is_signed_v = is_signed::value; @@ -37,7 +35,8 @@ struct is_unsigned static constexpr bool value = std::is_unsigned_v; }; -template <> struct is_unsigned { static constexpr bool value = true; }; +template <> struct is_unsigned { static constexpr bool value = true; }; +template <> struct is_unsigned { static constexpr bool value = true; }; template inline constexpr bool is_unsigned_v = is_unsigned::value; @@ -51,8 +50,9 @@ struct is_integer }; template <> struct is_integer { static constexpr bool value = true; }; -template <> struct is_integer { static constexpr bool value = true; }; -template <> struct is_integer { static constexpr bool value = true; }; +template <> struct is_integer { static constexpr bool value = true; }; +template <> struct is_integer { static constexpr bool value = true; }; +template <> struct is_integer { static constexpr bool value = true; }; template inline constexpr bool is_integer_v = is_integer::value; @@ -64,7 +64,11 @@ struct is_arithmetic static constexpr bool value = std::is_arithmetic_v; }; -template <> struct is_arithmetic<__int128> { static constexpr bool value = true; }; +template <> struct is_arithmetic { static constexpr bool value = true; }; +template <> struct is_arithmetic { static constexpr bool value = true; }; +template <> struct is_arithmetic { static constexpr bool value = true; }; +template <> struct is_arithmetic { static constexpr bool value = true; }; + template inline constexpr bool is_arithmetic_v = is_arithmetic::value; @@ -75,9 +79,10 @@ struct make_unsigned typedef std::make_unsigned_t type; }; -template <> struct make_unsigned { using type = unsigned __int128; }; -template <> struct make_unsigned { using type = wUInt256; }; -template <> struct make_unsigned { using type = wUInt256; }; +template <> struct make_unsigned { using type = UInt128; }; +template <> struct make_unsigned { using type = UInt128; }; +template <> struct make_unsigned { using type = UInt256; }; +template <> struct make_unsigned { using type = UInt256; }; template using make_unsigned_t = typename make_unsigned::type; @@ -87,8 +92,10 @@ struct make_signed typedef std::make_signed_t type; }; -template <> struct make_signed { using type = wInt256; }; -template <> struct make_signed { using type = wInt256; }; +template <> struct make_signed { using type = Int128; }; +template <> struct make_signed { using type = Int128; }; +template <> struct make_signed { using type = Int256; }; +template <> struct make_signed { using type = Int256; }; template using make_signed_t = typename make_signed::type; @@ -98,8 +105,10 @@ struct is_big_int static constexpr bool value = false; }; -template <> struct is_big_int { static constexpr bool value = true; }; -template <> struct is_big_int { static constexpr bool value = true; }; +template <> struct is_big_int { static constexpr bool value = true; }; +template <> struct is_big_int { static constexpr bool value = true; }; +template <> struct is_big_int { static constexpr bool value = true; }; +template <> struct is_big_int { static constexpr bool value = true; }; template inline constexpr bool is_big_int_v = is_big_int::value; diff --git a/base/common/getThreadId.cpp b/base/common/getThreadId.cpp index 700c51f21fc..054e9be9074 100644 --- 
a/base/common/getThreadId.cpp +++ b/base/common/getThreadId.cpp @@ -25,6 +25,10 @@ uint64_t getThreadId() current_tid = syscall(SYS_gettid); /// This call is always successful. - man gettid #elif defined(OS_FREEBSD) current_tid = pthread_getthreadid_np(); +#elif defined(OS_SUNOS) + // On Solaris-derived systems, this returns the ID of the LWP, analogous + // to a thread. + current_tid = static_cast(pthread_self()); #else if (0 != pthread_threadid_np(nullptr, &current_tid)) throw std::logic_error("pthread_threadid_np returned error"); diff --git a/base/common/itoa.h b/base/common/itoa.h index a02e7b68c05..4c86239de36 100644 --- a/base/common/itoa.h +++ b/base/common/itoa.h @@ -30,9 +30,8 @@ #include #include #include +#include -using int128_t = __int128; -using uint128_t = unsigned __int128; namespace impl { @@ -106,7 +105,7 @@ using UnsignedOfSize = typename SelectType uint16_t, uint32_t, uint64_t, - uint128_t + __uint128_t >::Result; /// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in @@ -313,7 +312,8 @@ namespace convert } } -static inline int digits10(uint128_t x) +template +static inline int digits10(T x) { if (x < 10ULL) return 1; @@ -346,8 +346,11 @@ static inline int digits10(uint128_t x) return 12 + digits10(x / 1000000000000ULL); } -static inline char * writeUIntText(uint128_t x, char * p) +template +static inline char * writeUIntText(T x, char * p) { + static_assert(is_unsigned_v); + int len = digits10(x); auto pp = p + len; while (x >= 100) @@ -370,14 +373,28 @@ static inline char * writeLeadingMinus(char * pos) return pos + 1; } -static inline char * writeSIntText(int128_t x, char * pos) +template +static inline char * writeSIntText(T x, char * pos) { - static constexpr int128_t min_int128 = uint128_t(1) << 127; + static_assert(std::is_same_v || std::is_same_v); - if (unlikely(x == min_int128)) + using UnsignedT = make_unsigned_t; + static constexpr T min_int = UnsignedT(1) << (sizeof(T) * 8 - 1); + + if (unlikely(x == min_int)) { - memcpy(pos, "-170141183460469231731687303715884105728", 40); - return pos + 40; + if constexpr (std::is_same_v) + { + const char * res = "-170141183460469231731687303715884105728"; + memcpy(pos, res, strlen(res)); + return pos + strlen(res); + } + else if constexpr (std::is_same_v) + { + const char * res = "-57896044618658097711785492504343953926634992332820282019728792003956564819968"; + memcpy(pos, res, strlen(res)); + return pos + strlen(res); + } } if (x < 0) @@ -385,7 +402,7 @@ static inline char * writeSIntText(int128_t x, char * pos) x = -x; pos = writeLeadingMinus(pos); } - return writeUIntText(static_cast(x), pos); + return writeUIntText(UnsignedT(x), pos); } } @@ -403,13 +420,25 @@ inline char * itoa(char8_t i, char * p) } template <> -inline char * itoa(uint128_t i, char * p) +inline char * itoa(UInt128 i, char * p) { return impl::writeUIntText(i, p); } template <> -inline char * itoa(int128_t i, char * p) +inline char * itoa(Int128 i, char * p) +{ + return impl::writeSIntText(i, p); +} + +template <> +inline char * itoa(UInt256 i, char * p) +{ + return impl::writeUIntText(i, p); +} + +template <> +inline char * itoa(Int256 i, char * p) { return impl::writeSIntText(i, p); } diff --git a/base/common/strong_typedef.h b/base/common/strong_typedef.h index 77b83bfa6e5..a1e2b253aa7 100644 --- a/base/common/strong_typedef.h +++ b/base/common/strong_typedef.h @@ -4,7 +4,8 @@ #include #include -template + +template struct StrongTypedef { private: @@ -38,14 +39,16 @@ public: bool operator==(const Self & rhs) const { return t
== rhs.t; } bool operator<(const Self & rhs) const { return t < rhs.t; } + bool operator>(const Self & rhs) const { return t > rhs.t; } T & toUnderType() { return t; } const T & toUnderType() const { return t; } }; + namespace std { - template + template struct hash> { size_t operator()(const StrongTypedef & x) const diff --git a/base/common/throwError.h b/base/common/throwError.h index b495a0fbc7a..dd352913e78 100644 --- a/base/common/throwError.h +++ b/base/common/throwError.h @@ -1,13 +1,15 @@ #pragma once + #include + /// Throw DB::Exception-like exception before its definition. /// DB::Exception derived from Poco::Exception derived from std::exception. -/// DB::Exception generally cought as Poco::Exception. std::exception generally has other catch blocks and could lead to other outcomes. +/// DB::Exception generally caught as Poco::Exception. std::exception generally has other catch blocks and could lead to other outcomes. /// DB::Exception is not defined yet. It'd better to throw Poco::Exception but we do not want to include any big header here, even . /// So we throw some std::exception instead in the hope its catch block is the same as DB::Exception one. template -inline void throwError(const T & err) +[[noreturn]] inline void throwError(const T & err) { throw std::runtime_error(err); } diff --git a/base/common/time.h b/base/common/time.h index 1bf588b7cb3..d0b8e94a9a5 100644 --- a/base/common/time.h +++ b/base/common/time.h @@ -2,7 +2,7 @@ #include -#if defined (OS_DARWIN) +#if defined (OS_DARWIN) || defined (OS_SUNOS) # define CLOCK_MONOTONIC_COARSE CLOCK_MONOTONIC #elif defined (OS_FREEBSD) # define CLOCK_MONOTONIC_COARSE CLOCK_MONOTONIC_FAST diff --git a/base/common/types.h b/base/common/types.h index bd5c28fe73b..e178653f7c6 100644 --- a/base/common/types.h +++ b/base/common/types.h @@ -13,7 +13,12 @@ using char8_t = unsigned char; #endif /// This is needed for more strict aliasing. https://godbolt.org/z/xpJBSb https://stackoverflow.com/a/57453713 +#if !defined(PVS_STUDIO) /// But PVS-Studio does not treat it correctly. 
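/// (Illustrative note on the aliasing remark above: if UInt8 were plain unsigned char, then in
///     void add(UInt64 & sum, const UInt8 * data) { sum += data[0]; sum += data[1]; }
/// the compiler would have to assume `data` may point into `sum` and re-read data[1] after the
/// first store, because char-family pointers may alias anything; char8_t is a distinct type
/// without that exemption, so `sum` can stay in a register — see the godbolt link above.)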
using UInt8 = char8_t; +#else +using UInt8 = uint8_t; +#endif + using UInt16 = uint16_t; using UInt32 = uint32_t; using UInt64 = uint64_t; diff --git a/base/common/wide_integer.h b/base/common/wide_integer.h index c9d1eaa32aa..419b4e4558c 100644 --- a/base/common/wide_integer.h +++ b/base/common/wide_integer.h @@ -58,9 +58,11 @@ public: using signed_base_type = int64_t; // ctors - constexpr integer() noexcept; + constexpr integer() noexcept = default; + template constexpr integer(T rhs) noexcept; + template constexpr integer(std::initializer_list il) noexcept; @@ -108,9 +110,9 @@ public: constexpr explicit operator bool() const noexcept; template - using __integral_not_wide_integer_class = typename std::enable_if::value, T>::type; + using _integral_not_wide_integer_class = typename std::enable_if::value, T>::type; - template > + template > constexpr operator T() const noexcept; constexpr operator long double() const noexcept; @@ -119,25 +121,27 @@ public: struct _impl; + base_type items[_impl::item_count]; + private: template friend class integer; friend class std::numeric_limits>; friend class std::numeric_limits>; - - base_type items[_impl::item_count]; }; template static constexpr bool ArithmeticConcept() noexcept; + template -using __only_arithmetic = typename std::enable_if() && ArithmeticConcept()>::type; +using _only_arithmetic = typename std::enable_if() && ArithmeticConcept()>::type; template static constexpr bool IntegralConcept() noexcept; + template -using __only_integer = typename std::enable_if() && IntegralConcept()>::type; +using _only_integer = typename std::enable_if() && IntegralConcept()>::type; // Unary operators template @@ -153,54 +157,55 @@ constexpr integer operator+(const integer & lhs) noe template std::common_type_t, integer> constexpr operator*(const integer & lhs, const integer & rhs); -template > +template > std::common_type_t constexpr operator*(const Arithmetic & rhs, const Arithmetic2 & lhs); template std::common_type_t, integer> constexpr operator/(const integer & lhs, const integer & rhs); -template > +template > std::common_type_t constexpr operator/(const Arithmetic & rhs, const Arithmetic2 & lhs); template std::common_type_t, integer> constexpr operator+(const integer & lhs, const integer & rhs); -template > +template > std::common_type_t constexpr operator+(const Arithmetic & rhs, const Arithmetic2 & lhs); template std::common_type_t, integer> constexpr operator-(const integer & lhs, const integer & rhs); -template > +template > std::common_type_t constexpr operator-(const Arithmetic & rhs, const Arithmetic2 & lhs); template std::common_type_t, integer> constexpr operator%(const integer & lhs, const integer & rhs); -template > +template > std::common_type_t constexpr operator%(const Integral & rhs, const Integral2 & lhs); template std::common_type_t, integer> constexpr operator&(const integer & lhs, const integer & rhs); -template > +template > std::common_type_t constexpr operator&(const Integral & rhs, const Integral2 & lhs); template std::common_type_t, integer> constexpr operator|(const integer & lhs, const integer & rhs); -template > +template > std::common_type_t constexpr operator|(const Integral & rhs, const Integral2 & lhs); template std::common_type_t, integer> constexpr operator^(const integer & lhs, const integer & rhs); -template > +template > std::common_type_t constexpr operator^(const Integral & rhs, const Integral2 & lhs); // TODO: Integral template constexpr integer operator<<(const integer & lhs, int n) noexcept; + template 
constexpr integer operator>>(const integer & lhs, int n) noexcept; @@ -217,32 +222,32 @@ constexpr integer operator>>(const integer & lhs, In template constexpr bool operator<(const integer & lhs, const integer & rhs); -template > +template > constexpr bool operator<(const Arithmetic & rhs, const Arithmetic2 & lhs); template constexpr bool operator>(const integer & lhs, const integer & rhs); -template > +template > constexpr bool operator>(const Arithmetic & rhs, const Arithmetic2 & lhs); template constexpr bool operator<=(const integer & lhs, const integer & rhs); -template > +template > constexpr bool operator<=(const Arithmetic & rhs, const Arithmetic2 & lhs); template constexpr bool operator>=(const integer & lhs, const integer & rhs); -template > +template > constexpr bool operator>=(const Arithmetic & rhs, const Arithmetic2 & lhs); template constexpr bool operator==(const integer & lhs, const integer & rhs); -template > +template > constexpr bool operator==(const Arithmetic & rhs, const Arithmetic2 & lhs); template constexpr bool operator!=(const integer & lhs, const integer & rhs); -template > +template > constexpr bool operator!=(const Arithmetic & rhs, const Arithmetic2 & lhs); } diff --git a/base/common/wide_integer_impl.h b/base/common/wide_integer_impl.h index 456c10a22e4..725caec6a3e 100644 --- a/base/common/wide_integer_impl.h +++ b/base/common/wide_integer_impl.h @@ -5,6 +5,7 @@ /// (See at http://www.boost.org/LICENSE_1_0.txt) #include "throwError.h" + #include #include #include @@ -81,7 +82,7 @@ public: res.items[T::_impl::big(0)] = std::numeric_limits::signed_base_type>::min(); return res; } - return 0; + return wide::integer(0); } static constexpr wide::integer max() noexcept @@ -176,7 +177,7 @@ struct integer::_impl constexpr static bool is_negative(const integer & n) noexcept { if constexpr (std::is_same_v) - return static_cast(n.items[big(0)]) < 0; + return static_cast(n.items[integer::_impl::big(0)]) < 0; else return false; } @@ -193,40 +194,36 @@ struct integer::_impl template constexpr static integer make_positive(const integer & n) noexcept { - return is_negative(n) ? operator_unary_minus(n) : n; + return is_negative(n) ? integer(operator_unary_minus(n)) : n; } template __attribute__((no_sanitize("undefined"))) constexpr static auto to_Integral(T f) noexcept { - if constexpr (std::is_same_v) - return f; - else if constexpr (std::is_signed_v) + if constexpr (std::is_signed_v) return static_cast(f); else return static_cast(f); } template - constexpr static void wide_integer_from_bultin(integer & self, Integral rhs) noexcept + constexpr static void wide_integer_from_builtin(integer & self, Integral rhs) noexcept { - self.items[0] = _impl::to_Integral(rhs); - if constexpr (std::is_same_v) - self.items[1] = rhs >> base_bits; + static_assert(sizeof(Integral) <= sizeof(base_type)); - constexpr const unsigned start = (sizeof(Integral) == 16) ? 2 : 1; + self.items[0] = _impl::to_Integral(rhs); if constexpr (std::is_signed_v) { if (rhs < 0) { - for (unsigned i = start; i < item_count; ++i) + for (size_t i = 1; i < item_count; ++i) self.items[i] = -1; return; } } - for (unsigned i = start; i < item_count; ++i) + for (size_t i = 1; i < item_count; ++i) self.items[i] = 0; } @@ -239,7 +236,8 @@ struct integer::_impl * a_(n - 1) = a_n * max_int + b2, a_n <= max_int <- base case. 
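* (Illustrative example: for t = 2^65 and max_int = 2^64 - 1, alpha = t / max_int is
* slightly above 2, so the recursion bottoms out at a_1 = 2, and the reconstruction
* 2 * max_int + (t - 2 * max_int) = 2^65 restores the value exactly.)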
*/ template - constexpr static void set_multiplier(integer & self, T t) noexcept { + constexpr static void set_multiplier(integer & self, T t) noexcept + { constexpr uint64_t max_int = std::numeric_limits::max(); /// Implementation specific behaviour on overflow (if we don't check here, stack overflow will triggered in bigint_cast). @@ -260,7 +258,8 @@ struct integer::_impl self += static_cast(t - alpha * static_cast(max_int)); // += b_i } - constexpr static void wide_integer_from_bultin(integer& self, double rhs) noexcept { + constexpr static void wide_integer_from_builtin(integer& self, double rhs) noexcept + { constexpr int64_t max_int = std::numeric_limits::max(); constexpr int64_t min_int = std::numeric_limits::min(); @@ -383,13 +382,13 @@ struct integer::_impl if (bit_shift) lhs.items[big(items_shift)] |= std::numeric_limits::max() << (base_bits - bit_shift); - for (unsigned i = item_count - items_shift; i < items_shift; ++i) - lhs.items[little(i)] = std::numeric_limits::max(); + for (unsigned i = 0; i < items_shift; ++i) + lhs.items[big(i)] = std::numeric_limits::max(); } else { - for (unsigned i = item_count - items_shift; i < items_shift; ++i) - lhs.items[little(i)] = 0; + for (unsigned i = 0; i < items_shift; ++i) + lhs.items[big(i)] = 0; } return lhs; @@ -397,23 +396,23 @@ struct integer::_impl private: template - constexpr static base_type get_item(const T & x, unsigned number) + constexpr static base_type get_item(const T & x, unsigned idx) { if constexpr (IsWideInteger::value) { - if (number < T::_impl::item_count) - return x.items[number]; + if (idx < T::_impl::item_count) + return x.items[idx]; return 0; } else { if constexpr (sizeof(T) <= sizeof(base_type)) { - if (!number) + if (0 == idx) return x; } - else if (number * sizeof(base_type) < sizeof(T)) - return x >> (number * base_bits); // & std::numeric_limits::max() + else if (idx * sizeof(base_type) < sizeof(T)) + return x >> (idx * base_bits); // & std::numeric_limits::max() return 0; } } @@ -439,7 +438,7 @@ private: for (unsigned i = 1; i < item_count; ++i) { - if (underflows[i-1]) + if (underflows[i - 1]) { base_type & res_item = res.items[little(i)]; if (res_item == 0) @@ -472,7 +471,7 @@ private: for (unsigned i = 1; i < item_count; ++i) { - if (overflows[i-1]) + if (overflows[i - 1]) { base_type & res_item = res.items[little(i)]; ++res_item; @@ -532,6 +531,17 @@ private: res.items[little(2)] = r12 >> 64; return res; } + else if constexpr (Bits == 128 && sizeof(base_type) == 8) + { + using CompilerUInt128 = unsigned __int128; + CompilerUInt128 a = (CompilerUInt128(lhs.items[1]) << 64) + lhs.items[0]; + CompilerUInt128 b = (CompilerUInt128(rhs.items[1]) << 64) + rhs.items[0]; + CompilerUInt128 c = a * b; + integer res; + res.items[0] = c; + res.items[1] = c >> 64; + return res; + } else { integer res{}; @@ -657,7 +667,7 @@ public: } template - constexpr static bool operator_more(const integer & lhs, const T & rhs) noexcept + constexpr static bool operator_greater(const integer & lhs, const T & rhs) noexcept { if constexpr (should_keep_size()) { @@ -677,7 +687,7 @@ public: else { static_assert(IsWideInteger::value); - return std::common_type_t, T>::_impl::operator_more(T(lhs), rhs); + return std::common_type_t, T>::_impl::operator_greater(T(lhs), rhs); } } @@ -764,7 +774,6 @@ public: } } -private: template constexpr static bool is_zero(const T & x) { @@ -781,46 +790,65 @@ private: } /// returns quotient as result and remainder in numerator. 
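/// (Illustrative walk-through of the shift-and-subtract loop below: for 100 / 7, the denominator
/// is doubled while it still fits: 7 -> 14 -> 28 -> 56 -> 112, with x reaching 16; walking back
/// down, 56, 28 and 14 are subtracted, so the quotient is 8 + 4 + 2 = 14 and `numerator` is left
/// holding the remainder 2.)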
- template - constexpr static T divide(T & numerator, T && denominator) + template + constexpr static integer divide(integer & numerator, integer denominator) { - if (is_zero(denominator)) - throwError("divide by zero"); + static_assert(std::is_unsigned_v); - T & n = numerator; - T & d = denominator; - T x = 1; - T quotient = 0; - - while (!operator_more(d, n) && operator_eq(operator_amp(shift_right(d, base_bits * item_count - 1), 1), 0)) + if constexpr (Bits == 128 && sizeof(base_type) == 8) { - x = shift_left(x, 1); - d = shift_left(d, 1); + using CompilerUInt128 = unsigned __int128; + + CompilerUInt128 a = (CompilerUInt128(numerator.items[1]) << 64) + numerator.items[0]; + CompilerUInt128 b = (CompilerUInt128(denominator.items[1]) << 64) + denominator.items[0]; + CompilerUInt128 c = a / b; + + integer res; + res.items[0] = c; + res.items[1] = c >> 64; + + CompilerUInt128 remainder = a - b * c; + numerator.items[0] = remainder; + numerator.items[1] = remainder >> 64; + + return res; } - while (!operator_eq(x, 0)) + if (is_zero(denominator)) + throwError("Division by zero"); + + integer x = 1; + integer quotient = 0; + + while (!operator_greater(denominator, numerator) && is_zero(operator_amp(shift_right(denominator, Bits2 - 1), 1))) { - if (!operator_more(d, n)) + x = shift_left(x, 1); + denominator = shift_left(denominator, 1); + } + + while (!is_zero(x)) + { + if (!operator_greater(denominator, numerator)) { - n = operator_minus(n, d); + numerator = operator_minus(numerator, denominator); quotient = operator_pipe(quotient, x); } x = shift_right(x, 1); - d = shift_right(d, 1); + denominator = shift_right(denominator, 1); } return quotient; } -public: template constexpr static auto operator_slash(const integer & lhs, const T & rhs) { if constexpr (should_keep_size()) { - integer numerator = make_positive(lhs); - integer quotient = divide(numerator, make_positive(integer(rhs))); + integer numerator = make_positive(lhs); + integer denominator = make_positive(integer(rhs)); + integer quotient = integer::_impl::divide(numerator, std::move(denominator)); if (std::is_same_v && is_negative(rhs) != is_negative(lhs)) quotient = operator_unary_minus(quotient); @@ -838,8 +866,9 @@ public: { if constexpr (should_keep_size()) { - integer remainder = make_positive(lhs); - divide(remainder, make_positive(integer(rhs))); + integer remainder = make_positive(lhs); + integer denominator = make_positive(integer(rhs)); + integer::_impl::divide(remainder, std::move(denominator)); if (std::is_same_v && is_negative(lhs)) remainder = operator_unary_minus(remainder); @@ -905,7 +934,7 @@ public: ++c; } else - throwError("invalid char from"); + throwError("Invalid char from"); } } else @@ -913,7 +942,7 @@ public: while (*c) { if (*c < '0' || *c > '9') - throwError("invalid char from"); + throwError("Invalid char from"); res = multiply(res, 10U); res = plus(res, *c - '0'); @@ -930,11 +959,6 @@ public: // Members -template -constexpr integer::integer() noexcept - : items{} -{} - template template constexpr integer::integer(T rhs) noexcept @@ -943,7 +967,7 @@ constexpr integer::integer(T rhs) noexcept if constexpr (IsWideInteger::value) _impl::wide_integer_from_wide_integer(*this, rhs); else - _impl::wide_integer_from_bultin(*this, rhs); + _impl::wide_integer_from_builtin(*this, rhs); } template @@ -956,10 +980,19 @@ constexpr integer::integer(std::initializer_list il) noexcept if constexpr (IsWideInteger::value) _impl::wide_integer_from_wide_integer(*this, *il.begin()); else - _impl::wide_integer_from_bultin(*this, 
*il.begin()); + _impl::wide_integer_from_builtin(*this, *il.begin()); + } + else if (il.size() == 0) + { + _impl::wide_integer_from_builtin(*this, 0); } else - _impl::wide_integer_from_bultin(*this, 0); + { + auto it = il.begin(); + for (size_t i = 0; i < _impl::item_count; ++i) + if (it < il.end()) + items[i] = *it; + } } template @@ -974,7 +1007,7 @@ template template constexpr integer & integer::operator=(T rhs) noexcept { - _impl::wide_integer_from_bultin(*this, rhs); + _impl::wide_integer_from_builtin(*this, rhs); return *this; } @@ -1057,7 +1090,7 @@ constexpr integer & integer::operator>>=(int n) noex { if (static_cast(n) >= Bits) { - if (is_negative(*this)) + if (_impl::is_negative(*this)) *this = -1; else *this = 0; @@ -1107,16 +1140,17 @@ template template constexpr integer::operator T() const noexcept { - if constexpr (std::is_same_v) - { - static_assert(Bits >= 128); - return (__int128(items[1]) << 64) | items[0]; - } - else - { - static_assert(std::numeric_limits::is_integer); - return items[0]; - } + static_assert(std::numeric_limits::is_integer); + + /// NOTE: memcpy will suffice, but unfortunately, this function is constexpr. + + using UnsignedT = std::make_unsigned_t; + + UnsignedT res{}; + for (unsigned i = 0; i < _impl::item_count && i < (sizeof(T) + sizeof(base_type) - 1) / sizeof(base_type); ++i) + res += UnsignedT(items[i]) << (sizeof(base_type) * 8 * i); + + return res; } template @@ -1280,7 +1314,7 @@ template constexpr integer operator<<(const integer & lhs, int n) noexcept { if (static_cast(n) >= Bits) - return 0; + return integer(0); if (n <= 0) return lhs; return integer::_impl::shift_left(lhs, n); @@ -1289,7 +1323,7 @@ template constexpr integer operator>>(const integer & lhs, int n) noexcept { if (static_cast(n) >= Bits) - return 0; + return integer(0); if (n <= 0) return lhs; return integer::_impl::shift_right(lhs, n); @@ -1309,7 +1343,7 @@ constexpr bool operator<(const Arithmetic & lhs, const Arithmetic2 & rhs) template constexpr bool operator>(const integer & lhs, const integer & rhs) { - return std::common_type_t, integer>::_impl::operator_more(lhs, rhs); + return std::common_type_t, integer>::_impl::operator_greater(lhs, rhs); } template constexpr bool operator>(const Arithmetic & lhs, const Arithmetic2 & rhs) @@ -1332,7 +1366,7 @@ constexpr bool operator<=(const Arithmetic & lhs, const Arithmetic2 & rhs) template constexpr bool operator>=(const integer & lhs, const integer & rhs) { - return std::common_type_t, integer>::_impl::operator_more(lhs, rhs) + return std::common_type_t, integer>::_impl::operator_greater(lhs, rhs) || std::common_type_t, integer>::_impl::operator_eq(lhs, rhs); } template diff --git a/base/common/wide_integer_to_string.h b/base/common/wide_integer_to_string.h index 9908ef4be7a..8b794fe9bcb 100644 --- a/base/common/wide_integer_to_string.h +++ b/base/common/wide_integer_to_string.h @@ -1,9 +1,12 @@ #pragma once #include +#include +#include #include "wide_integer.h" + namespace wide { @@ -33,3 +36,34 @@ inline std::string to_string(const integer & n) } } + + +template +std::ostream & operator<<(std::ostream & out, const wide::integer & value) +{ + return out << to_string(value); +} + + +/// See https://fmt.dev/latest/api.html#formatting-user-defined-types +template +struct fmt::formatter> +{ + constexpr auto parse(format_parse_context & ctx) + { + auto it = ctx.begin(); + auto end = ctx.end(); + + /// Only support {}. 
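/// Usage (illustrative): with this specialization visible,
///     fmt::format("sum = {}", wide::integer<256, unsigned>(42))
/// yields "sum = 42", while any non-empty spec such as "{:x}" throws format_error.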
+ if (it != end && *it != '}') + throw format_error("invalid format"); + + return it; + } + + template + auto format(const wide::integer & value, FormatContext & ctx) + { + return format_to(ctx.out(), "{}", to_string(value)); + } +}; diff --git a/base/common/ya.make.in b/base/common/ya.make.in index b5c2bbc1717..3deb36a2c71 100644 --- a/base/common/ya.make.in +++ b/base/common/ya.make.in @@ -35,7 +35,7 @@ PEERDIR( CFLAGS(-g0) SRCS( - + ) END() diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 1b7d0064b99..1028dc7d2dc 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -102,7 +102,7 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) auto * logger = &Poco::Logger::get("SentryWriter"); if (config.getBool("send_crash_reports.enabled", false)) { - if (debug || (strlen(VERSION_OFFICIAL) > 0)) + if (debug || (strlen(VERSION_OFFICIAL) > 0)) //-V560 { enabled = true; } diff --git a/base/glibc-compatibility/CMakeLists.txt b/base/glibc-compatibility/CMakeLists.txt index e785e2ab2ce..8cba91de33f 100644 --- a/base/glibc-compatibility/CMakeLists.txt +++ b/base/glibc-compatibility/CMakeLists.txt @@ -15,7 +15,7 @@ if (GLIBC_COMPATIBILITY) add_headers_and_sources(glibc_compatibility .) add_headers_and_sources(glibc_compatibility musl) - if (ARCH_ARM) + if (ARCH_AARCH64) list (APPEND glibc_compatibility_sources musl/aarch64/syscall.s musl/aarch64/longjmp.s) set (musl_arch_include_dir musl/aarch64) elseif (ARCH_AMD64) diff --git a/base/glibc-compatibility/musl/lgamma.c b/base/glibc-compatibility/musl/lgamma.c index fb9d105d0fa..5e959504e29 100644 --- a/base/glibc-compatibility/musl/lgamma.c +++ b/base/glibc-compatibility/musl/lgamma.c @@ -78,6 +78,9 @@ * */ +// Disable warnings by PVS-Studio +//-V::GA + static const double pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */ a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */ diff --git a/base/glibc-compatibility/musl/lgammal.c b/base/glibc-compatibility/musl/lgammal.c index b158748ce1f..775559f13b6 100644 --- a/base/glibc-compatibility/musl/lgammal.c +++ b/base/glibc-compatibility/musl/lgammal.c @@ -85,6 +85,9 @@ * */ +// Disable warnings by PVS-Studio +//-V::GA + #include #include #include "libm.h" diff --git a/base/glibc-compatibility/musl/libm.h b/base/glibc-compatibility/musl/libm.h index 55520c2fb03..e5029318693 100644 --- a/base/glibc-compatibility/musl/libm.h +++ b/base/glibc-compatibility/musl/libm.h @@ -155,7 +155,7 @@ static inline long double fp_barrierl(long double x) static inline void fp_force_evalf(float x) { volatile float y; - y = x; + y = x; //-V1001 } #endif @@ -164,7 +164,7 @@ static inline void fp_force_evalf(float x) static inline void fp_force_eval(double x) { volatile double y; - y = x; + y = x; //-V1001 } #endif @@ -173,7 +173,7 @@ static inline void fp_force_eval(double x) static inline void fp_force_evall(long double x) { volatile long double y; - y = x; + y = x; //-V1001 } #endif diff --git a/base/glibc-compatibility/musl/powf.c b/base/glibc-compatibility/musl/powf.c index de8fab54554..35dc3611b94 100644 --- a/base/glibc-compatibility/musl/powf.c +++ b/base/glibc-compatibility/musl/powf.c @@ -3,6 +3,9 @@ * SPDX-License-Identifier: MIT */ +// Disable warnings by PVS-Studio +//-V::GA + #include #include #include "libm.h" diff --git a/base/loggers/Loggers.cpp b/base/loggers/Loggers.cpp index ed806741895..4a66d43606b 100644 --- a/base/loggers/Loggers.cpp +++ b/base/loggers/Loggers.cpp @@ -40,7 +40,7 @@ void 
Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log split->addTextLog(log, text_log_max_priority); auto current_logger = config.getString("logger", ""); - if (config_logger == current_logger) + if (config_logger == current_logger) //-V1051 return; config_logger = current_logger; @@ -69,7 +69,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log log_file->setProperty(Poco::FileChannel::PROP_ROTATEONOPEN, config.getRawString("logger.rotateOnOpen", "false")); log_file->open(); - Poco::AutoPtr pf = new OwnPatternFormatter(this); + Poco::AutoPtr pf = new OwnPatternFormatter; Poco::AutoPtr log = new DB::OwnFormattingChannel(pf, log_file); split->addChannel(log); @@ -90,7 +90,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log error_log_file->setProperty(Poco::FileChannel::PROP_FLUSH, config.getRawString("logger.flush", "true")); error_log_file->setProperty(Poco::FileChannel::PROP_ROTATEONOPEN, config.getRawString("logger.rotateOnOpen", "false")); - Poco::AutoPtr pf = new OwnPatternFormatter(this); + Poco::AutoPtr pf = new OwnPatternFormatter; Poco::AutoPtr errorlog = new DB::OwnFormattingChannel(pf, error_log_file); errorlog->setLevel(Poco::Message::PRIO_NOTICE); @@ -98,10 +98,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log split->addChannel(errorlog); } - /// "dynamic_layer_selection" is needed only for Yandex.Metrika, that share part of ClickHouse code. - /// We don't need this configuration parameter. - - if (config.getBool("logger.use_syslog", false) || config.getBool("dynamic_layer_selection", false)) + if (config.getBool("logger.use_syslog", false)) { //const std::string & cmd_name = commandName(); @@ -127,7 +124,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log } syslog_channel->open(); - Poco::AutoPtr pf = new OwnPatternFormatter(this, OwnPatternFormatter::ADD_LAYER_TAG); + Poco::AutoPtr pf = new OwnPatternFormatter; Poco::AutoPtr log = new DB::OwnFormattingChannel(pf, syslog_channel); split->addChannel(log); @@ -141,7 +138,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log { bool color_enabled = config.getBool("logger.color_terminal", color_logs_by_default); - Poco::AutoPtr pf = new OwnPatternFormatter(this, OwnPatternFormatter::ADD_NOTHING, color_enabled); + Poco::AutoPtr pf = new OwnPatternFormatter(color_enabled); Poco::AutoPtr log = new DB::OwnFormattingChannel(pf, new Poco::ConsoleChannel); logger.warning("Logging " + log_level + " to console"); split->addChannel(log); diff --git a/base/loggers/Loggers.h b/base/loggers/Loggers.h index 9ed75046468..151c1d3566f 100644 --- a/base/loggers/Loggers.h +++ b/base/loggers/Loggers.h @@ -8,6 +8,7 @@ #include #include "OwnSplitChannel.h" + namespace Poco::Util { class AbstractConfiguration; @@ -21,16 +22,8 @@ public: /// Close log files. On next log write files will be reopened. void closeLogs(Poco::Logger & logger); - std::optional getLayer() const - { - return layer; /// layer set in inheritor class BaseDaemonApplication. 
- } - void setTextLog(std::shared_ptr log, int max_priority); -protected: - std::optional layer; - private: Poco::AutoPtr log_file; Poco::AutoPtr error_log_file; diff --git a/base/loggers/OwnPatternFormatter.cpp b/base/loggers/OwnPatternFormatter.cpp index 029d06ff949..e62039f4a27 100644 --- a/base/loggers/OwnPatternFormatter.cpp +++ b/base/loggers/OwnPatternFormatter.cpp @@ -13,31 +13,18 @@ #include "Loggers.h" -OwnPatternFormatter::OwnPatternFormatter(const Loggers * loggers_, OwnPatternFormatter::Options options_, bool color_) - : Poco::PatternFormatter(""), loggers(loggers_), options(options_), color(color_) +OwnPatternFormatter::OwnPatternFormatter(bool color_) + : Poco::PatternFormatter(""), color(color_) { } -void OwnPatternFormatter::formatExtended(const DB::ExtendedLogMessage & msg_ext, std::string & text) +void OwnPatternFormatter::formatExtended(const DB::ExtendedLogMessage & msg_ext, std::string & text) const { DB::WriteBufferFromString wb(text); const Poco::Message & msg = msg_ext.base; - /// For syslog: tag must be before message and first whitespace. - /// This code is only used in Yandex.Metrika and unneeded in ClickHouse. - if ((options & ADD_LAYER_TAG) && loggers) - { - auto layer = loggers->getLayer(); - if (layer) - { - writeCString("layer[", wb); - DB::writeIntText(*layer, wb); - writeCString("]: ", wb); - } - } - /// Change delimiters in date for compatibility with old logs. DB::writeDateTimeText<'.', ':'>(msg_ext.time_seconds, wb); diff --git a/base/loggers/OwnPatternFormatter.h b/base/loggers/OwnPatternFormatter.h index 4aedcc04637..fba4f0964cb 100644 --- a/base/loggers/OwnPatternFormatter.h +++ b/base/loggers/OwnPatternFormatter.h @@ -24,20 +24,11 @@ class Loggers; class OwnPatternFormatter : public Poco::PatternFormatter { public: - /// ADD_LAYER_TAG is needed only for Yandex.Metrika, that share part of ClickHouse code. - enum Options - { - ADD_NOTHING = 0, - ADD_LAYER_TAG = 1 << 0 - }; - - OwnPatternFormatter(const Loggers * loggers_, Options options_ = ADD_NOTHING, bool color_ = false); + OwnPatternFormatter(bool color_ = false); void format(const Poco::Message & msg, std::string & text) override; - void formatExtended(const DB::ExtendedLogMessage & msg_ext, std::string & text); + void formatExtended(const DB::ExtendedLogMessage & msg_ext, std::string & text) const; private: - const Loggers * loggers; - Options options; bool color; }; diff --git a/base/pcg-random/pcg_extras.hpp b/base/pcg-random/pcg_extras.hpp index b71e859a25f..39c91c4ecfa 100644 --- a/base/pcg-random/pcg_extras.hpp +++ b/base/pcg-random/pcg_extras.hpp @@ -447,69 +447,6 @@ inline SrcIter uneven_copy(SrcIter src_first, std::integral_constant{}); } -/* generate_to, fill in a fixed-size array of integral type using a SeedSeq - * (actually works for any random-access iterator) - */ - -template -inline void generate_to_impl(SeedSeq&& generator, DestIter dest, - std::true_type) -{ - generator.generate(dest, dest+size); -} - -template -void generate_to_impl(SeedSeq&& generator, DestIter dest, - std::false_type) -{ - typedef typename std::iterator_traits::value_type dest_t; - constexpr auto DEST_SIZE = sizeof(dest_t); - constexpr auto GEN_SIZE = sizeof(uint32_t); - - constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE; - constexpr size_t FROM_ELEMS = - GEN_IS_SMALLER - ? 
size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE) - : (size + (GEN_SIZE / DEST_SIZE) - 1) - / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER); - // this odd code ^^^^^^^^^^^^^^^^^ is work-around for - // a bug: http://llvm.org/bugs/show_bug.cgi?id=21287 - - if (FROM_ELEMS <= 1024) { - uint32_t buffer[FROM_ELEMS]; - generator.generate(buffer, buffer+FROM_ELEMS); - uneven_copy(buffer, dest, dest+size); - } else { - uint32_t* buffer = static_cast(malloc(GEN_SIZE * FROM_ELEMS)); - generator.generate(buffer, buffer+FROM_ELEMS); - uneven_copy(buffer, dest, dest+size); - free(static_cast(buffer)); - } -} - -template -inline void generate_to(SeedSeq&& generator, DestIter dest) -{ - typedef typename std::iterator_traits::value_type dest_t; - constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t); - - generate_to_impl(std::forward(generator), dest, - std::integral_constant{}); -} - -/* generate_one, produce a value of integral type using a SeedSeq - * (optionally, we can have it produce more than one and pick which one - * we want) - */ - -template -inline UInt generate_one(SeedSeq&& generator) -{ - UInt result[N]; - generate_to(std::forward(generator), result); - return result[i]; -} - template auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound) -> typename RngType::result_type @@ -517,7 +454,7 @@ auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound) typedef typename RngType::result_type rtype; rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound) % upper_bound; - for (;;) { + for (;;) { //-V1044 rtype r = rng() - RngType::min(); if (r >= threshold) return r % upper_bound; diff --git a/base/pcg-random/pcg_random.hpp b/base/pcg-random/pcg_random.hpp index abf83a60ee1..d9d3519a4cf 100644 --- a/base/pcg-random/pcg_random.hpp +++ b/base/pcg-random/pcg_random.hpp @@ -928,7 +928,7 @@ struct rxs_m_xs_mixin { constexpr bitcount_t shift = bits - xtypebits; constexpr bitcount_t mask = (1 << opbits) - 1; bitcount_t rshift = - opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; + opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; //-V547 internal ^= internal >> (opbits + rshift); internal *= mcg_multiplier::multiplier(); xtype result = internal >> shift; @@ -950,7 +950,7 @@ struct rxs_m_xs_mixin { internal *= mcg_unmultiplier::unmultiplier(); - bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; + bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; //-V547 internal = unxorshift(internal, bits, opbits + rshift); return internal; @@ -975,7 +975,7 @@ struct rxs_m_mixin { : 2; constexpr bitcount_t shift = bits - xtypebits; constexpr bitcount_t mask = (1 << opbits) - 1; - bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; + bitcount_t rshift = opbits ? 
(internal >> (bits - opbits)) & mask : 0; //-V547 internal ^= internal >> (opbits + rshift); internal *= mcg_multiplier::multiplier(); xtype result = internal >> shift; @@ -1366,7 +1366,7 @@ void extended::selfinit() // - any strange correlations would only be apparent if we // were to backstep the generator so that the base generator // was generating the same values again - result_type xdiff = baseclass::operator()() - baseclass::operator()(); + result_type xdiff = baseclass::operator()() - baseclass::operator()(); //-V501 for (size_t i = 0; i < table_size; ++i) { data_[i] = baseclass::operator()() ^ xdiff; } @@ -1643,22 +1643,22 @@ typedef setseq_base template -using ext_std8 = extended; template -using ext_std16 = extended; template -using ext_std32 = extended; template -using ext_std64 = extended; diff --git a/cmake/find/amqpcpp.cmake b/cmake/find/amqpcpp.cmake index e3eaaf33ddb..a4a58349508 100644 --- a/cmake/find/amqpcpp.cmake +++ b/cmake/find/amqpcpp.cmake @@ -1,5 +1,5 @@ -if (OS_DARWIN AND COMPILER_GCC) - # AMQP-CPP requires libuv which cannot be built with GCC in macOS due to a bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93082 +if (MISSING_INTERNAL_LIBUV_LIBRARY) + message (WARNING "Can't find internal libuv needed for AMQP-CPP library") set (ENABLE_AMQPCPP OFF CACHE INTERNAL "") endif() @@ -17,11 +17,13 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/CMakeLists.txt") endif () set (USE_AMQPCPP 1) -set (AMQPCPP_LIBRARY AMQP-CPP) +set (AMQPCPP_LIBRARY amqp-cpp) set (AMQPCPP_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/include") list (APPEND AMQPCPP_INCLUDE_DIR - "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/include" + "${LIBUV_INCLUDE_DIR}" "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP") +list (APPEND AMQPCPP_LIBRARY "${LIBUV_LIBRARY}") + message (STATUS "Using AMQP-CPP=${USE_AMQPCPP}: ${AMQPCPP_INCLUDE_DIR} : ${AMQPCPP_LIBRARY}") diff --git a/cmake/find/cassandra.cmake b/cmake/find/cassandra.cmake index ded25a5bf41..b6e97ff5ef8 100644 --- a/cmake/find/cassandra.cmake +++ b/cmake/find/cassandra.cmake @@ -1,5 +1,5 @@ -if (OS_DARWIN AND COMPILER_GCC) - # Cassandra requires libuv which cannot be built with GCC in macOS due to a bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93082 +if (MISSING_INTERNAL_LIBUV_LIBRARY) + message (WARNING "Disabling cassandra due to missing libuv") set (ENABLE_CASSANDRA OFF CACHE INTERNAL "") endif() @@ -13,27 +13,22 @@ if (APPLE) set(CMAKE_MACOSX_RPATH ON) endif() -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libuv") - message (ERROR "submodule contrib/libuv is missing. to fix try run: \n git submodule update --init --recursive") - message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libuv needed for Cassandra") -elseif (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra") +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra") message (ERROR "submodule contrib/cassandra is missing. 
to fix try run: \n git submodule update --init --recursive") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal Cassandra") -else() - set (LIBUV_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libuv") - set (CASSANDRA_INCLUDE_DIR - "${ClickHouse_SOURCE_DIR}/contrib/cassandra/include/") - if (MAKE_STATIC_LIBRARIES) - set (LIBUV_LIBRARY uv_a) - set (CASSANDRA_LIBRARY cassandra_static) - else() - set (LIBUV_LIBRARY uv) - set (CASSANDRA_LIBRARY cassandra) - endif() - - set (USE_CASSANDRA 1) - set (CASS_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra") + set (USE_CASSANDRA 0) + return() endif() +set (USE_CASSANDRA 1) +set (CASSANDRA_INCLUDE_DIR + "${ClickHouse_SOURCE_DIR}/contrib/cassandra/include/") +if (MAKE_STATIC_LIBRARIES) + set (CASSANDRA_LIBRARY cassandra_static) +else() + set (CASSANDRA_LIBRARY cassandra) +endif() + +set (CASS_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra") + message (STATUS "Using cassandra=${USE_CASSANDRA}: ${CASSANDRA_INCLUDE_DIR} : ${CASSANDRA_LIBRARY}") -message (STATUS "Using libuv: ${LIBUV_ROOT_DIR} : ${LIBUV_LIBRARY}") diff --git a/cmake/find/libuv.cmake b/cmake/find/libuv.cmake new file mode 100644 index 00000000000..f0023209309 --- /dev/null +++ b/cmake/find/libuv.cmake @@ -0,0 +1,22 @@ +if (OS_DARWIN AND COMPILER_GCC) + message (WARNING "libuv cannot be built with GCC in macOS due to a bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93082") + SET(MISSING_INTERNAL_LIBUV_LIBRARY 1) + return() +endif() + +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libuv") + message (WARNING "submodule contrib/libuv is missing. to fix try run: \n git submodule update --init --recursive") + SET(MISSING_INTERNAL_LIBUV_LIBRARY 1) + return() +endif() + +if (MAKE_STATIC_LIBRARIES) + set (LIBUV_LIBRARY uv_a) +else() + set (LIBUV_LIBRARY uv) +endif() + +set (LIBUV_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libuv") +set (LIBUV_INCLUDE_DIR "${LIBUV_ROOT_DIR}/include") + +message (STATUS "Using libuv: ${LIBUV_ROOT_DIR} : ${LIBUV_LIBRARY}") diff --git a/cmake/find/llvm.cmake b/cmake/find/llvm.cmake index e0ba1d9b039..e08f45b9932 100644 --- a/cmake/find/llvm.cmake +++ b/cmake/find/llvm.cmake @@ -2,7 +2,7 @@ if (APPLE OR SPLIT_SHARED_LIBRARIES OR NOT ARCH_AMD64) set (ENABLE_EMBEDDED_COMPILER OFF CACHE INTERNAL "") endif() -option (ENABLE_EMBEDDED_COMPILER "Set to TRUE to enable support for 'compile_expressions' option for query execution" ${ENABLE_LIBRARIES}) +option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ON) # Broken in macos. TODO: update clang, re-test, enable on Apple if (ENABLE_EMBEDDED_COMPILER AND NOT SPLIT_SHARED_LIBRARIES AND ARCH_AMD64 AND NOT (SANITIZE STREQUAL "undefined")) option (USE_INTERNAL_LLVM_LIBRARY "Use bundled or system LLVM library." 
${NOT_UNBUNDLED}) @@ -24,9 +24,9 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm/CMakeLists.txt") endif () if (NOT USE_INTERNAL_LLVM_LIBRARY) - set (LLVM_PATHS "/usr/local/lib/llvm") + set (LLVM_PATHS "/usr/local/lib/llvm" "/usr/lib/llvm") - foreach(llvm_v 10 9 8) + foreach(llvm_v 11.1 11) if (NOT LLVM_FOUND) find_package (LLVM ${llvm_v} CONFIG PATHS ${LLVM_PATHS}) endif () @@ -102,7 +102,6 @@ LLVMRuntimeDyld LLVMX86CodeGen LLVMX86Desc LLVMX86Info -LLVMX86Utils LLVMAsmPrinter LLVMDebugInfoDWARF LLVMGlobalISel diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 6c23ce8bc91..f60f7431389 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -40,7 +40,7 @@ if (SANITIZE) # RelWithDebInfo, and downgrade optimizations to -O1 but not to -Og, to # keep the binary size down. # TODO: try compiling with -Og and with ld.gold. - set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/msan_suppressions.txt") + set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/msan_suppressions.txt") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}") diff --git a/cmake/target.cmake b/cmake/target.cmake index 7174ca3c2a9..d1a0b8f9cbf 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -12,6 +12,9 @@ elseif (CMAKE_SYSTEM_NAME MATCHES "FreeBSD") elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin") set (OS_DARWIN 1) add_definitions(-D OS_DARWIN) +elseif (CMAKE_SYSTEM_NAME MATCHES "SunOS") + set (OS_SUNOS 1) + add_definitions(-D OS_SUNOS) endif () if (CMAKE_CROSSCOMPILING) diff --git a/contrib/boost b/contrib/boost index a8d43d3142c..1ccbb5a522a 160000 --- a/contrib/boost +++ b/contrib/boost @@ -1 +1 @@ -Subproject commit a8d43d3142cc6b26fc55bec33f7f6edb1156ab7a +Subproject commit 1ccbb5a522a571ce83b606dbc2e1011c42ecccfb diff --git a/contrib/cyrus-sasl b/contrib/cyrus-sasl index 9995bf9d8e1..e6466edfd63 160000 --- a/contrib/cyrus-sasl +++ b/contrib/cyrus-sasl @@ -1 +1 @@ -Subproject commit 9995bf9d8e14f58934d9313ac64f13780d6dd3c9 +Subproject commit e6466edfd638cc5073debe941c53345b18a09512 diff --git a/contrib/librdkafka b/contrib/librdkafka index cf11d0aa36d..43491d33ca2 160000 --- a/contrib/librdkafka +++ b/contrib/librdkafka @@ -1 +1 @@ -Subproject commit cf11d0aa36d4738f2c9bf4377807661660f1be76 +Subproject commit 43491d33ca2826531d1e3cae70d4bf1e5249e3c9 diff --git a/contrib/llvm b/contrib/llvm index 8f24d507c1c..cfaf365cf96 160000 --- a/contrib/llvm +++ b/contrib/llvm @@ -1 +1 @@ -Subproject commit 8f24d507c1cfeec66d27f48fe74518fd278e2d25 +Subproject commit cfaf365cf96918999d09d976ec736b4518cf5d02 diff --git a/docker/packager/README.md b/docker/packager/README.md index 9fbc2d7f8b5..a745f6225fa 100644 --- a/docker/packager/README.md +++ b/docker/packager/README.md @@ -3,10 +3,10 @@ compilers and build settings. 
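The sanitize.cmake hunk above adds `-fsanitize-memory-use-after-dtor` to `MSAN_FLAGS` (paired with `MSAN_OPTIONS=poison_in_dtor=1` in the test images further down), which makes MSan poison an object's bytes when its destructor runs so that later reads are reported. A minimal sketch of the bug class it catches — the names here are illustrative, not from the ClickHouse sources:

```cpp
#include <cstdio>
#include <new>

struct Widget
{
    int value = 42;
    ~Widget() { }  // with poison_in_dtor=1, MSan poisons the bytes of *this here
};

int main()
{
    alignas(Widget) unsigned char buf[sizeof(Widget)];
    Widget * w = new (buf) Widget;  // construct in caller-owned storage
    w->~Widget();                   // the storage stays alive, the object does not
    // Use-after-dtor: -fsanitize-memory-use-after-dtor reports this read,
    // even though the underlying buffer was never freed.
    std::printf("%d\n", w->value);
}
```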
Correctly configured Docker daemon is single dependency. Usage: -Build deb package with `gcc-9` in `debug` mode: +Build deb package with `clang-11` in `debug` mode: ``` $ mkdir deb/test_output -$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=gcc-9 --build-type=debug +$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-11 --build-type=debug $ ls -l deb/test_output -rw-r--r-- 1 root root 3730 clickhouse-client_18.14.2+debug_all.deb -rw-r--r-- 1 root root 84221888 clickhouse-common-static_18.14.2+debug_amd64.deb @@ -18,11 +18,11 @@ $ ls -l deb/test_output ``` -Build ClickHouse binary with `clang-10` and `address` sanitizer in `relwithdebuginfo` +Build ClickHouse binary with `clang-11` and `address` sanitizer in `relwithdebuginfo` mode: ``` $ mkdir $HOME/some_clickhouse -$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-10 --sanitizer=address +$ ./packager --output-dir=$HOME/some_clickhouse --package-type binary --compiler=clang-11 --sanitizer=address $ ls -l $HOME/some_clickhouse -rwxr-xr-x 1 root root 787061952 clickhouse lrwxrwxrwx 1 root root 10 clickhouse-benchmark -> clickhouse diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index fccae66b66b..56b2af5cf84 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -37,23 +37,18 @@ RUN apt-get update \ bash \ build-essential \ ccache \ - clang-10 \ clang-11 \ - clang-tidy-10 \ clang-tidy-11 \ cmake \ curl \ - g++-9 \ - gcc-9 \ + g++-10 \ + gcc-10 \ gdb \ git \ gperf \ libicu-dev \ libreadline-dev \ - lld-10 \ lld-11 \ - llvm-10 \ - llvm-10-dev \ llvm-11 \ llvm-11-dev \ moreutils \ diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 902929a2644..2f1d28efe61 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -35,22 +35,15 @@ RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/1/dpkg-deb \ RUN apt-get update \ && apt-get install \ alien \ - clang-10 \ clang-11 \ - clang-tidy-10 \ clang-tidy-11 \ cmake \ debhelper \ devscripts \ - g++-9 \ - gcc-9 \ gdb \ git \ gperf \ - lld-10 \ lld-11 \ - llvm-10 \ - llvm-10-dev \ llvm-11 \ llvm-11-dev \ moreutils \ diff --git a/docker/packager/packager b/docker/packager/packager index 0e01d629db2..836f30dec42 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -181,9 +181,8 @@ if __name__ == "__main__": parser.add_argument("--clickhouse-repo-path", default=os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)) parser.add_argument("--output-dir", required=True) parser.add_argument("--build-type", choices=("debug", ""), default="") - parser.add_argument("--compiler", choices=("clang-10", "clang-10-darwin", "clang-10-aarch64", "clang-10-freebsd", - "clang-11", "clang-11-darwin", "clang-11-aarch64", "clang-11-freebsd", - "gcc-9", "gcc-10"), default="gcc-9") + parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-aarch64", "clang-11-freebsd", + "gcc-10"), default="clang-11") parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="") parser.add_argument("--unbundled", action="store_true") parser.add_argument("--split-binary", action="store_true") diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 48c978366c6..d302fec7417 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -64,6 +64,8 @@ RUN groupadd -r clickhouse --gid=101 \
clickhouse-client=$version \ clickhouse-server=$version ; \ fi \ + && wget --progress=bar:force:noscroll "https://github.com/tianon/gosu/releases/download/$gosu_ver/gosu-$(dpkg --print-architecture)" -O /bin/gosu \ + && chmod +x /bin/gosu \ && clickhouse-local -q 'SELECT * FROM system.build_options' \ && rm -rf \ /var/lib/apt/lists/* \ @@ -76,8 +78,6 @@ RUN groupadd -r clickhouse --gid=101 \ # we need to allow "others" access to clickhouse folder, because docker container # can be started with arbitrary uid (openshift usecase) -ADD https://github.com/tianon/gosu/releases/download/$gosu_ver/gosu-amd64 /bin/gosu - RUN locale-gen en_US.UTF-8 ENV LANG en_US.UTF-8 ENV LANGUAGE en_US:en @@ -88,10 +88,7 @@ RUN mkdir /docker-entrypoint-initdb.d COPY docker_related_config.xml /etc/clickhouse-server/config.d/ COPY entrypoint.sh /entrypoint.sh - -RUN chmod +x \ - /entrypoint.sh \ - /bin/gosu +RUN chmod +x /entrypoint.sh EXPOSE 9000 8123 9009 VOLUME /var/lib/clickhouse diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index e8653c2122e..44b9d42d6a1 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -51,13 +51,13 @@ RUN apt-get update \ # Sanitizer options for services (clickhouse-server) RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7'" >> /etc/environment; \ echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \ - echo "MSAN_OPTIONS='abort_on_error=1'" >> /etc/environment; \ + echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment; \ echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment; \ ln -s /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-symbolizer /usr/bin/llvm-symbolizer; # Sanitizer options for current shell (not current, but the one that will be spawned on "docker run") # (but w/o verbosity for TSAN, otherwise test.reference will not match) ENV TSAN_OPTIONS='halt_on_error=1 history_size=7' ENV UBSAN_OPTIONS='print_stacktrace=1' -ENV MSAN_OPTIONS='abort_on_error=1' +ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' CMD sleep 1 diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index a7cc398e5c9..86e1252b6f3 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -370,6 +370,10 @@ function run_tests # Depends on AWS 01801_s3_cluster + + # Depends on LLVM JIT + 01852_jit_if + 01865_jit_comparison_constant_result ) (time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" diff --git a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml index 1e82f137961..dd6b7467afc 100644 --- a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml +++ b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml @@ -14,11 +14,6 @@ 10G - - - - - diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index bda6f5a719d..9b100d947b0 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -1,6 +1,17 @@ #!/bin/bash set -e +mkdir -p /etc/docker/ +cat > /etc/docker/daemon.json << EOF +{ + "ipv6": true, + "fixed-cidr-v6": "fd00::/8", + "ip-forward": true, + "insecure-registries" : ["dockerhub-proxy.sas.yp-c.yandex.net:5000"], + "registry-mirrors" : 
["http://dockerhub-proxy.sas.yp-c.yandex.net:5000"] +} +EOF + dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 &>/var/log/somefile & set +e diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 3ee3612d8da..093629e61fc 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -243,9 +243,12 @@ function run_tests profile_seconds_left=600 # Run the tests. + total_tests=$(echo "$test_files" | wc -w) + current_test=0 test_name="" for test in $test_files do + echo "$current_test of $total_tests tests complete" > status.txt # Check that both servers are alive, and restart them if they die. clickhouse-client --port $LEFT_SERVER_PORT --query "select 1 format Null" \ || { echo $test_name >> left-server-died.log ; restart ; } @@ -273,6 +276,7 @@ function run_tests profile_seconds_left=$(awk -F' ' \ 'BEGIN { s = '$profile_seconds_left'; } /^profile-total/ { s -= $2 } END { print s }' \ "$test_name-raw.tsv") + current_test=$((current_test + 1)) done unset TIMEFORMAT @@ -1015,6 +1019,7 @@ done wait # Create per-query flamegraphs +touch report/query-files.txt IFS=$'\n' for version in {right,left} do @@ -1205,6 +1210,12 @@ function upload_results /^metric/ { print old_sha, new_sha, $2, $3 }' \ | "${client[@]}" --query "INSERT INTO run_attributes_v1 FORMAT TSV" + # Grepping numactl results from log is too crazy, I'll just call it again. + "${client[@]}" --query "INSERT INTO run_attributes_v1 FORMAT TSV" < @@ -638,12 +639,13 @@ elif args.report == 'all-queries': for t in tables: print(t) - print(""" + print(f""" diff --git a/docker/test/sqlancer/Dockerfile b/docker/test/sqlancer/Dockerfile index 253ca1b729a..6bcdc3df5cd 100644 --- a/docker/test/sqlancer/Dockerfile +++ b/docker/test/sqlancer/Dockerfile @@ -2,6 +2,7 @@ FROM ubuntu:20.04 RUN apt-get update --yes && env DEBIAN_FRONTEND=noninteractive apt-get install wget unzip git openjdk-14-jdk maven python3 --yes --no-install-recommends + RUN wget https://github.com/sqlancer/sqlancer/archive/master.zip -O /sqlancer.zip RUN mkdir /sqlancer && \ cd /sqlancer && \ diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index bd7eee4c166..c5e24b5e37f 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -35,7 +35,7 @@ RUN apt-get update \ ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone -RUN pip3 install urllib3 testflows==1.6.74 docker-compose docker dicttoxml kazoo tzlocal +RUN pip3 install urllib3 testflows==1.6.74 docker-compose docker dicttoxml kazoo tzlocal python-dateutil numpy ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 17.09.1-ce @@ -74,4 +74,3 @@ VOLUME /var/lib/docker EXPOSE 2375 ENTRYPOINT ["dockerd-entrypoint.sh"] CMD ["sh", "-c", "python3 regression.py --no-color -o classic --local --clickhouse-binary-path ${CLICKHOUSE_TESTS_SERVER_BIN_PATH} --log test.log ${TESTFLOWS_OPTS}; cat test.log | tfs report results --format json > results.json; /usr/local/bin/process_testflows_result.py || echo -e 'failure\tCannot parse results' > check_status.tsv"] - diff --git a/docs/en/commercial/cloud.md b/docs/en/commercial/cloud.md index 953a0ab5748..5a897a77db2 100644 --- a/docs/en/commercial/cloud.md +++ b/docs/en/commercial/cloud.md @@ -31,10 +31,10 @@ toc_title: Cloud ## Alibaba Cloud {#alibaba-cloud} -Alibaba Cloud Managed Service for ClickHouse. 
[China Site](https://www.aliyun.com/product/clickhouse) (will be available at the international site in May 2021). Provides the following key features: +[Alibaba Cloud Managed Service for ClickHouse](https://www.alibabacloud.com/product/clickhouse) provides the following key features: - Highly reliable cloud disk storage engine based on [Alibaba Cloud Apsara](https://www.alibabacloud.com/product/apsara-stack) distributed system -- Expand capacity on-demand without manual data migration +- Expand capacity on demand without manual data migration - Support single-node, single-replica, multi-node, and multi-replica architectures, and support hot and cold data tiering - Support access allow-list, one-key recovery, multi-layer network security protection, cloud disk encryption - Seamless integration with cloud log systems, databases, and data application tools diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md index 24ecbdc1c2c..f34107ca3d3 100644 --- a/docs/en/development/build-osx.md +++ b/docs/en/development/build-osx.md @@ -124,4 +124,11 @@ Reboot. To check if it’s working, you can use `ulimit -n` command. +## Run ClickHouse server: + +``` +cd ClickHouse +./build/programs/clickhouse-server --config-file ./programs/server/config.xml +``` + [Original article](https://clickhouse.tech/docs/en/development/build_osx/) diff --git a/docs/en/development/build.md b/docs/en/development/build.md index 3126eb76e21..b6cb68f7ff8 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -73,9 +73,9 @@ The build requires the following components: - Git (is used only to checkout the sources, it’s not needed for the build) - CMake 3.10 or newer -- Ninja (recommended) or Make -- C++ compiler: gcc 10 or clang 8 or newer -- Linker: lld or gold (the classic GNU ld won’t work) +- Ninja +- C++ compiler: clang-11 or newer +- Linker: lld - Python (is only used inside LLVM build and it is optional) If all the components are installed, you may build in the same way as the steps above. @@ -83,7 +83,7 @@ If all the components are installed, you may build in the same way as the steps Example for Ubuntu Eoan: ``` bash sudo apt update -sudo apt install git cmake ninja-build g++ python +sudo apt install git cmake ninja-build clang++ python git clone --recursive https://github.com/ClickHouse/ClickHouse.git mkdir build && cd build cmake ../ClickHouse @@ -92,7 +92,7 @@ ninja Example for OpenSUSE Tumbleweed: ``` bash -sudo zypper install git cmake ninja gcc-c++ python lld +sudo zypper install git cmake ninja clang-c++ python lld git clone --recursive https://github.com/ClickHouse/ClickHouse.git mkdir build && cd build cmake ../ClickHouse @@ -102,7 +102,7 @@ ninja Example for Fedora Rawhide: ``` bash sudo yum update -yum --nogpg install git cmake make gcc-c++ python3 +yum --nogpg install git cmake make clang-c++ python3 git clone --recursive https://github.com/ClickHouse/ClickHouse.git mkdir build && cd build cmake ../ClickHouse diff --git a/docs/en/development/cmake-in-clickhouse.md b/docs/en/development/cmake-in-clickhouse.md deleted file mode 100644 index c06115b627f..00000000000 --- a/docs/en/development/cmake-in-clickhouse.md +++ /dev/null @@ -1,288 +0,0 @@ -# CMake in ClickHouse - -## TL; DR How to make ClickHouse compile and link faster? - -Developer only! This command will likely fulfill most of your needs. Run before calling `ninja`. - -```cmake -cmake .. 
\ - -DCMAKE_C_COMPILER=/bin/clang-10 \ - -DCMAKE_CXX_COMPILER=/bin/clang++-10 \ - -DCMAKE_BUILD_TYPE=Debug \ - -DENABLE_CLICKHOUSE_ALL=OFF \ - -DENABLE_CLICKHOUSE_SERVER=ON \ - -DENABLE_CLICKHOUSE_CLIENT=ON \ - -DUSE_STATIC_LIBRARIES=OFF \ - -DSPLIT_SHARED_LIBRARIES=ON \ - -DENABLE_LIBRARIES=OFF \ - -DUSE_UNWIND=ON \ - -DENABLE_UTILS=OFF \ - -DENABLE_TESTS=OFF -``` - -## CMake files types - -1. ClickHouse's source CMake files (located in the root directory and in `/src`). -2. Arch-dependent CMake files (located in `/cmake/*os_name*`). -3. Libraries finders (search for contrib libraries, located in `/cmake/find`). -3. Contrib build CMake files (used instead of libraries' own CMake files, located in `/cmake/modules`) - -## List of CMake flags - -* This list is auto-generated by [this Python script](https://github.com/clickhouse/clickhouse/blob/master/docs/tools/cmake_in_clickhouse_generator.py). -* The flag name is a link to its position in the code. -* If an option's default value is itself an option, it's also a link to its position in this list. -### ClickHouse modes - -| Name | Default value | Description | Comment | -|------|---------------|-------------|---------| -| [`ENABLE_CLICKHOUSE_ALL`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L8) | `ON` | Enable all ClickHouse modes by default | The `clickhouse` binary is a multi purpose tool that contains multiple execution modes (client, server, etc.), each of them may be built and linked as a separate library. If you do not know what modes you need, turn this option OFF and enable SERVER and CLIENT only. | -| [`ENABLE_CLICKHOUSE_BENCHMARK`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L18) | `ENABLE_CLICKHOUSE_ALL` | Queries benchmarking mode | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-benchmark/ | -| [`ENABLE_CLICKHOUSE_CLIENT`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L11) | `ENABLE_CLICKHOUSE_ALL` | Client mode (interactive tui/shell that connects to the server) | | -| [`ENABLE_CLICKHOUSE_COMPRESSOR`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L23) | `ENABLE_CLICKHOUSE_ALL` | Data compressor and decompressor | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-compressor/ | -| [`ENABLE_CLICKHOUSE_COPIER`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L26) | `ENABLE_CLICKHOUSE_ALL` | Inter-cluster data copying mode | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-copier/ | -| [`ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L20) | `ENABLE_CLICKHOUSE_ALL` | Configs processor (extract values etc.) 
| | -| [`ENABLE_CLICKHOUSE_FORMAT`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L28) | `ENABLE_CLICKHOUSE_ALL` | Queries pretty-printer and formatter with syntax highlighting | | -| [`ENABLE_CLICKHOUSE_GIT_IMPORT`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L47) | `ENABLE_CLICKHOUSE_ALL` | A tool to analyze Git repositories | https://presentations.clickhouse.tech/matemarketing_2020/ | -| [`ENABLE_CLICKHOUSE_INSTALL`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L51) | `OFF` | Install ClickHouse without .deb/.rpm/.tgz packages (having the binary only) | | -| [`ENABLE_CLICKHOUSE_LIBRARY_BRIDGE`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L43) | `ENABLE_CLICKHOUSE_ALL` | HTTP-server working like a proxy to Library dictionary source | | -| [`ENABLE_CLICKHOUSE_LOCAL`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L15) | `ENABLE_CLICKHOUSE_ALL` | Local files fast processing mode | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-local/ | -| [`ENABLE_CLICKHOUSE_OBFUSCATOR`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L32) | `ENABLE_CLICKHOUSE_ALL` | Table data obfuscator (convert real data to benchmark-ready one) | https://clickhouse.tech/docs/en/operations/utilities/clickhouse-obfuscator/ | -| [`ENABLE_CLICKHOUSE_ODBC_BRIDGE`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L37) | `ENABLE_CLICKHOUSE_ALL` | HTTP-server working like a proxy to ODBC driver | | -| [`ENABLE_CLICKHOUSE_SERVER`](https://github.com/clickhouse/clickhouse/blob/master/programs/CMakeLists.txt#L10) | `ENABLE_CLICKHOUSE_ALL` | Server mode (main mode) | | - -### External libraries -Note that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras. 
- -| Name | Default value | Description | Comment | -|------|---------------|-------------|---------| -| [`ENABLE_AMQPCPP`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/amqpcpp.cmake#L6) | `ENABLE_LIBRARIES` | Enalbe AMQP-CPP | | -| [`ENABLE_AVRO`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/avro.cmake#L2) | `ENABLE_LIBRARIES` | Enable Avro | Needed when using Apache Avro serialization format | -| [`ENABLE_BASE64`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/base64.cmake#L2) | `ENABLE_LIBRARIES` | Enable base64 | | -| [`ENABLE_BROTLI`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/brotli.cmake#L1) | `ENABLE_LIBRARIES` | Enable brotli | | -| [`ENABLE_CAPNP`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/capnp.cmake#L1) | `ENABLE_LIBRARIES` | Enable Cap'n Proto | | -| [`ENABLE_CASSANDRA`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/cassandra.cmake#L6) | `ENABLE_LIBRARIES` | Enable Cassandra | | -| [`ENABLE_CCACHE`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ccache.cmake#L22) | `ENABLE_CCACHE_BY_DEFAULT` | Speedup re-compilations using ccache (external tool) | https://ccache.dev/ | -| [`ENABLE_CLANG_TIDY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/analysis.cmake#L2) | `OFF` | Use clang-tidy static analyzer | https://clang.llvm.org/extra/clang-tidy/ | -| [`ENABLE_CURL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/curl.cmake#L1) | `ENABLE_LIBRARIES` | Enable curl | | -| [`ENABLE_DATASKETCHES`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/datasketches.cmake#L1) | `ENABLE_LIBRARIES` | Enable DataSketches | | -| [`ENABLE_EMBEDDED_COMPILER`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/llvm.cmake#L5) | `ENABLE_LIBRARIES` | Set to TRUE to enable support for 'compile_expressions' option for query execution | | -| [`ENABLE_FASTOPS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/fastops.cmake#L2) | `ENABLE_LIBRARIES` | Enable fast vectorized mathematical functions library by Mikhail Parakhin | | -| [`ENABLE_GPERF`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/gperf.cmake#L5) | `ENABLE_LIBRARIES` | Use gperf function hash generator tool | | -| [`ENABLE_GRPC`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/grpc.cmake#L8) | `ENABLE_GRPC_DEFAULT` | Use gRPC | | -| [`ENABLE_GSASL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libgsasl.cmake#L1) | `ENABLE_LIBRARIES` | Enable gsasl library | | -| [`ENABLE_H3`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/h3.cmake#L1) | `ENABLE_LIBRARIES` | Enable H3 | | -| [`ENABLE_HDFS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/hdfs3.cmake#L2) | `ENABLE_LIBRARIES` | Enable HDFS | | -| [`ENABLE_ICU`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/icu.cmake#L2) | `ENABLE_LIBRARIES` | Enable ICU | | -| [`ENABLE_LDAP`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ldap.cmake#L5) | `ENABLE_LIBRARIES` | Enable LDAP | | -| [`ENABLE_LIBPQXX`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libpqxx.cmake#L1) | `ENABLE_LIBRARIES` | Enalbe libpqxx | | -| [`ENABLE_MSGPACK`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/msgpack.cmake#L1) | `ENABLE_LIBRARIES` | Enable msgpack library | | -| [`ENABLE_MYSQL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/mysqlclient.cmake#L2) | 
`ENABLE_LIBRARIES` | Enable MySQL | | -| [`ENABLE_NURAFT`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/nuraft.cmake#L1) | `ENABLE_LIBRARIES` | Enable NuRaft | | -| [`ENABLE_ODBC`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/odbc.cmake#L1) | `ENABLE_LIBRARIES` | Enable ODBC library | | -| [`ENABLE_ORC`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/orc.cmake#L1) | `ENABLE_LIBRARIES` | Enable ORC | | -| [`ENABLE_PARQUET`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/parquet.cmake#L2) | `ENABLE_LIBRARIES` | Enable parquet | | -| [`ENABLE_PROTOBUF`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/protobuf.cmake#L1) | `ENABLE_LIBRARIES` | Enable protobuf | | -| [`ENABLE_RAPIDJSON`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rapidjson.cmake#L1) | `ENABLE_LIBRARIES` | Use rapidjson | | -| [`ENABLE_RDKAFKA`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rdkafka.cmake#L1) | `ENABLE_LIBRARIES` | Enable kafka | | -| [`ENABLE_ROCKSDB`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rocksdb.cmake#L5) | `ENABLE_LIBRARIES` | Enable ROCKSDB | | -| [`ENABLE_S3`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/s3.cmake#L2) | `ENABLE_LIBRARIES` | Enable S3 | | -| [`ENABLE_SSL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ssl.cmake#L3) | `ENABLE_LIBRARIES` | Enable ssl | Needed when securely connecting to an external server, e.g. clickhouse-client --host ... --secure | -| [`ENABLE_STATS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/stats.cmake#L1) | `ENABLE_LIBRARIES` | Enalbe StatsLib library | | - - -### External libraries system/bundled mode - -| Name | Default value | Description | Comment | -|------|---------------|-------------|---------| -| [`USE_INTERNAL_AVRO_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/avro.cmake#L11) | `ON` | Set to FALSE to use system avro library instead of bundled | | -| [`USE_INTERNAL_AWS_S3_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/s3.cmake#L14) | `ON` | Set to FALSE to use system S3 instead of bundled (experimental set to OFF on your own risk) | | -| [`USE_INTERNAL_BROTLI_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/brotli.cmake#L12) | `USE_STATIC_LIBRARIES` | Set to FALSE to use system libbrotli library instead of bundled | Many system ship only dynamic brotly libraries, so we back off to bundled by default | -| [`USE_INTERNAL_CAPNP_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/capnp.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system capnproto library instead of bundled | | -| [`USE_INTERNAL_CURL`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/curl.cmake#L10) | `NOT_UNBUNDLED` | Use internal curl library | | -| [`USE_INTERNAL_DATASKETCHES_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/datasketches.cmake#L5) | `NOT_UNBUNDLED` | Set to FALSE to use system DataSketches library instead of bundled | | -| [`USE_INTERNAL_GRPC_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/grpc.cmake#L25) | `NOT_UNBUNDLED` | Set to FALSE to use system gRPC library instead of bundled. (Experimental. Set to OFF on your own risk) | Normally we use the internal gRPC framework. 
You can set USE_INTERNAL_GRPC_LIBRARY to OFF to force using the external gRPC framework, which should be installed in the system in this case. The external gRPC framework can be installed in the system by running sudo apt-get install libgrpc++-dev protobuf-compiler-grpc | -| [`USE_INTERNAL_GTEST_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/gtest.cmake#L3) | `NOT_UNBUNDLED` | Set to FALSE to use system Google Test instead of bundled | | -| [`USE_INTERNAL_H3_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/h3.cmake#L9) | `ON` | Set to FALSE to use system h3 library instead of bundled | | -| [`USE_INTERNAL_HDFS3_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/hdfs3.cmake#L14) | `ON` | Set to FALSE to use system HDFS3 instead of bundled (experimental - set to OFF on your own risk) | | -| [`USE_INTERNAL_ICU_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/icu.cmake#L15) | `NOT_UNBUNDLED` | Set to FALSE to use system ICU library instead of bundled | | -| [`USE_INTERNAL_LDAP_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ldap.cmake#L14) | `NOT_UNBUNDLED` | Set to FALSE to use system *LDAP library instead of bundled | | -| [`USE_INTERNAL_LIBCXX_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/cxx.cmake#L15) | `USE_INTERNAL_LIBCXX_LIBRARY_DEFAULT` | Disable to use system libcxx and libcxxabi libraries instead of bundled | | -| [`USE_INTERNAL_LIBGSASL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libgsasl.cmake#L12) | `USE_STATIC_LIBRARIES` | Set to FALSE to use system libgsasl library instead of bundled | when USE_STATIC_LIBRARIES we usually need to pick up hell a lot of dependencies for libgsasl | -| [`USE_INTERNAL_LIBXML2_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/libxml2.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system libxml2 library instead of bundled | | -| [`USE_INTERNAL_LLVM_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/llvm.cmake#L8) | `NOT_UNBUNDLED` | Use bundled or system LLVM library. | | -| [`USE_INTERNAL_MSGPACK_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/msgpack.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system msgpack library instead of bundled | | -| [`USE_INTERNAL_MYSQL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/mysqlclient.cmake#L15) | `NOT_UNBUNDLED` | Set to FALSE to use system mysqlclient library instead of bundled | | -| [`USE_INTERNAL_ODBC_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/odbc.cmake#L22) | `NOT_UNBUNDLED` | Use internal ODBC library | | -| [`USE_INTERNAL_ORC_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/orc.cmake#L11) | `ON` | Set to FALSE to use system ORC instead of bundled (experimental set to OFF on your own risk) | | -| [`USE_INTERNAL_PARQUET_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/parquet.cmake#L16) | `NOT_UNBUNDLED` | Set to FALSE to use system parquet library instead of bundled | | -| [`USE_INTERNAL_POCO_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/poco.cmake#L1) | `ON` | Use internal Poco library | | -| [`USE_INTERNAL_PROTOBUF_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/protobuf.cmake#L14) | `NOT_UNBUNDLED` | Set to FALSE to use system protobuf instead of bundled. (Experimental. 
Set to OFF on your own risk) | Normally we use the internal protobuf library. You can set USE_INTERNAL_PROTOBUF_LIBRARY to OFF to force using the external protobuf library, which should be installed in the system in this case. The external protobuf library can be installed in the system by running sudo apt-get install libprotobuf-dev protobuf-compiler libprotoc-dev | -| [`USE_INTERNAL_RAPIDJSON_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rapidjson.cmake#L9) | `NOT_UNBUNDLED` | Set to FALSE to use system rapidjson library instead of bundled | | -| [`USE_INTERNAL_RDKAFKA_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rdkafka.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system librdkafka instead of the bundled | | -| [`USE_INTERNAL_RE2_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/re2.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system re2 library instead of bundled [slower] | | -| [`USE_INTERNAL_ROCKSDB_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/rocksdb.cmake#L14) | `NOT_UNBUNDLED` | Set to FALSE to use system ROCKSDB library instead of bundled | | -| [`USE_INTERNAL_SNAPPY_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/snappy.cmake#L10) | `NOT_UNBUNDLED` | Set to FALSE to use system snappy library instead of bundled | | -| [`USE_INTERNAL_SPARSEHASH_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/sparsehash.cmake#L1) | `ON` | Set to FALSE to use system sparsehash library instead of bundled | | -| [`USE_INTERNAL_SSL_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/ssl.cmake#L12) | `NOT_UNBUNDLED` | Set to FALSE to use system *ssl library instead of bundled | | -| [`USE_INTERNAL_XZ_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/xz.cmake#L1) | `NOT_UNBUNDLED` | Set to OFF to use system xz (lzma) library instead of bundled | | -| [`USE_INTERNAL_ZLIB_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/zlib.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system zlib library instead of bundled | | -| [`USE_INTERNAL_ZSTD_LIBRARY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/zstd.cmake#L1) | `NOT_UNBUNDLED` | Set to FALSE to use system zstd library instead of bundled | | - - -### Other flags - -| Name | Default value | Description | Comment | -|------|---------------|-------------|---------| -| [`ADD_GDB_INDEX_FOR_GOLD`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L209) | `OFF` | Add .gdb-index to resulting binaries for gold linker. | Ignored if `lld` is used | -| [`ARCH_NATIVE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L260) | `OFF` | Add -march=native compiler flag | | -| [`CLICKHOUSE_SPLIT_BINARY`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L113) | `OFF` | Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled | | -| [`COMPILER_PIPE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L249) | `ON` | -pipe compiler option | Less `/tmp` usage, more RAM usage. | -| [`ENABLE_CHECK_HEAVY_BUILDS`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L80) | `OFF` | Don't allow C++ translation units to compile too long or to take too much memory while compiling. 
| Take care to add prlimit in command line before ccache, or else ccache thinks that prlimit is compiler, and clang++ is its input file, and refuses to work with multiple inputs, e.g in ccache log: [2021-03-31T18:06:32.655327 36900] Command line: /usr/bin/ccache prlimit --as=10000000000 --data=5000000000 --cpu=600 /usr/bin/clang++-11 - ...... std=gnu++2a -MD -MT src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o -MF src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o.d -o src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o -c ../src/Storages/MergeTree/IMergeTreeDataPart.cpp [2021-03-31T18:06:32.656704 36900] Multiple input files: /usr/bin/clang++-11 and ../src/Storages/MergeTree/IMergeTreeDataPart.cpp Another way would be to use --ccache-skip option before clang++-11 to make ccache ignore it. | -| [`ENABLE_FUZZING`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L130) | `OFF` | Fuzzy testing using libfuzzer | Implies `WITH_COVERAGE` | -| [`ENABLE_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L383) | `ON` | Enable all external libraries by default | Turns on all external libs like s3, kafka, ODBC, ... | -| [`ENABLE_MULTITARGET_CODE`](https://github.com/clickhouse/clickhouse/blob/master/src/Functions/CMakeLists.txt#L102) | `ON` | Enable platform-dependent code | ClickHouse developers may use platform-dependent code under some macro (e.g. `ifdef ENABLE_MULTITARGET`). If turned ON, this option defines such macro. See `src/Functions/TargetSpecific.h` | -| [`ENABLE_TESTS`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L169) | `ON` | Provide unit_test_dbms target with Google.Test unit tests | If turned `ON`, assumes the user has either the system GTest library or the bundled one. | -| [`ENABLE_THINLTO`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L339) | `ON` | Clang-specific link time optimization | https://clang.llvm.org/docs/ThinLTO.html Applies to clang only. Disabled when building with tests or sanitizers. | -| [`FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L32) | `ON` | Stop/Fail CMake configuration if some ENABLE_XXX option is defined (either ON or OFF) but is not possible to satisfy | If turned off: e.g. when ENABLE_FOO is ON, but FOO tool was not found, the CMake will continue. | -| [`GLIBC_COMPATIBILITY`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L173) | `ON` | Enable compatibility with older glibc libraries. | Only for Linux, x86_64. | -| [`LINKER_NAME`](https://github.com/clickhouse/clickhouse/blob/master/cmake/tools.cmake#L44) | `OFF` | Linker name or full path | Example values: `lld-10`, `gold`. 
| -| [`LLVM_HAS_RTTI`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/llvm.cmake#L40) | `ON` | Enable if LLVM was build with RTTI enabled | | -| [`MAKE_STATIC_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L106) | `USE_STATIC_LIBRARIES` | Disable to make shared libraries | | -| [`PARALLEL_COMPILE_JOBS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/limit_jobs.cmake#L10) | `""` | Maximum number of concurrent compilation jobs | 1 if not set | -| [`PARALLEL_LINK_JOBS`](https://github.com/clickhouse/clickhouse/blob/master/cmake/limit_jobs.cmake#L13) | `""` | Maximum number of concurrent link jobs | 1 if not set | -| [`SANITIZE`](https://github.com/clickhouse/clickhouse/blob/master/cmake/sanitize.cmake#L7) | `""` | Enable one of the code sanitizers | Possible values: - `address` (ASan) - `memory` (MSan) - `thread` (TSan) - `undefined` (UBSan) - "" (no sanitizing) | -| [`SPLIT_SHARED_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L111) | `OFF` | Keep all internal libraries as separate .so files | DEVELOPER ONLY. Faster linking if turned on. | -| [`STRIP_DEBUG_SYMBOLS_FUNCTIONS`](https://github.com/clickhouse/clickhouse/blob/master/src/Functions/CMakeLists.txt#L51) | `STRIP_DSF_DEFAULT` | Do not generate debugger info for ClickHouse functions | Provides faster linking and lower binary size. Tradeoff is the inability to debug some source files with e.g. gdb (empty stack frames and no local variables)." | -| [`UNBUNDLED`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L389) | `OFF` | Use system libraries instead of ones in contrib/ | We recommend avoiding this mode for production builds because we can't guarantee all needed libraries exist in your system. This mode exists for enthusiastic developers who are searching for trouble. Useful for maintainers of OS packages. | -| [`USE_INCLUDE_WHAT_YOU_USE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L444) | `OFF` | Automatically reduce unneeded includes in source code (external tool) | https://github.com/include-what-you-use/include-what-you-use | -| [`USE_LIBCXX`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/cxx.cmake#L1) | `NOT_UNBUNDLED` | Use libc++ and libc++abi instead of libstdc++ | | -| [`USE_SENTRY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/sentry.cmake#L13) | `ENABLE_LIBRARIES` | Use Sentry | | -| [`USE_SIMDJSON`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/simdjson.cmake#L1) | `ENABLE_LIBRARIES` | Use simdjson | | -| [`USE_SNAPPY`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/snappy.cmake#L1) | `ENABLE_LIBRARIES` | Enable snappy library | | -| [`USE_STATIC_LIBRARIES`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L105) | `ON` | Disable to use shared libraries | | -| [`USE_UNWIND`](https://github.com/clickhouse/clickhouse/blob/master/cmake/find/unwind.cmake#L1) | `ENABLE_LIBRARIES` | Enable libunwind (better stacktraces) | | -| [`WERROR`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L399) | `OFF` | Enable -Werror compiler option | Using system libs can cause a lot of warnings in includes (on macro expansion). | -| [`WEVERYTHING`](https://github.com/clickhouse/clickhouse/blob/master/cmake/warnings.cmake#L17) | `ON` | Enable -Weverything option with some exceptions. | Add some warnings that are not available even with -Wall -Wextra -Wpedantic. 
Intended for exploration of new compiler warnings that may be found useful. Applies to clang only | -| [`WITH_COVERAGE`](https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L300) | `OFF` | Profile the resulting binary/binaries | Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc | - -## Developer's guide for adding new CMake options - -### Don't be obvious. Be informative. - -Bad: -```cmake -option (ENABLE_TESTS "Enables testing" OFF) -``` - -This description is quite useless as is neither gives the viewer any additional information nor explains the option purpose. - -Better: - -```cmake -option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF) -``` - -If the option's purpose can't be guessed by its name, or the purpose guess may be misleading, or option has some -pre-conditions, leave a comment above the `option()` line and explain what it does. -The best way would be linking the docs page (if it exists). -The comment is parsed into a separate column (see below). - -Even better: - -```cmake -# implies ${TESTS_ARE_ENABLED} -# see tests/CMakeLists.txt for implementation detail. -option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF) -``` - -### If the option's state could produce unwanted (or unusual) result, explicitly warn the user. - -Suppose you have an option that may strip debug symbols from the ClickHouse's part. -This can speed up the linking process, but produces a binary that cannot be debugged. -In that case, prefer explicitly raising a warning telling the developer that he may be doing something wrong. -Also, such options should be disabled if applies. - -Bad: -```cmake -option(STRIP_DEBUG_SYMBOLS_FUNCTIONS - "Do not generate debugger info for ClickHouse functions. - ${STRIP_DSF_DEFAULT}) - -if (STRIP_DEBUG_SYMBOLS_FUNCTIONS) - target_compile_options(clickhouse_functions PRIVATE "-g0") -endif() - -``` -Better: - -```cmake -# Provides faster linking and lower binary size. -# Tradeoff is the inability to debug some source files with e.g. gdb -# (empty stack frames and no local variables)." -option(STRIP_DEBUG_SYMBOLS_FUNCTIONS - "Do not generate debugger info for ClickHouse functions." - ${STRIP_DSF_DEFAULT}) - -if (STRIP_DEBUG_SYMBOLS_FUNCTIONS) - message(WARNING "Not generating debugger info for ClickHouse functions") - target_compile_options(clickhouse_functions PRIVATE "-g0") -endif() -``` - -### In the option's description, explain WHAT the option does rather than WHY it does something. - -The WHY explanation should be placed in the comment. -You may find that the option's name is self-descriptive. - -Bad: - -```cmake -option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON) -``` - -Better: - -```cmake -# Only applicable for clang. -# Turned off when building with tests or sanitizers. -option(ENABLE_THINLTO "Clang-specific link time optimisation" ON). -``` - -### Don't assume other developers know as much as you do. - -In ClickHouse, there are many tools used that an ordinary developer may not know. If you are in doubt, give a link to -the tool's docs. It won't take much of your time. - -Bad: - -```cmake -option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON) -``` - -Better (combined with the above hint): - -```cmake -# https://clang.llvm.org/docs/ThinLTO.html -# Only applicable for clang. 
-# Turned off when building with tests or sanitizers. -option(ENABLE_THINLTO "Clang-specific link time optimisation" ON). -``` - -Other example, bad: - -```cmake -option (USE_INCLUDE_WHAT_YOU_USE "Use 'include-what-you-use' tool" OFF) -``` - -Better: - -```cmake -# https://github.com/include-what-you-use/include-what-you-use -option (USE_INCLUDE_WHAT_YOU_USE "Reduce unneeded #include s (external tool)" OFF) -``` - -### Prefer consistent default values. - -CMake allows you to pass a plethora of values representing boolean `true/false`, e.g. `1, ON, YES, ...`. -Prefer the `ON/OFF` values, if possible. diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index eb0d92b7738..c07ab337cb0 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -139,6 +139,7 @@ The following settings can be specified in configuration file for given endpoint - `endpoint` — Specifies prefix of an endpoint. Mandatory. - `access_key_id` and `secret_access_key` — Specifies credentials to use with given endpoint. Optional. +- `region` — Specifies S3 region name. Optional. - `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint. Optional, default value is `false`. - `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Optional, default value is `false`. - `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be speficied multiple times. @@ -152,6 +153,7 @@ The following settings can be specified in configuration file for given endpoint https://storage.yandexcloud.net/my-test-bucket-768/ + diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 9874e87be78..8743090df41 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -17,7 +17,7 @@ Main features: - Partitions can be used if the [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md) is specified. - ClickHouse supports certain operations with partitions that are more effective than general operations on the same data with the same result. ClickHouse also automatically cuts off the partition data where the partitioning key is specified in the query. This also improves query performance. + ClickHouse supports certain operations with partitions that are more effective than general operations on the same data with the same result. ClickHouse also automatically cuts off the partition data where the partitioning key is specified in the query. - Data replication support. @@ -191,9 +191,7 @@ Sparse indexes allow you to work with a very large number of table rows, because ClickHouse does not require a unique primary key. You can insert multiple rows with the same primary key. -You can use `Nullable`-typed expressions in the `PRIMARY KEY` and `ORDER BY` clauses. To allow this feature, turn on the [allow_nullable_key](../../../operations/settings/settings.md#allow-nullable-key) setting. - -The [NULLS_LAST](../../../sql-reference/statements/select/order-by.md#sorting-of-special-values) principle applies for `NULL` values in the `ORDER BY` clause. 
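To make the (now discouraged) nullable-key feature concrete, a minimal sketch with an illustrative table name:

``` sql
SET allow_nullable_key = 1;

CREATE TABLE nullable_key_demo (k Nullable(Int32), v String)
ENGINE = MergeTree
ORDER BY k;

-- NULL keys sort last, following the NULLS_LAST principle.
INSERT INTO nullable_key_demo VALUES (1, 'a'), (NULL, 'b');
SELECT * FROM nullable_key_demo ORDER BY k;
```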
+You can use `Nullable`-typed expressions in the `PRIMARY KEY` and `ORDER BY` clauses but it is strongly discouraged. To allow this feature, turn on the [allow_nullable_key](../../../operations/settings/settings.md#allow-nullable-key) setting. The [NULLS_LAST](../../../sql-reference/statements/select/order-by.md#sorting-of-special-values) principle applies for `NULL` values in the `ORDER BY` clause. ### Selecting the Primary Key {#selecting-the-primary-key} @@ -741,6 +739,7 @@ Configuration markup: https://storage.yandexcloud.net/my-bucket/root-path/ your_access_key_id your_secret_access_key + your_base64_encoded_customer_key http://proxy1 @@ -766,6 +765,7 @@ Required parameters: - `secret_access_key` — S3 secret access key. Optional parameters: +- `region` — S3 region name. - `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`. - `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`. - `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL. diff --git a/docs/en/getting-started/example-datasets/ontime.md b/docs/en/getting-started/example-datasets/ontime.md index 83673cdceb6..f18acc6fd50 100644 --- a/docs/en/getting-started/example-datasets/ontime.md +++ b/docs/en/getting-started/example-datasets/ontime.md @@ -21,120 +21,121 @@ echo https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performa Creating a table: ``` sql -CREATE TABLE `ontime` ( - `Year` UInt16, - `Quarter` UInt8, - `Month` UInt8, - `DayofMonth` UInt8, - `DayOfWeek` UInt8, - `FlightDate` Date, - `UniqueCarrier` FixedString(7), - `AirlineID` Int32, - `Carrier` FixedString(2), - `TailNum` String, - `FlightNum` String, - `OriginAirportID` Int32, - `OriginAirportSeqID` Int32, - `OriginCityMarketID` Int32, - `Origin` FixedString(5), - `OriginCityName` String, - `OriginState` FixedString(2), - `OriginStateFips` String, - `OriginStateName` String, - `OriginWac` Int32, - `DestAirportID` Int32, - `DestAirportSeqID` Int32, - `DestCityMarketID` Int32, - `Dest` FixedString(5), - `DestCityName` String, - `DestState` FixedString(2), - `DestStateFips` String, - `DestStateName` String, - `DestWac` Int32, - `CRSDepTime` Int32, - `DepTime` Int32, - `DepDelay` Int32, - `DepDelayMinutes` Int32, - `DepDel15` Int32, - `DepartureDelayGroups` String, - `DepTimeBlk` String, - `TaxiOut` Int32, - `WheelsOff` Int32, - `WheelsOn` Int32, - `TaxiIn` Int32, - `CRSArrTime` Int32, - `ArrTime` Int32, - `ArrDelay` Int32, - `ArrDelayMinutes` Int32, - `ArrDel15` Int32, - `ArrivalDelayGroups` Int32, - `ArrTimeBlk` String, - `Cancelled` UInt8, - `CancellationCode` FixedString(1), - `Diverted` UInt8, - `CRSElapsedTime` Int32, - `ActualElapsedTime` Int32, - `AirTime` Int32, - `Flights` Int32, - `Distance` Int32, - `DistanceGroup` UInt8, - `CarrierDelay` Int32, - `WeatherDelay` Int32, - `NASDelay` Int32, - `SecurityDelay` Int32, - `LateAircraftDelay` Int32, - `FirstDepTime` String, - `TotalAddGTime` String, - `LongestAddGTime` String, - `DivAirportLandings` String, - `DivReachedDest` String, - `DivActualElapsedTime` String, - `DivArrDelay` String, - `DivDistance` String, - `Div1Airport` String, - `Div1AirportID` Int32, - `Div1AirportSeqID` Int32, - `Div1WheelsOn` String, - `Div1TotalGTime` String, - `Div1LongestGTime` String, - `Div1WheelsOff` 
String, - `Div1TailNum` String, - `Div2Airport` String, - `Div2AirportID` Int32, - `Div2AirportSeqID` Int32, - `Div2WheelsOn` String, - `Div2TotalGTime` String, - `Div2LongestGTime` String, - `Div2WheelsOff` String, - `Div2TailNum` String, - `Div3Airport` String, - `Div3AirportID` Int32, - `Div3AirportSeqID` Int32, - `Div3WheelsOn` String, - `Div3TotalGTime` String, - `Div3LongestGTime` String, - `Div3WheelsOff` String, - `Div3TailNum` String, - `Div4Airport` String, - `Div4AirportID` Int32, - `Div4AirportSeqID` Int32, - `Div4WheelsOn` String, - `Div4TotalGTime` String, - `Div4LongestGTime` String, - `Div4WheelsOff` String, - `Div4TailNum` String, - `Div5Airport` String, - `Div5AirportID` Int32, - `Div5AirportSeqID` Int32, - `Div5WheelsOn` String, - `Div5TotalGTime` String, - `Div5LongestGTime` String, - `Div5WheelsOff` String, - `Div5TailNum` String +CREATE TABLE `ontime` +( + `Year` UInt16, + `Quarter` UInt8, + `Month` UInt8, + `DayofMonth` UInt8, + `DayOfWeek` UInt8, + `FlightDate` Date, + `Reporting_Airline` String, + `DOT_ID_Reporting_Airline` Int32, + `IATA_CODE_Reporting_Airline` String, + `Tail_Number` Int32, + `Flight_Number_Reporting_Airline` String, + `OriginAirportID` Int32, + `OriginAirportSeqID` Int32, + `OriginCityMarketID` Int32, + `Origin` FixedString(5), + `OriginCityName` String, + `OriginState` FixedString(2), + `OriginStateFips` String, + `OriginStateName` String, + `OriginWac` Int32, + `DestAirportID` Int32, + `DestAirportSeqID` Int32, + `DestCityMarketID` Int32, + `Dest` FixedString(5), + `DestCityName` String, + `DestState` FixedString(2), + `DestStateFips` String, + `DestStateName` String, + `DestWac` Int32, + `CRSDepTime` Int32, + `DepTime` Int32, + `DepDelay` Int32, + `DepDelayMinutes` Int32, + `DepDel15` Int32, + `DepartureDelayGroups` String, + `DepTimeBlk` String, + `TaxiOut` Int32, + `WheelsOff` Int32, + `WheelsOn` Int32, + `TaxiIn` Int32, + `CRSArrTime` Int32, + `ArrTime` Int32, + `ArrDelay` Int32, + `ArrDelayMinutes` Int32, + `ArrDel15` Int32, + `ArrivalDelayGroups` Int32, + `ArrTimeBlk` String, + `Cancelled` UInt8, + `CancellationCode` FixedString(1), + `Diverted` UInt8, + `CRSElapsedTime` Int32, + `ActualElapsedTime` Int32, + `AirTime` Nullable(Int32), + `Flights` Int32, + `Distance` Int32, + `DistanceGroup` UInt8, + `CarrierDelay` Int32, + `WeatherDelay` Int32, + `NASDelay` Int32, + `SecurityDelay` Int32, + `LateAircraftDelay` Int32, + `FirstDepTime` String, + `TotalAddGTime` String, + `LongestAddGTime` String, + `DivAirportLandings` String, + `DivReachedDest` String, + `DivActualElapsedTime` String, + `DivArrDelay` String, + `DivDistance` String, + `Div1Airport` String, + `Div1AirportID` Int32, + `Div1AirportSeqID` Int32, + `Div1WheelsOn` String, + `Div1TotalGTime` String, + `Div1LongestGTime` String, + `Div1WheelsOff` String, + `Div1TailNum` String, + `Div2Airport` String, + `Div2AirportID` Int32, + `Div2AirportSeqID` Int32, + `Div2WheelsOn` String, + `Div2TotalGTime` String, + `Div2LongestGTime` String, + `Div2WheelsOff` String, + `Div2TailNum` String, + `Div3Airport` String, + `Div3AirportID` Int32, + `Div3AirportSeqID` Int32, + `Div3WheelsOn` String, + `Div3TotalGTime` String, + `Div3LongestGTime` String, + `Div3WheelsOff` String, + `Div3TailNum` String, + `Div4Airport` String, + `Div4AirportID` Int32, + `Div4AirportSeqID` Int32, + `Div4WheelsOn` String, + `Div4TotalGTime` String, + `Div4LongestGTime` String, + `Div4WheelsOff` String, + `Div4TailNum` String, + `Div5Airport` String, + `Div5AirportID` Int32, + `Div5AirportSeqID` Int32, + 
`Div5WheelsOn` String, + `Div5TotalGTime` String, + `Div5LongestGTime` String, + `Div5WheelsOff` String, + `Div5TailNum` String ) ENGINE = MergeTree -PARTITION BY Year -ORDER BY (Carrier, FlightDate) -SETTINGS index_granularity = 8192; + PARTITION BY Year + ORDER BY (IATA_CODE_Reporting_Airline, FlightDate) + SETTINGS index_granularity = 8192; ``` Loading data with multiple threads: @@ -206,7 +207,7 @@ LIMIT 10; Q4. The number of delays by carrier for 2007 ``` sql -SELECT Carrier, count(*) +SELECT IATA_CODE_Reporting_Airline AS Carrier, count(*) FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY Carrier @@ -220,29 +221,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3 FROM ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY Carrier -) +) q JOIN ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c2 FROM ontime WHERE Year=2007 GROUP BY Carrier -) USING Carrier +) qq USING Carrier ORDER BY c3 DESC; ``` Better version of the same query: ``` sql -SELECT Carrier, avg(DepDelay>10)*100 AS c3 +SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year=2007 GROUP BY Carrier @@ -256,29 +257,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3 FROM ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year>=2000 AND Year<=2008 GROUP BY Carrier -) +) q JOIN ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c2 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier -) USING Carrier +) qq USING Carrier ORDER BY c3 DESC; ``` Better version of the same query: ``` sql -SELECT Carrier, avg(DepDelay>10)*100 AS c3 +SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier @@ -297,7 +298,7 @@ FROM from ontime WHERE DepDelay>10 GROUP BY Year -) +) q JOIN ( select @@ -305,7 +306,7 @@ JOIN count(*) as c2 from ontime GROUP BY Year -) USING (Year) +) qq USING (Year) ORDER BY Year; ``` @@ -340,7 +341,7 @@ Q10. ``` sql SELECT - min(Year), max(Year), Carrier, count(*) AS cnt, + min(Year), max(Year), IATA_CODE_Reporting_Airline AS Carrier, count(*) AS cnt, sum(ArrDelayMinutes>30) AS flights_delayed, round(sum(ArrDelayMinutes>30)/count(*),2) AS rate FROM ontime diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index a8753de6abd..2134de9d0f3 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -102,7 +102,9 @@ For non-Linux operating systems and for AArch64 CPU arhitecture, ClickHouse buil - [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse` - [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse` -After downloading, you can use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data. To run `clickhouse server`, you have to additionally download [server](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.xml) and [users](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/users.xml) configuration files from GitHub. +After downloading, you can use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data. 
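As a quick smoke test of a freshly downloaded binary, `clickhouse local` can run a query without any server or configuration files; the query below is only illustrative:

``` bash
chmod a+x ./clickhouse
./clickhouse local --query "SELECT version(), 1 + 1"
```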
+
+Run `sudo ./clickhouse install` if you want to install ClickHouse system-wide (along with the needed configuration files, user setup, and so on). After that, run the `clickhouse start` command to start the clickhouse-server and `clickhouse-client` to connect to it.
 
 These builds are not recommended for use in production environments because they are less thoroughly tested, but you can do so on your own risk. They also have only a subset of ClickHouse features available.
diff --git a/docs/en/operations/access-rights.md b/docs/en/operations/access-rights.md
index 32f8fdcb642..9f7d2a0b95b 100644
--- a/docs/en/operations/access-rights.md
+++ b/docs/en/operations/access-rights.md
@@ -101,6 +101,9 @@ Privileges can be granted to a role by the [GRANT](../sql-reference/statements/g
 
 Row policy is a filter that defines which of the rows are available to a user or a role. Row policy contains filters for one particular table, as well as a list of roles and/or users which should use this row policy.
 
+!!! note "Warning"
+    Row policies make sense only for users with read-only access. If a user can modify a table or copy partitions between tables, it defeats the restrictions of row policies.
+
 Management queries:
 
 - [CREATE ROW POLICY](../sql-reference/statements/create/row-policy.md)
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index f86e9668f00..19671b523e3 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -430,7 +430,7 @@ Keys for syslog:
     Default value: `LOG_USER` if `address` is specified, `LOG_DAEMON` otherwise.
 - format – Message format. Possible values: `bsd` and `syslog.`
 
-## send_crash_reports {#server_configuration_parameters-logger}
+## send_crash_reports {#server_configuration_parameters-send_crash_reports}
 
 Settings for opt-in sending crash reports to the ClickHouse core developers team via [Sentry](https://sentry.io).
 Enabling it, especially in pre-production environments, is highly appreciated.
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 4d4730b28f5..b0c879af931 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -143,6 +143,16 @@ Possible values:
 
 Default value: 0.
 
+## http_max_uri_size {#http-max-uri-size}
+
+Sets the maximum URI length of an HTTP request.
+
+Possible values:
+
+- Positive integer.
+
+Default value: 1048576.
+
 ## send_progress_in_http_headers {#settings-send_progress_in_http_headers}
 
 Enables or disables `X-ClickHouse-Progress` HTTP response headers in `clickhouse-server` responses.
@@ -2864,6 +2874,50 @@ Possible values:
 
 Default value: `0`.
 
+## prefer_column_name_to_alias {#prefer-column-name-to-alias}
+
+Enables or disables using the original column names instead of aliases in query expressions and clauses. It especially matters when the alias is the same as the column name, see [Expression Aliases](../../sql-reference/syntax.md#notes-on-usage). Enable this setting to make alias syntax rules in ClickHouse more compatible with most other database engines.
+
+Possible values:
+
+- 0 — The column name is substituted with the alias.
+- 1 — The column name is not substituted with the alias.
+
+Default value: `0`.
+
+**Example**
+
+The difference between the enabled and disabled setting:
+
+Query:
+
+```sql
+SET prefer_column_name_to_alias = 0;
+SELECT avg(number) AS number, max(number) FROM numbers(10);
+```
+
+Result:
+
+```text
+Received exception from server (version 21.5.1):
+Code: 184. DB::Exception: Received from localhost:9000. DB::Exception: Aggregate function avg(number) is found inside another aggregate function in query: While processing avg(number) AS number.
+```
+
+Query:
+
+```sql
+SET prefer_column_name_to_alias = 1;
+SELECT avg(number) AS number, max(number) FROM numbers(10);
+```
+
+Result:
+
+```text
+┌─number─┬─max(number)─┐
+│    4.5 │           9 │
+└────────┴─────────────┘
+```
+
 ## limit {#limit}
 
 Sets the maximum number of rows to get from the query result. It adjusts the value set by the [LIMIT](../../sql-reference/statements/select/limit.md#limit-clause) clause, so that the limit, specified in the query, cannot exceed the limit, set by this setting.
diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md
index cba52586e93..096eca12e7d 100644
--- a/docs/en/operations/system-tables/clusters.md
+++ b/docs/en/operations/system-tables/clusters.md
@@ -4,63 +4,68 @@ Contains information about clusters available in the config file and the servers
 
 Columns:
 
-- `cluster` (String) — The cluster name.
-- `shard_num` (UInt32) — The shard number in the cluster, starting from 1.
-- `shard_weight` (UInt32) — The relative weight of the shard when writing data.
-- `replica_num` (UInt32) — The replica number in the shard, starting from 1.
-- `host_name` (String) — The host name, as specified in the config.
-- `host_address` (String) — The host IP address obtained from DNS.
-- `port` (UInt16) — The port to use for connecting to the server.
-- `user` (String) — The name of the user for connecting to the server.
-- `errors_count` (UInt32) - number of times this host failed to reach replica.
-- `estimated_recovery_time` (UInt32) - seconds left until replica error count is zeroed and it is considered to be back to normal.
+- `cluster` ([String](../../sql-reference/data-types/string.md)) — The cluster name.
+- `shard_num` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The shard number in the cluster, starting from 1.
+- `shard_weight` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The relative weight of the shard when writing data.
+- `replica_num` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The replica number in the shard, starting from 1.
+- `host_name` ([String](../../sql-reference/data-types/string.md)) — The host name, as specified in the config.
+- `host_address` ([String](../../sql-reference/data-types/string.md)) — The host IP address obtained from DNS.
+- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The port to use for connecting to the server.
+- `is_local` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the host is local.
+- `user` ([String](../../sql-reference/data-types/string.md)) — The name of the user for connecting to the server.
+- `default_database` ([String](../../sql-reference/data-types/string.md)) — The default database name.
+- `errors_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of times this host failed to reach the replica.
+- `slowdowns_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of slowdowns that led to changing the replica when establishing a connection with hedged requests.
+- `estimated_recovery_time` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Seconds remaining until the replica error count is zeroed and it is considered to be back to normal. -Please note that `errors_count` is updated once per query to the cluster, but `estimated_recovery_time` is recalculated on-demand. So there could be a case of non-zero `errors_count` and zero `estimated_recovery_time`, that next query will zero `errors_count` and try to use replica as if it has no errors. +**Example** -**See also** +Query: + +```sql +SELECT * FROM system.clusters LIMIT 2 FORMAT Vertical; +``` + +Result: + +```text +Row 1: +────── +cluster: test_cluster_two_shards +shard_num: 1 +shard_weight: 1 +replica_num: 1 +host_name: 127.0.0.1 +host_address: 127.0.0.1 +port: 9000 +is_local: 1 +user: default +default_database: +errors_count: 0 +slowdowns_count: 0 +estimated_recovery_time: 0 + +Row 2: +────── +cluster: test_cluster_two_shards +shard_num: 2 +shard_weight: 1 +replica_num: 1 +host_name: 127.0.0.2 +host_address: 127.0.0.2 +port: 9000 +is_local: 0 +user: default +default_database: +errors_count: 0 +slowdowns_count: 0 +estimated_recovery_time: 0 +``` + +**See Also** - [Table engine Distributed](../../engines/table-engines/special/distributed.md) - [distributed_replica_error_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap) - [distributed_replica_error_half_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) -**Example** - -```sql -:) SELECT * FROM system.clusters LIMIT 2 FORMAT Vertical; -``` - -```text -Row 1: -────── -cluster: test_cluster -shard_num: 1 -shard_weight: 1 -replica_num: 1 -host_name: clickhouse01 -host_address: 172.23.0.11 -port: 9000 -is_local: 1 -user: default -default_database: -errors_count: 0 -estimated_recovery_time: 0 - -Row 2: -────── -cluster: test_cluster -shard_num: 1 -shard_weight: 1 -replica_num: 2 -host_name: clickhouse02 -host_address: 172.23.0.12 -port: 9000 -is_local: 0 -user: default -default_database: -errors_count: 0 -estimated_recovery_time: 0 - -2 rows in set. Elapsed: 0.002 sec. -``` - [Original article](https://clickhouse.tech/docs/en/operations/system_tables/clusters) diff --git a/docs/en/operations/system-tables/dictionaries.md b/docs/en/operations/system-tables/dictionaries.md index 3d3bbe2af4e..2bc1be51f19 100644 --- a/docs/en/operations/system-tables/dictionaries.md +++ b/docs/en/operations/system-tables/dictionaries.md @@ -21,6 +21,7 @@ Columns: - `bytes_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary. - `query_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reboot. - `hit_rate` ([Float64](../../sql-reference/data-types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache. +- `found_rate` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of uses for which the value was found. - `element_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of items stored in the dictionary. - `load_factor` ([Float64](../../sql-reference/data-types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table). 
 - `source` ([String](../../sql-reference/data-types/string.md)) — Text describing the [data source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) for the dictionary.
@@ -60,4 +61,4 @@ SELECT * FROM system.dictionaries
 └──────────┴──────┴────────┴─────────────┴──────┴────────┴──────────────────────────────────────┴─────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴───────────────────────┴────────────────────────────┴──────────────┴──────────────┴─────────────────────┴──────────────────────────────┘───────────────────────┴────────────────┘
 
-[Original article](https://clickhouse.tech/docs/en/operations/system_tables/dictionaries) \ No newline at end of file
+[Original article](https://clickhouse.tech/docs/en/operations/system_tables/dictionaries)
diff --git a/docs/en/operations/system-tables/distribution_queue.md b/docs/en/operations/system-tables/distribution_queue.md
index fdc6a134da2..3b09c20874c 100644
--- a/docs/en/operations/system-tables/distribution_queue.md
+++ b/docs/en/operations/system-tables/distribution_queue.md
@@ -18,6 +18,10 @@ Columns:
 
 - `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of compressed data in local files, in bytes.
 
+- `broken_data_files` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of files that have been marked as broken (due to an error).
+
+- `broken_data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of compressed data in broken files, in bytes.
+
 - `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text message about the last error that occurred (if any).
 
 **Example**
diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md
index e0c74576bb6..c40f2372033 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md
@@ -6,6 +6,10 @@ toc_priority: 141
 
 Sums the arithmetic difference between consecutive rows. If the difference is negative, it is ignored.
 
+Note that the underlying data must be sorted in order for this function to work properly.
+If you would like to use this function in a materialized view, you most likely want to use the
+[deltaSumTimestamp](deltasumtimestamp.md) function instead.
+
 **Syntax**
 
 ``` sql
diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md
new file mode 100644
index 00000000000..2bfafdc81d1
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md
@@ -0,0 +1,41 @@
+---
+toc_priority: 141
+---
+
+# deltaSumTimestamp {#agg_functions-deltasumtimestamp}
+
+Syntax: `deltaSumTimestamp(value, timestamp)`
+
+Adds the differences between consecutive rows. If the difference is negative, it is ignored.
+Uses `timestamp` to order values.
+
+This function is primarily for materialized views that are ordered by some time-bucket-aligned
+timestamp, for example a `toStartOfMinute` bucket. Because the rows in such a materialized view
+will all have the same timestamp, it is impossible for them to be merged in the "right" order. This
+function keeps track of the `timestamp` of the values it's seen, so it's possible to order the states
+correctly during merging.
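+
+For instance, a per-minute rollup might look like the following sketch (the `events` source table and its columns are hypothetical, used only to illustrate the `-State`/`-Merge` combinators):
+
+``` sql
+-- Hypothetical source table with a counter sampled over time.
+CREATE TABLE events (ts DateTime, counter UInt64) ENGINE = MergeTree ORDER BY ts;
+
+-- The view stores partial aggregation states per minute bucket;
+-- deltaSumTimestamp keeps the original timestamps inside the state,
+-- so states merge in the right order even though all rows in a bucket
+-- share the same `minute` value.
+CREATE MATERIALIZED VIEW events_per_minute
+ENGINE = AggregatingMergeTree ORDER BY minute
+AS SELECT
+    toStartOfMinute(ts) AS minute,
+    deltaSumTimestampState(counter, ts) AS delta_state
+FROM events
+GROUP BY minute;
+
+-- Finalize the states with the -Merge combinator at query time.
+SELECT minute, deltaSumTimestampMerge(delta_state) AS delta
+FROM events_per_minute
+GROUP BY minute
+ORDER BY minute;
+```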
+
+To calculate the delta sum across an ordered collection, you can simply use the
+[deltaSum](./deltasum.md) function.
+
+**Arguments**
+
+- `value` must be some [Integer](../../data-types/int-uint.md) type or [Float](../../data-types/float.md) type or a [Date](../../data-types/date.md) or [DateTime](../../data-types/datetime.md).
+- `timestamp` must be some [Integer](../../data-types/int-uint.md) type or [Float](../../data-types/float.md) type or a [Date](../../data-types/date.md) or [DateTime](../../data-types/datetime.md).
+
+**Returned value**
+
+- Accumulated differences between consecutive values, ordered by the `timestamp` parameter.
+
+**Example**
+
+```sql
+SELECT deltaSumTimestamp(value, timestamp)
+FROM (SELECT number AS timestamp, [0, 4, 8, 3, 0, 0, 0, 1, 3, 5][number] AS value FROM numbers(1, 10));
+```
+
+``` text
+┌─deltaSumTimestamp(value, timestamp)─┐
+│                                  13 │
+└─────────────────────────────────────┘
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/max.md b/docs/en/sql-reference/aggregate-functions/reference/max.md
index c462dd590a6..25173a48906 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/max.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/max.md
@@ -4,4 +4,21 @@ toc_priority: 3
 
 # max {#agg_function-max}
 
-Calculates the maximum.
+Aggregate function that calculates the maximum across a group of values.
+
+Example:
+
+``` sql
+SELECT max(salary) FROM employees;
+```
+
+``` sql
+SELECT department, max(salary) FROM employees GROUP BY department;
+```
+
+If you need a non-aggregate function to choose the maximum of two values, see `greatest`:
+
+``` sql
+SELECT greatest(a, b) FROM table;
+```
+
diff --git a/docs/en/sql-reference/aggregate-functions/reference/min.md b/docs/en/sql-reference/aggregate-functions/reference/min.md
index 56b03468243..64b155857f8 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/min.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/min.md
@@ -4,4 +4,20 @@ toc_priority: 2
 
 ## min {#agg_function-min}
 
-Calculates the minimum.
+Aggregate function that calculates the minimum across a group of values.
+
+Example:
+
+``` sql
+SELECT min(salary) FROM employees;
+```
+
+``` sql
+SELECT department, min(salary) FROM employees GROUP BY department;
+```
+
+If you need a non-aggregate function to choose the minimum of two values, see `least`:
+
+``` sql
+SELECT least(a, b) FROM table;
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md
index 56ef598f7e7..32d174136e0 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md
@@ -12,6 +12,9 @@ The result depends on the order of running the query, and is nondeterministic.
 
 When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function.
 
+!!! note "Note"
+    Using `quantileTDigestWeighted` [is not recommended for tiny data sets](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) and can lead to significant error. In this case, consider using [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) instead.
+
 **Syntax**
 
 ``` sql
diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md
index d95abe57510..ed07f599b91 100644
--- a/docs/en/sql-reference/data-types/datetime.md
+++ b/docs/en/sql-reference/data-types/datetime.md
@@ -23,7 +23,7 @@ The point in time is saved as a [Unix timestamp](https://en.wikipedia.org/wiki/U
 
 Timezone agnostic unix timestamp is stored in tables, and the timezone is used to transform it to text format or back during data import/export or to make calendar calculations on the values (example: `toDate`, `toHour` functions et cetera). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata.
 
-A list of supported time zones can be found in the [IANA Time Zone Database](https://www.iana.org/time-zones) and also can be queried by `SELECT * FROM system.time_zones`.
+A list of supported time zones can be found in the [IANA Time Zone Database](https://www.iana.org/time-zones) and can also be queried with `SELECT * FROM system.time_zones`. [The list](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) is also available on Wikipedia.
 
 You can explicitly set a time zone for `DateTime`-type columns when creating a table. Example: `DateTime('UTC')`. If the time zone isn’t set, ClickHouse uses the value of the [timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) parameter in the server settings or the operating system settings at the moment of the ClickHouse server start.
diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index b0636b0305e..69baf64ef55 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -5,7 +5,7 @@ toc_title: Dates and Times
 
 # Functions for Working with Dates and Times {#functions-for-working-with-dates-and-times}
 
-Support for time zones
+Support for time zones.
 
 All functions for working with the date and time that have a logical use for the time zone can accept a second optional time zone argument. Example: Asia/Yekaterinburg. In this case, they use the specified time zone instead of the local (default) one.
 
@@ -23,13 +23,53 @@ SELECT
 └─────────────────────┴────────────┴────────────┴─────────────────────┘
 ```
 
+## timeZone {#timezone}
+
+Returns the timezone of the server.
+
+**Syntax**
+
+``` sql
+timeZone()
+```
+
+Alias: `timezone`.
+
+**Returned value**
+
+- Timezone.
+
+Type: [String](../../sql-reference/data-types/string.md).
+
 ## toTimeZone {#totimezone}
 
-Convert time or date and time to the specified time zone. The time zone is an attribute of the Date/DateTime types. The internal value (number of seconds) of the table field or of the resultset's column does not change, the column's type changes and its string representation changes accordingly.
+Converts time or date and time to the specified time zone. The time zone is an attribute of the `Date` and `DateTime` data types. The internal value (number of seconds) of the table field or of the resultset's column does not change, the column's type changes and its string representation changes accordingly.
+
+**Syntax**
+
+``` sql
+toTimeZone(value, timezone)
+```
+
+Alias: `toTimezone`.
+
+**Arguments**
+
+- `value` — Time or date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
+- `timezone` — Timezone for the returned value.
[String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Date and time. + +Type: [DateTime](../../sql-reference/data-types/datetime.md). + +**Example** + +Query: ```sql -SELECT - toDateTime('2019-01-01 00:00:00', 'UTC') AS time_utc, +SELECT toDateTime('2019-01-01 00:00:00', 'UTC') AS time_utc, toTypeName(time_utc) AS type_utc, toInt32(time_utc) AS int32utc, toTimeZone(time_utc, 'Asia/Yekaterinburg') AS time_yekat, @@ -40,6 +80,7 @@ SELECT toInt32(time_samoa) AS int32samoa FORMAT Vertical; ``` +Result: ```text Row 1: @@ -57,6 +98,82 @@ int32samoa: 1546300800 `toTimeZone(time_utc, 'Asia/Yekaterinburg')` changes the `DateTime('UTC')` type to `DateTime('Asia/Yekaterinburg')`. The value (Unixtimestamp) 1546300800 stays the same, but the string representation (the result of the toString() function) changes from `time_utc: 2019-01-01 00:00:00` to `time_yekat: 2019-01-01 05:00:00`. +## timeZoneOf {#timezoneof} + +Returns the timezone name of [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md) data types. + +**Syntax** + +``` sql +timeZoneOf(value) +``` + +Alias: `timezoneOf`. + +**Arguments** + +- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Returned value** + +- Timezone name. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: +``` sql +SELECT timezoneOf(now()); +``` + +Result: +``` text +┌─timezoneOf(now())─┐ +│ Etc/UTC │ +└───────────────────┘ +``` + +## timeZoneOffset {#timezoneoffset} + +Returns a timezone offset in seconds from [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time). The function takes into account [daylight saving time](https://en.wikipedia.org/wiki/Daylight_saving_time) and historical timezone changes at the specified date and time. +[IANA timezone database](https://www.iana.org/time-zones) is used to calculate the offset. + +**Syntax** + +``` sql +timeZoneOffset(value) +``` + +Alias: `timezoneOffset`. + +**Arguments** + +- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Returned value** + +- Offset from UTC in seconds. + +Type: [Int32](../../sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +``` sql +SELECT toDateTime('2021-04-21 10:20:30', 'America/New_York') AS Time, toTypeName(Time) AS Type, + timeZoneOffset(Time) AS Offset_in_seconds, (Offset_in_seconds / 3600) AS Offset_in_hours; +``` + +Result: + +``` text +┌────────────────Time─┬─Type─────────────────────────┬─Offset_in_seconds─┬─Offset_in_hours─┐ +│ 2021-04-21 10:20:30 │ DateTime('America/New_York') │ -14400 │ -4 │ +└─────────────────────┴──────────────────────────────┴───────────────────┴─────────────────┘ +``` + ## toYear {#toyear} Converts a date or date with time to a UInt16 number containing the year number (AD). diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index 5cc95fe298a..c06711b3cd2 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -224,7 +224,7 @@ assumeNotNull(x) **Returned values** - The original value from the non-`Nullable` type, if it is not `NULL`. -- The default value for the non-`Nullable` type if the original value was `NULL`. +- Implementation specific result if the original value was `NULL`. 
 **Example**
diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 9d7743e186f..ecb7b982157 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -696,10 +696,6 @@ Returns the server’s uptime in seconds.
 
 Returns the version of the server as a string.
 
-## timezone() {#timezone}
-
-Returns the timezone of the server.
-
 ## blockNumber {#blocknumber}
 
 Returns the sequence number of the data block where the row is located.
diff --git a/docs/en/sql-reference/statements/create/row-policy.md b/docs/en/sql-reference/statements/create/row-policy.md
index 5a1fa218fad..1df7cc36995 100644
--- a/docs/en/sql-reference/statements/create/row-policy.md
+++ b/docs/en/sql-reference/statements/create/row-policy.md
@@ -7,6 +7,9 @@ toc_title: ROW POLICY
 
 Creates a [row policy](../../../operations/access-rights.md#row-policy-management), i.e. a filter used to determine which rows a user can read from a table.
 
+!!! note "Warning"
+    Row policies make sense only for users with read-only access. If a user can modify a table or copy partitions between tables, it defeats the restrictions of row policies.
+
 Syntax:
 
 ``` sql
diff --git a/docs/en/sql-reference/statements/detach.md b/docs/en/sql-reference/statements/detach.md
index e9c9ed3693c..59f5b297ece 100644
--- a/docs/en/sql-reference/statements/detach.md
+++ b/docs/en/sql-reference/statements/detach.md
@@ -10,7 +10,7 @@ Makes the server "forget" about the existence of the table or materialized view.
 
 Syntax:
 
 ``` sql
-DETACH TABLE|VIEW [IF EXISTS] [db.]name [PERMANENTLY] [ON CLUSTER cluster]
+DETACH TABLE|VIEW [IF EXISTS] [db.]name [ON CLUSTER cluster] [PERMANENTLY]
 ```
 
 Detaching does not delete the data or metadata for the table or materialized view. If the table or view was not detached `PERMANENTLY`, on the next server launch the server will read the metadata and recall the table/view again. If the table or view was detached `PERMANENTLY`, there will be no automatic recall.
diff --git a/docs/en/sql-reference/statements/explain.md b/docs/en/sql-reference/statements/explain.md
index 3cca29801dd..8083fa160fa 100644
--- a/docs/en/sql-reference/statements/explain.md
+++ b/docs/en/sql-reference/statements/explain.md
@@ -5,7 +5,7 @@ toc_title: EXPLAIN
 
 # EXPLAIN Statement {#explain}
 
-Show the execution plan of a statement.
+Shows the execution plan of a statement.
 
 Syntax:
 
@@ -65,7 +65,7 @@ SelectWithUnionQuery (children 1)
 
 ### EXPLAIN SYNTAX {#explain-syntax}
 
-Return query after syntax optimizations.
+Returns the query after syntax optimizations.
 
 Example:
 
@@ -88,15 +88,16 @@ FROM
 ) AS `--.s`
 CROSS JOIN system.numbers AS c
 ```
+
 ### EXPLAIN PLAN {#explain-plan}
 
 Dump query plan steps.
 
 Settings:
 
-- `header` — Print output header for step. Default: 0.
-- `description` — Print step description. Default: 1.
-- `actions` — Print detailed information about step actions. Default: 0.
+- `header` — Prints the output header for the step. Default: 0.
+- `description` — Prints the step description. Default: 1.
+- `actions` — Prints detailed information about the step actions. Default: 0.
 
 Example:
 
@@ -115,15 +116,16 @@ Union
 ```
 
 !!! note "Note"
-    Step and query cost estimation is not supported.
+    Step and query cost estimation is not supported.
 
 ### EXPLAIN PIPELINE {#explain-pipeline}
 
 Settings:
 
-- `header` — Print header for each output port. Default: 0.
-- `graph` — Use DOT graph description language. Default: 0.
-- `compact` — Print graph in compact mode if graph is enabled.
Default: 1. +- `header` — Prints header for each output port. Default: 0. +- `graph` — Prints a graph described in the [DOT](https://en.wikipedia.org/wiki/DOT_(graph_description_language)) graph description language. Default: 0. +- `compact` — Prints graph in compact mode if `graph` setting is enabled. Default: 1. +- `indexes` — Shows used indexes, the number of filtered parts, and granules for every index applied. Default: 0. Supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. Example: diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index 5d0eee76393..573e35d2f71 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -171,7 +171,7 @@ Received exception from server (version 18.14.17): Code: 184. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: Aggregate function sum(b) is found inside another aggregate function in query. ``` -In this example, we declared table `t` with column `b`. Then, when selecting data, we defined the `sum(b) AS b` alias. As aliases are global, ClickHouse substituted the literal `b` in the expression `argMax(a, b)` with the expression `sum(b)`. This substitution caused the exception. +In this example, we declared table `t` with column `b`. Then, when selecting data, we defined the `sum(b) AS b` alias. As aliases are global, ClickHouse substituted the literal `b` in the expression `argMax(a, b)` with the expression `sum(b)`. This substitution caused the exception. You can change this default behavior by setting [prefer_column_name_to_alias](../operations/settings/settings.md#prefer_column_name_to_alias) to `1`. ## Asterisk {#asterisk} diff --git a/docs/ja/commercial/cloud.md b/docs/ja/commercial/cloud.md index 84f58e46cdb..62fc75ecbda 100644 --- a/docs/ja/commercial/cloud.md +++ b/docs/ja/commercial/cloud.md @@ -22,7 +22,7 @@ toc_title: "\u30AF\u30E9\u30A6\u30C9" ## Alibaba Cloud {#alibaba-cloud} -ClickHouseのためのAlibaba Cloudの管理サービス [中国サイト](https://www.aliyun.com/product/clickhouse) (2021年5月に国際サイトで利用可能になります) 次の主な機能を提供します: +[ClickHouseのためのAlibaba Cloudの管理サービス](https://www.alibabacloud.com/product/clickhouse) 次の主な機能を提供します: - Alibaba Cloud Apsara分散システムをベースにした信頼性の高いクラウドディスクストレージエンジン - 手動でのデータ移行を必要とせずに、オン・デマンドで容量を拡張 diff --git a/docs/ja/getting-started/example-datasets/ontime.md b/docs/ja/getting-started/example-datasets/ontime.md index bd049e8caad..d12d8a36069 100644 --- a/docs/ja/getting-started/example-datasets/ontime.md +++ b/docs/ja/getting-started/example-datasets/ontime.md @@ -29,126 +29,127 @@ done テーブルの作成: ``` sql -CREATE TABLE `ontime` ( - `Year` UInt16, - `Quarter` UInt8, - `Month` UInt8, - `DayofMonth` UInt8, - `DayOfWeek` UInt8, - `FlightDate` Date, - `UniqueCarrier` FixedString(7), - `AirlineID` Int32, - `Carrier` FixedString(2), - `TailNum` String, - `FlightNum` String, - `OriginAirportID` Int32, - `OriginAirportSeqID` Int32, - `OriginCityMarketID` Int32, - `Origin` FixedString(5), - `OriginCityName` String, - `OriginState` FixedString(2), - `OriginStateFips` String, - `OriginStateName` String, - `OriginWac` Int32, - `DestAirportID` Int32, - `DestAirportSeqID` Int32, - `DestCityMarketID` Int32, - `Dest` FixedString(5), - `DestCityName` String, - `DestState` FixedString(2), - `DestStateFips` String, - `DestStateName` String, - `DestWac` Int32, - `CRSDepTime` Int32, - `DepTime` Int32, - `DepDelay` Int32, - `DepDelayMinutes` Int32, - `DepDel15` Int32, - `DepartureDelayGroups` String, - `DepTimeBlk` String, - `TaxiOut` Int32, - `WheelsOff` 
Int32, - `WheelsOn` Int32, - `TaxiIn` Int32, - `CRSArrTime` Int32, - `ArrTime` Int32, - `ArrDelay` Int32, - `ArrDelayMinutes` Int32, - `ArrDel15` Int32, - `ArrivalDelayGroups` Int32, - `ArrTimeBlk` String, - `Cancelled` UInt8, - `CancellationCode` FixedString(1), - `Diverted` UInt8, - `CRSElapsedTime` Int32, - `ActualElapsedTime` Int32, - `AirTime` Int32, - `Flights` Int32, - `Distance` Int32, - `DistanceGroup` UInt8, - `CarrierDelay` Int32, - `WeatherDelay` Int32, - `NASDelay` Int32, - `SecurityDelay` Int32, - `LateAircraftDelay` Int32, - `FirstDepTime` String, - `TotalAddGTime` String, - `LongestAddGTime` String, - `DivAirportLandings` String, - `DivReachedDest` String, - `DivActualElapsedTime` String, - `DivArrDelay` String, - `DivDistance` String, - `Div1Airport` String, - `Div1AirportID` Int32, - `Div1AirportSeqID` Int32, - `Div1WheelsOn` String, - `Div1TotalGTime` String, - `Div1LongestGTime` String, - `Div1WheelsOff` String, - `Div1TailNum` String, - `Div2Airport` String, - `Div2AirportID` Int32, - `Div2AirportSeqID` Int32, - `Div2WheelsOn` String, - `Div2TotalGTime` String, - `Div2LongestGTime` String, - `Div2WheelsOff` String, - `Div2TailNum` String, - `Div3Airport` String, - `Div3AirportID` Int32, - `Div3AirportSeqID` Int32, - `Div3WheelsOn` String, - `Div3TotalGTime` String, - `Div3LongestGTime` String, - `Div3WheelsOff` String, - `Div3TailNum` String, - `Div4Airport` String, - `Div4AirportID` Int32, - `Div4AirportSeqID` Int32, - `Div4WheelsOn` String, - `Div4TotalGTime` String, - `Div4LongestGTime` String, - `Div4WheelsOff` String, - `Div4TailNum` String, - `Div5Airport` String, - `Div5AirportID` Int32, - `Div5AirportSeqID` Int32, - `Div5WheelsOn` String, - `Div5TotalGTime` String, - `Div5LongestGTime` String, - `Div5WheelsOff` String, - `Div5TailNum` String +CREATE TABLE `ontime` +( + `Year` UInt16, + `Quarter` UInt8, + `Month` UInt8, + `DayofMonth` UInt8, + `DayOfWeek` UInt8, + `FlightDate` Date, + `Reporting_Airline` String, + `DOT_ID_Reporting_Airline` Int32, + `IATA_CODE_Reporting_Airline` String, + `Tail_Number` Int32, + `Flight_Number_Reporting_Airline` String, + `OriginAirportID` Int32, + `OriginAirportSeqID` Int32, + `OriginCityMarketID` Int32, + `Origin` FixedString(5), + `OriginCityName` String, + `OriginState` FixedString(2), + `OriginStateFips` String, + `OriginStateName` String, + `OriginWac` Int32, + `DestAirportID` Int32, + `DestAirportSeqID` Int32, + `DestCityMarketID` Int32, + `Dest` FixedString(5), + `DestCityName` String, + `DestState` FixedString(2), + `DestStateFips` String, + `DestStateName` String, + `DestWac` Int32, + `CRSDepTime` Int32, + `DepTime` Int32, + `DepDelay` Int32, + `DepDelayMinutes` Int32, + `DepDel15` Int32, + `DepartureDelayGroups` String, + `DepTimeBlk` String, + `TaxiOut` Int32, + `WheelsOff` Int32, + `WheelsOn` Int32, + `TaxiIn` Int32, + `CRSArrTime` Int32, + `ArrTime` Int32, + `ArrDelay` Int32, + `ArrDelayMinutes` Int32, + `ArrDel15` Int32, + `ArrivalDelayGroups` Int32, + `ArrTimeBlk` String, + `Cancelled` UInt8, + `CancellationCode` FixedString(1), + `Diverted` UInt8, + `CRSElapsedTime` Int32, + `ActualElapsedTime` Int32, + `AirTime` Nullable(Int32), + `Flights` Int32, + `Distance` Int32, + `DistanceGroup` UInt8, + `CarrierDelay` Int32, + `WeatherDelay` Int32, + `NASDelay` Int32, + `SecurityDelay` Int32, + `LateAircraftDelay` Int32, + `FirstDepTime` String, + `TotalAddGTime` String, + `LongestAddGTime` String, + `DivAirportLandings` String, + `DivReachedDest` String, + `DivActualElapsedTime` String, + `DivArrDelay` String, + 
`DivDistance` String, + `Div1Airport` String, + `Div1AirportID` Int32, + `Div1AirportSeqID` Int32, + `Div1WheelsOn` String, + `Div1TotalGTime` String, + `Div1LongestGTime` String, + `Div1WheelsOff` String, + `Div1TailNum` String, + `Div2Airport` String, + `Div2AirportID` Int32, + `Div2AirportSeqID` Int32, + `Div2WheelsOn` String, + `Div2TotalGTime` String, + `Div2LongestGTime` String, + `Div2WheelsOff` String, + `Div2TailNum` String, + `Div3Airport` String, + `Div3AirportID` Int32, + `Div3AirportSeqID` Int32, + `Div3WheelsOn` String, + `Div3TotalGTime` String, + `Div3LongestGTime` String, + `Div3WheelsOff` String, + `Div3TailNum` String, + `Div4Airport` String, + `Div4AirportID` Int32, + `Div4AirportSeqID` Int32, + `Div4WheelsOn` String, + `Div4TotalGTime` String, + `Div4LongestGTime` String, + `Div4WheelsOff` String, + `Div4TailNum` String, + `Div5Airport` String, + `Div5AirportID` Int32, + `Div5AirportSeqID` Int32, + `Div5WheelsOn` String, + `Div5TotalGTime` String, + `Div5LongestGTime` String, + `Div5WheelsOff` String, + `Div5TailNum` String ) ENGINE = MergeTree -PARTITION BY Year -ORDER BY (Carrier, FlightDate) -SETTINGS index_granularity = 8192; + PARTITION BY Year + ORDER BY (IATA_CODE_Reporting_Airline, FlightDate) + SETTINGS index_granularity = 8192; ``` データのロード: ``` bash -$ for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done +ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'" ``` ## パーティション済みデータのダウンロード {#download-of-prepared-partitions} @@ -212,10 +213,10 @@ LIMIT 10; Q4. 2007年のキャリア別の遅延の数 ``` sql -SELECT Carrier, count(*) +SELECT IATA_CODE_Reporting_Airline AS Carrier, count(*) FROM ontime WHERE DepDelay>10 AND Year=2007 -GROUP BY Carrier +GROUP BY IATA_CODE_Reporting_Airline ORDER BY count(*) DESC; ``` @@ -226,32 +227,32 @@ SELECT Carrier, c, c2, c*100/c2 as c3 FROM ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY Carrier -) +) q JOIN ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c2 FROM ontime WHERE Year=2007 GROUP BY Carrier -) USING Carrier +) qq USING Carrier ORDER BY c3 DESC; ``` 同じクエリのより良いバージョン: ``` sql -SELECT Carrier, avg(DepDelay>10)*100 AS c3 +SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year=2007 -GROUP BY Carrier +GROUP BY IATA_CODE_Reporting_Airline ORDER BY c3 DESC ``` @@ -262,29 +263,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3 FROM ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year>=2000 AND Year<=2008 GROUP BY Carrier -) +) q JOIN ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c2 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier -) USING Carrier +) qq USING Carrier ORDER BY c3 DESC; ``` 同じクエリのより良いバージョン: ``` sql -SELECT Carrier, avg(DepDelay>10)*100 AS c3 +SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier @@ -303,7 +304,7 @@ FROM from ontime WHERE DepDelay>10 GROUP BY Year -) +) q JOIN ( select @@ -311,7 +312,7 @@ JOIN count(*) as c2 from ontime GROUP BY Year -) USING (Year) +) qq USING (Year) ORDER BY Year; ``` @@ -346,7 +347,7 @@ Q10. 
``` sql SELECT - min(Year), max(Year), Carrier, count(*) AS cnt, + min(Year), max(Year), IATA_CODE_Reporting_Airline AS Carrier, count(*) AS cnt, sum(ArrDelayMinutes>30) AS flights_delayed, round(sum(ArrDelayMinutes>30)/count(*),2) AS rate FROM ontime diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index bee1fc1318a..9a5a985c76b 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -82,6 +82,7 @@ SELECT * FROM s3_engine_table LIMIT 2; Необязательные настройки: - `access_key_id` и `secret_access_key` — указывают учетные данные для использования с данной точкой приема запроса. +- `region` — название региона S3. - `use_environment_credentials` — если `true`, S3-клиент будет пытаться получить учетные данные из переменных среды и метаданных Amazon EC2 для данной точки приема запроса. Значение по умолчанию - `false`. - `header` — добавляет указанный HTTP-заголовок к запросу на заданную точку приема запроса. Может быть определен несколько раз. - `server_side_encryption_customer_key_base64` — устанавливает необходимые заголовки для доступа к объектам S3 с шифрованием SSE-C. @@ -94,6 +95,7 @@ SELECT * FROM s3_engine_table LIMIT 2; https://storage.yandexcloud.net/my-test-bucket-768/ + diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index b8bd259167a..4cff6fcfb80 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -727,6 +727,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' https://storage.yandexcloud.net/my-bucket/root-path/ your_access_key_id your_secret_access_key + http://proxy1 http://proxy2 @@ -753,6 +754,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' Необязательные параметры: +- `region` — название региона S3. - `use_environment_credentials` — признак, нужно ли считывать учетные данные AWS из сетевого окружения, а также из переменных окружения `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` и `AWS_SESSION_TOKEN`, если они есть. Значение по умолчанию: `false`. - `use_insecure_imds_request` — признак, нужно ли использовать менее безопасное соединение при выполнении запроса к IMDS при получении учётных данных из метаданных Amazon EC2. Значение по умолчанию: `false`. - `proxy` — конфигурация прокси-сервера для конечной точки S3. Каждый элемент `uri` внутри блока `proxy` должен содержать URL прокси-сервера. 
diff --git a/docs/ru/getting-started/example-datasets/ontime.md b/docs/ru/getting-started/example-datasets/ontime.md index be5b1cd1b70..d46b7e75e7f 100644 --- a/docs/ru/getting-started/example-datasets/ontime.md +++ b/docs/ru/getting-started/example-datasets/ontime.md @@ -27,126 +27,127 @@ done Создание таблицы: ``` sql -CREATE TABLE `ontime` ( - `Year` UInt16, - `Quarter` UInt8, - `Month` UInt8, - `DayofMonth` UInt8, - `DayOfWeek` UInt8, - `FlightDate` Date, - `UniqueCarrier` FixedString(7), - `AirlineID` Int32, - `Carrier` FixedString(2), - `TailNum` String, - `FlightNum` String, - `OriginAirportID` Int32, - `OriginAirportSeqID` Int32, - `OriginCityMarketID` Int32, - `Origin` FixedString(5), - `OriginCityName` String, - `OriginState` FixedString(2), - `OriginStateFips` String, - `OriginStateName` String, - `OriginWac` Int32, - `DestAirportID` Int32, - `DestAirportSeqID` Int32, - `DestCityMarketID` Int32, - `Dest` FixedString(5), - `DestCityName` String, - `DestState` FixedString(2), - `DestStateFips` String, - `DestStateName` String, - `DestWac` Int32, - `CRSDepTime` Int32, - `DepTime` Int32, - `DepDelay` Int32, - `DepDelayMinutes` Int32, - `DepDel15` Int32, - `DepartureDelayGroups` String, - `DepTimeBlk` String, - `TaxiOut` Int32, - `WheelsOff` Int32, - `WheelsOn` Int32, - `TaxiIn` Int32, - `CRSArrTime` Int32, - `ArrTime` Int32, - `ArrDelay` Int32, - `ArrDelayMinutes` Int32, - `ArrDel15` Int32, - `ArrivalDelayGroups` Int32, - `ArrTimeBlk` String, - `Cancelled` UInt8, - `CancellationCode` FixedString(1), - `Diverted` UInt8, - `CRSElapsedTime` Int32, - `ActualElapsedTime` Int32, - `AirTime` Int32, - `Flights` Int32, - `Distance` Int32, - `DistanceGroup` UInt8, - `CarrierDelay` Int32, - `WeatherDelay` Int32, - `NASDelay` Int32, - `SecurityDelay` Int32, - `LateAircraftDelay` Int32, - `FirstDepTime` String, - `TotalAddGTime` String, - `LongestAddGTime` String, - `DivAirportLandings` String, - `DivReachedDest` String, - `DivActualElapsedTime` String, - `DivArrDelay` String, - `DivDistance` String, - `Div1Airport` String, - `Div1AirportID` Int32, - `Div1AirportSeqID` Int32, - `Div1WheelsOn` String, - `Div1TotalGTime` String, - `Div1LongestGTime` String, - `Div1WheelsOff` String, - `Div1TailNum` String, - `Div2Airport` String, - `Div2AirportID` Int32, - `Div2AirportSeqID` Int32, - `Div2WheelsOn` String, - `Div2TotalGTime` String, - `Div2LongestGTime` String, - `Div2WheelsOff` String, - `Div2TailNum` String, - `Div3Airport` String, - `Div3AirportID` Int32, - `Div3AirportSeqID` Int32, - `Div3WheelsOn` String, - `Div3TotalGTime` String, - `Div3LongestGTime` String, - `Div3WheelsOff` String, - `Div3TailNum` String, - `Div4Airport` String, - `Div4AirportID` Int32, - `Div4AirportSeqID` Int32, - `Div4WheelsOn` String, - `Div4TotalGTime` String, - `Div4LongestGTime` String, - `Div4WheelsOff` String, - `Div4TailNum` String, - `Div5Airport` String, - `Div5AirportID` Int32, - `Div5AirportSeqID` Int32, - `Div5WheelsOn` String, - `Div5TotalGTime` String, - `Div5LongestGTime` String, - `Div5WheelsOff` String, - `Div5TailNum` String +CREATE TABLE `ontime` +( + `Year` UInt16, + `Quarter` UInt8, + `Month` UInt8, + `DayofMonth` UInt8, + `DayOfWeek` UInt8, + `FlightDate` Date, + `Reporting_Airline` String, + `DOT_ID_Reporting_Airline` Int32, + `IATA_CODE_Reporting_Airline` String, + `Tail_Number` Int32, + `Flight_Number_Reporting_Airline` String, + `OriginAirportID` Int32, + `OriginAirportSeqID` Int32, + `OriginCityMarketID` Int32, + `Origin` FixedString(5), + `OriginCityName` String, + `OriginState` 
FixedString(2), + `OriginStateFips` String, + `OriginStateName` String, + `OriginWac` Int32, + `DestAirportID` Int32, + `DestAirportSeqID` Int32, + `DestCityMarketID` Int32, + `Dest` FixedString(5), + `DestCityName` String, + `DestState` FixedString(2), + `DestStateFips` String, + `DestStateName` String, + `DestWac` Int32, + `CRSDepTime` Int32, + `DepTime` Int32, + `DepDelay` Int32, + `DepDelayMinutes` Int32, + `DepDel15` Int32, + `DepartureDelayGroups` String, + `DepTimeBlk` String, + `TaxiOut` Int32, + `WheelsOff` Int32, + `WheelsOn` Int32, + `TaxiIn` Int32, + `CRSArrTime` Int32, + `ArrTime` Int32, + `ArrDelay` Int32, + `ArrDelayMinutes` Int32, + `ArrDel15` Int32, + `ArrivalDelayGroups` Int32, + `ArrTimeBlk` String, + `Cancelled` UInt8, + `CancellationCode` FixedString(1), + `Diverted` UInt8, + `CRSElapsedTime` Int32, + `ActualElapsedTime` Int32, + `AirTime` Nullable(Int32), + `Flights` Int32, + `Distance` Int32, + `DistanceGroup` UInt8, + `CarrierDelay` Int32, + `WeatherDelay` Int32, + `NASDelay` Int32, + `SecurityDelay` Int32, + `LateAircraftDelay` Int32, + `FirstDepTime` String, + `TotalAddGTime` String, + `LongestAddGTime` String, + `DivAirportLandings` String, + `DivReachedDest` String, + `DivActualElapsedTime` String, + `DivArrDelay` String, + `DivDistance` String, + `Div1Airport` String, + `Div1AirportID` Int32, + `Div1AirportSeqID` Int32, + `Div1WheelsOn` String, + `Div1TotalGTime` String, + `Div1LongestGTime` String, + `Div1WheelsOff` String, + `Div1TailNum` String, + `Div2Airport` String, + `Div2AirportID` Int32, + `Div2AirportSeqID` Int32, + `Div2WheelsOn` String, + `Div2TotalGTime` String, + `Div2LongestGTime` String, + `Div2WheelsOff` String, + `Div2TailNum` String, + `Div3Airport` String, + `Div3AirportID` Int32, + `Div3AirportSeqID` Int32, + `Div3WheelsOn` String, + `Div3TotalGTime` String, + `Div3LongestGTime` String, + `Div3WheelsOff` String, + `Div3TailNum` String, + `Div4Airport` String, + `Div4AirportID` Int32, + `Div4AirportSeqID` Int32, + `Div4WheelsOn` String, + `Div4TotalGTime` String, + `Div4LongestGTime` String, + `Div4WheelsOff` String, + `Div4TailNum` String, + `Div5Airport` String, + `Div5AirportID` Int32, + `Div5AirportSeqID` Int32, + `Div5WheelsOn` String, + `Div5TotalGTime` String, + `Div5LongestGTime` String, + `Div5WheelsOff` String, + `Div5TailNum` String ) ENGINE = MergeTree -PARTITION BY Year -ORDER BY (Carrier, FlightDate) -SETTINGS index_granularity = 8192; + PARTITION BY Year + ORDER BY (IATA_CODE_Reporting_Airline, FlightDate) + SETTINGS index_granularity = 8192; ``` Загрузка данных: ``` bash -$ for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done +ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'" ``` ## Скачивание готовых партиций {#skachivanie-gotovykh-partitsii} @@ -211,7 +212,7 @@ LIMIT 10; Q4. 
Количество задержек по перевозчикам за 2007 год ``` sql -SELECT Carrier, count(*) +SELECT IATA_CODE_Reporting_Airline AS Carrier, count(*) FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY Carrier @@ -225,29 +226,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3 FROM ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY Carrier -) +) q JOIN ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c2 FROM ontime WHERE Year=2007 GROUP BY Carrier -) USING Carrier +) qq USING Carrier ORDER BY c3 DESC; ``` Более оптимальная версия того же запроса: ``` sql -SELECT Carrier, avg(DepDelay>10)*100 AS c3 +SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year=2007 GROUP BY Carrier @@ -261,29 +262,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3 FROM ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year>=2000 AND Year<=2008 GROUP BY Carrier -) +) q JOIN ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c2 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier -) USING Carrier +) qq USING Carrier ORDER BY c3 DESC; ``` Более оптимальная версия того же запроса: ``` sql -SELECT Carrier, avg(DepDelay>10)*100 AS c3 +SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier @@ -302,7 +303,7 @@ FROM from ontime WHERE DepDelay>10 GROUP BY Year -) +) q JOIN ( select @@ -310,7 +311,7 @@ JOIN count(*) as c2 from ontime GROUP BY Year -) USING (Year) +) qq USING (Year) ORDER BY Year; ``` @@ -346,7 +347,7 @@ Q10. ``` sql SELECT - min(Year), max(Year), Carrier, count(*) AS cnt, + min(Year), max(Year), IATA_CODE_Reporting_Airline AS Carrier, count(*) AS cnt, sum(ArrDelayMinutes>30) AS flights_delayed, round(sum(ArrDelayMinutes>30)/count(*),2) AS rate FROM ontime diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index be9e2deab74..abaf2a8f2da 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -415,7 +415,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part Значения по умолчанию: при указанном `address` - `LOG_USER`, иначе - `LOG_DAEMON` - format - формат сообщений. Возможные значения - `bsd` и `syslog` -## send_crash_reports {#server_configuration_parameters-logger} +## send_crash_reports {#server_configuration_parameters-send_crash_reports} Настройки для отправки сообщений о сбоях в команду разработчиков ядра ClickHouse через [Sentry](https://sentry.io). Включение этих настроек, особенно в pre-production среде, может дать очень ценную информацию и поможет развитию ClickHouse. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 1321ccee0b1..467d27dad32 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -119,6 +119,16 @@ ClickHouse применяет настройку в тех случаях, ко Значение по умолчанию: 0. +## http_max_uri_size {#http-max-uri-size} + +Устанавливает максимальную длину URI в HTTP-запросе. + +Возможные значения: + +- Положительное целое. + +Значение по умолчанию: 1048576. 
+ ## send_progress_in_http_headers {#settings-send_progress_in_http_headers} Включает или отключает HTTP-заголовки `X-ClickHouse-Progress` в ответах `clickhouse-server`. @@ -2755,6 +2765,50 @@ SELECT * FROM test2; Значение по умолчанию: `0`. +## prefer_column_name_to_alias {#prefer-column-name-to-alias} + +Включает или отключает замену названий столбцов на синонимы в выражениях и секциях запросов, см. [Примечания по использованию синонимов](../../sql-reference/syntax.md#syntax-expression_aliases). Включите эту настройку, чтобы синтаксис синонимов в ClickHouse был более совместим с большинством других СУБД. + +Возможные значения: + +- 0 — синоним подставляется вместо имени столбца. +- 1 — синоним не подставляется вместо имени столбца. + +Значение по умолчанию: `0`. + +**Пример** + +Какие изменения привносит включение и выключение настройки: + +Запрос: + +```sql +SET prefer_column_name_to_alias = 0; +SELECT avg(number) AS number, max(number) FROM numbers(10); +``` + +Результат: + +```text +Received exception from server (version 21.5.1): +Code: 184. DB::Exception: Received from localhost:9000. DB::Exception: Aggregate function avg(number) is found inside another aggregate function in query: While processing avg(number) AS number. +``` + +Запрос: + +```sql +SET prefer_column_name_to_alias = 1; +SELECT avg(number) AS number, max(number) FROM numbers(10); +``` + +Результат: + +```text +┌─number─┬─max(number)─┐ +│ 4.5 │ 9 │ +└────────┴─────────────┘ +``` + ## limit {#limit} Устанавливает максимальное количество строк, возвращаемых запросом. Ограничивает сверху значение, установленное в запросе в секции [LIMIT](../../sql-reference/statements/select/limit.md#limit-clause). diff --git a/docs/ru/operations/system-tables/clusters.md b/docs/ru/operations/system-tables/clusters.md index ddc6849b44d..6bfeb8aa818 100644 --- a/docs/ru/operations/system-tables/clusters.md +++ b/docs/ru/operations/system-tables/clusters.md @@ -4,12 +4,68 @@ Столбцы: -- `cluster` (String) — имя кластера. -- `shard_num` (UInt32) — номер шарда в кластере, начиная с 1. -- `shard_weight` (UInt32) — относительный вес шарда при записи данных. -- `replica_num` (UInt32) — номер реплики в шарде, начиная с 1. -- `host_name` (String) — хост, указанный в конфигурации. -- `host_address` (String) — TIP-адрес хоста, полученный из DNS. -- `port` (UInt16) — порт, на который обращаться для соединения с сервером. -- `user` (String) — имя пользователя, которого использовать для соединения с сервером. +- `cluster` ([String](../../sql-reference/data-types/string.md)) — имя кластера. +- `shard_num` ([UInt32](../../sql-reference/data-types/int-uint.md)) — номер шарда в кластере, начиная с 1. +- `shard_weight` ([UInt32](../../sql-reference/data-types/int-uint.md)) — относительный вес шарда при записи данных. +- `replica_num` ([UInt32](../../sql-reference/data-types/int-uint.md)) — номер реплики в шарде, начиная с 1. +- `host_name` ([String](../../sql-reference/data-types/string.md)) — хост, указанный в конфигурации. +- `host_address` ([String](../../sql-reference/data-types/string.md)) — TIP-адрес хоста, полученный из DNS. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — порт для соединения с сервером. +- `is_local` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, показывающий является ли хост локальным. +- `user` ([String](../../sql-reference/data-types/string.md)) — имя пользователя для соединения с сервером. +- `default_database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных по умолчанию. 
+- `errors_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — количество неудачных попыток хоста получить доступ к реплике. +- `slowdowns_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — количество замен реплики из-за долгого отсутствия ответа от нее при установке соединения при хеджированных запросах. +- `estimated_recovery_time` ([UInt32](../../sql-reference/data-types/int-uint.md)) — количество секунд до момента, когда количество ошибок будет обнулено и реплика станет доступной. +**Пример** + +Запрос: + +```sql +SELECT * FROM system.clusters LIMIT 2 FORMAT Vertical; +``` + +Результат: + +```text +Row 1: +────── +cluster: test_cluster_two_shards +shard_num: 1 +shard_weight: 1 +replica_num: 1 +host_name: 127.0.0.1 +host_address: 127.0.0.1 +port: 9000 +is_local: 1 +user: default +default_database: +errors_count: 0 +slowdowns_count: 0 +estimated_recovery_time: 0 + +Row 2: +────── +cluster: test_cluster_two_shards +shard_num: 2 +shard_weight: 1 +replica_num: 1 +host_name: 127.0.0.2 +host_address: 127.0.0.2 +port: 9000 +is_local: 0 +user: default +default_database: +errors_count: 0 +slowdowns_count: 0 +estimated_recovery_time: 0 +``` + +**Смотрите также** + +- [Table engine Distributed](../../engines/table-engines/special/distributed.md) +- [Настройка distributed_replica_error_cap](../../operations/settings/settings.md#settings-distributed_replica_error_cap) +- [Настройка distributed_replica_error_half_life](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) + +[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/clusters) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/max.md b/docs/ru/sql-reference/aggregate-functions/reference/max.md deleted file mode 100644 index 4f61ecd051d..00000000000 --- a/docs/ru/sql-reference/aggregate-functions/reference/max.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -toc_priority: 3 ---- - -# max {#agg_function-max} - -Вычисляет максимум. - diff --git a/docs/ru/sql-reference/aggregate-functions/reference/max.md b/docs/ru/sql-reference/aggregate-functions/reference/max.md new file mode 120000 index 00000000000..ae47679c80e --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/max.md @@ -0,0 +1 @@ +../../../../en/sql-reference/aggregate-functions/reference/max.md \ No newline at end of file diff --git a/docs/ru/sql-reference/aggregate-functions/reference/min.md b/docs/ru/sql-reference/aggregate-functions/reference/min.md deleted file mode 100644 index 16dd577e790..00000000000 --- a/docs/ru/sql-reference/aggregate-functions/reference/min.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -toc_priority: 2 ---- - -## min {#agg_function-min} - -Вычисляет минимум. 
- diff --git a/docs/ru/sql-reference/aggregate-functions/reference/min.md b/docs/ru/sql-reference/aggregate-functions/reference/min.md new file mode 120000 index 00000000000..61417b347a8 --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/min.md @@ -0,0 +1 @@ +../../../../en/sql-reference/aggregate-functions/reference/min.md \ No newline at end of file diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md index f7239be0ba5..6dce79d8a89 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md @@ -12,6 +12,9 @@ toc_priority: 208 Внутренние состояния функций `quantile*` не объединяются, если они используются в одном запросе. Если вам необходимо вычислить квантили нескольких уровней, используйте функцию [quantiles](#quantiles), это повысит эффективность запроса. +!!! note "Примечание" + Использование `quantileTDigestWeighted` [не рекомендуется для небольших наборов данных](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) и может привести к значительной ошибке. Рассмотрите возможность использования [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) в таких случаях. + **Синтаксис** ``` sql diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index ebd780d0d7d..c9804f57c33 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -20,8 +20,7 @@ DateTime([timezone]) ## Использование {#ispolzovanie} Момент времени сохраняется как [Unix timestamp](https://ru.wikipedia.org/wiki/Unix-%D0%B2%D1%80%D0%B5%D0%BC%D1%8F), независимо от часового пояса и переходов на летнее/зимнее время. Дополнительно, тип `DateTime` позволяет хранить часовой пояс, единый для всей колонки, который влияет на то, как будут отображаться значения типа `DateTime` в текстовом виде и как будут парситься значения заданные в виде строк (‘2020-01-01 05:00:01’). Часовой пояс не хранится в строках таблицы (выборки), а хранится в метаданных колонки. -Список поддерживаемых временных зон можно найти в [IANA Time Zone Database](https://www.iana.org/time-zones). -Пакет `tzdata`, содержащий [базу данных часовых поясов IANA](https://www.iana.org/time-zones), должен быть установлен в системе. Используйте команду `timedatectl list-timezones` для получения списка часовых поясов, известных локальной системе. +Список поддерживаемых часовых поясов можно найти в [IANA Time Zone Database](https://www.iana.org/time-zones) или получить из базы данных, выполнив запрос `SELECT * FROM system.time_zones`. Также [список](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) есть в Википедии. Часовой пояс для столбца типа `DateTime` можно в явном виде установить при создании таблицы. Если часовой пояс не установлен, то ClickHouse использует значение параметра [timezone](../../sql-reference/data-types/datetime.md#server_configuration_parameters-timezone), установленное в конфигурации сервера или в настройках операционной системы на момент запуска сервера. 
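+
+Небольшой набросок для иллюстрации (таблица `tz_demo` здесь условная): список зон можно получить из `system.time_zones`, а нужную зону — закрепить за столбцом при создании таблицы.
+
+```sql
+SELECT * FROM system.time_zones WHERE time_zone LIKE 'Europe/%' LIMIT 3;
+
+CREATE TABLE tz_demo
+(
+    `dt` DateTime('Europe/Moscow')
+)
+ENGINE = MergeTree
+ORDER BY dt;
+```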
diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 0adccbe888b..b442a782100 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -23,13 +23,53 @@ SELECT └─────────────────────┴────────────┴────────────┴─────────────────────┘ ``` +## timeZone {#timezone} + +Возвращает часовой пояс сервера. + +**Синтаксис** + +``` sql +timeZone() +``` + +Псевдоним: `timezone`. + +**Возвращаемое значение** + +- Часовой пояс. + +Тип: [String](../../sql-reference/data-types/string.md). + ## toTimeZone {#totimezone} -Переводит дату или дату-с-временем в указанный часовой пояс. Часовой пояс (таймзона) это атрибут типов Date/DateTime, внутреннее значение (количество секунд) поля таблицы или колонки результата не изменяется, изменяется тип поля и автоматически его текстовое отображение. +Переводит дату или дату с временем в указанный часовой пояс. Часовой пояс - это атрибут типов `Date` и `DateTime`. Внутреннее значение (количество секунд) поля таблицы или результирующего столбца не изменяется, изменяется тип поля и, соответственно, его текстовое отображение. + +**Синтаксис** + +``` sql +toTimezone(value, timezone) +``` + +Псевдоним: `toTimezone`. + +**Аргументы** + +- `value` — время или дата с временем. [DateTime64](../../sql-reference/data-types/datetime64.md). +- `timezone` — часовой пояс для возвращаемого значения. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Дата с временем. + +Тип: [DateTime](../../sql-reference/data-types/datetime.md). + +**Пример** + +Запрос: ```sql -SELECT - toDateTime('2019-01-01 00:00:00', 'UTC') AS time_utc, +SELECT toDateTime('2019-01-01 00:00:00', 'UTC') AS time_utc, toTypeName(time_utc) AS type_utc, toInt32(time_utc) AS int32utc, toTimeZone(time_utc, 'Asia/Yekaterinburg') AS time_yekat, @@ -40,6 +80,7 @@ SELECT toInt32(time_samoa) AS int32samoa FORMAT Vertical; ``` +Результат: ```text Row 1: @@ -57,6 +98,82 @@ int32samoa: 1546300800 `toTimeZone(time_utc, 'Asia/Yekaterinburg')` изменяет тип `DateTime('UTC')` в `DateTime('Asia/Yekaterinburg')`. Значение (unix-время) 1546300800 остается неизменным, но текстовое отображение (результат функции toString()) меняется `time_utc: 2019-01-01 00:00:00` в `time_yekat: 2019-01-01 05:00:00`. +## timeZoneOf {#timezoneof} + +Возвращает название часового пояса для значений типа [DateTime](../../sql-reference/data-types/datetime.md) и [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Синтаксис** + +``` sql +timeZoneOf(value) +``` + +Псевдоним: `timezoneOf`. + +**Аргументы** + +- `value` — Дата с временем. [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Возвращаемое значение** + +- Название часового пояса. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: +``` sql +SELECT timezoneOf(now()); +``` + +Результат: +``` text +┌─timezoneOf(now())─┐ +│ Etc/UTC │ +└───────────────────┘ +``` + +## timeZoneOffset {#timezoneoffset} + +Возвращает смещение часового пояса в секундах от [UTC](https://ru.wikipedia.org/wiki/Всемирное_координированное_время). Функция учитывает [летнее время](https://ru.wikipedia.org/wiki/Летнее_время) и исторические изменения часовых поясов, которые действовали на указанную дату. +Для вычисления смещения используется информация из [базы данных IANA](https://www.iana.org/time-zones). 
+
+**Синтаксис**
+
+``` sql
+timeZoneOffset(value)
+```
+
+Псевдоним: `timezoneOffset`.
+
+**Аргументы**
+
+- `value` — Дата с временем. [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md).
+
+**Возвращаемое значение**
+
+- Смещение в секундах от UTC.
+
+Тип: [Int32](../../sql-reference/data-types/int-uint.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT toDateTime('2021-04-21 10:20:30', 'Europe/Moscow') AS Time, toTypeName(Time) AS Type,
+       timeZoneOffset(Time) AS Offset_in_seconds, (Offset_in_seconds / 3600) AS Offset_in_hours;
+```
+
+Результат:
+
+``` text
+┌────────────────Time─┬─Type──────────────────────┬─Offset_in_seconds─┬─Offset_in_hours─┐
+│ 2021-04-21 10:20:30 │ DateTime('Europe/Moscow') │             10800 │               3 │
+└─────────────────────┴───────────────────────────┴───────────────────┴─────────────────┘
+```
+
 ## toYear {#toyear}
 
 Переводит дату или дату-с-временем в число типа UInt16, содержащее номер года (AD).
@@ -943,4 +1060,3 @@ SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime;
 │ 2009-02-11 14:42:23 │
 └─────────────────────┘
 ```
-
diff --git a/docs/ru/sql-reference/functions/functions-for-nulls.md b/docs/ru/sql-reference/functions/functions-for-nulls.md
index 365dba75da7..7285f803264 100644
--- a/docs/ru/sql-reference/functions/functions-for-nulls.md
+++ b/docs/ru/sql-reference/functions/functions-for-nulls.md
@@ -224,7 +224,7 @@ assumeNotNull(x)
 **Возвращаемые значения**
 
 - Исходное значение с не `Nullable` типом, если оно — не `NULL`.
-- Значение по умолчанию для не `Nullable` типа, если исходное значение — `NULL`.
+- Неспецифицированный результат, зависящий от реализации, если исходное значение — `NULL`.
 
 **Пример**
diff --git a/docs/ru/sql-reference/statements/alter/index/index.md b/docs/ru/sql-reference/statements/alter/index/index.md
index 862def5cc04..632f11ed906 100644
--- a/docs/ru/sql-reference/statements/alter/index/index.md
+++ b/docs/ru/sql-reference/statements/alter/index/index.md
@@ -9,8 +9,9 @@ toc_title: "Манипуляции с индексами"
 Добавить или удалить индекс можно с помощью операций
 
 ``` sql
-ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [AFTER name]
-ALTER TABLE [db].name DROP INDEX name
+ALTER TABLE [db.]name ADD INDEX name expression TYPE type GRANULARITY value [AFTER name]
+ALTER TABLE [db.]name DROP INDEX name
+ALTER TABLE [db.]table MATERIALIZE INDEX name IN PARTITION partition_name
 ```
 
 Поддерживается только таблицами семейства `*MergeTree`.
@@ -18,6 +19,7 @@ ALTER TABLE [db].name DROP INDEX name
 Команда `ADD INDEX` добавляет описание индексов в метаданные, а `DROP INDEX` удаляет индекс из метаданных и стирает файлы индекса с диска, поэтому они легковесные и работают мгновенно.
 
 Если индекс появился в метаданных, то он начнет считаться в последующих слияниях и записях в таблицу, а не сразу после выполнения операции `ALTER`.
+`MATERIALIZE INDEX` — перестраивает индекс в указанной партиции. Реализовано как мутация.
 
 Запрос на изменение индексов реплицируется, сохраняя новые метаданные в ZooKeeper и применяя изменения на всех репликах.
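+
+Набросок того, как это может выглядеть (таблица `test.visits`, столбец `CounterID`, индекс `idx` и партиция `201902` здесь условные):
+
+``` sql
+ALTER TABLE test.visits ADD INDEX idx (CounterID) TYPE minmax GRANULARITY 4;
+ALTER TABLE test.visits MATERIALIZE INDEX idx IN PARTITION 201902;
+```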
diff --git a/docs/ru/sql-reference/statements/detach.md b/docs/ru/sql-reference/statements/detach.md index d707acd7ccf..af915d38772 100644 --- a/docs/ru/sql-reference/statements/detach.md +++ b/docs/ru/sql-reference/statements/detach.md @@ -10,7 +10,7 @@ toc_title: DETACH Синтаксис: ``` sql -DETACH TABLE|VIEW [IF EXISTS] [db.]name [PERMANENTLY] [ON CLUSTER cluster] +DETACH TABLE|VIEW [IF EXISTS] [db.]name [ON CLUSTER cluster] [PERMANENTLY] ``` Но ни данные, ни метаданные таблицы или материализованного представления не удаляются. При следующем запуске сервера, если не было использовано `PERMANENTLY`, сервер прочитает метаданные и снова узнает о таблице/представлении. Если таблица или представление были отключены перманентно, сервер не подключит их обратно автоматически. diff --git a/docs/ru/sql-reference/statements/explain.md b/docs/ru/sql-reference/statements/explain.md new file mode 100644 index 00000000000..ef2a670a87e --- /dev/null +++ b/docs/ru/sql-reference/statements/explain.md @@ -0,0 +1,152 @@ +--- +toc_priority: 39 +toc_title: EXPLAIN +--- + +# EXPLAIN {#explain} + +Выводит план выполнения запроса. + +Синтаксис: + +```sql +EXPLAIN [AST | SYNTAX | PLAN | PIPELINE] [setting = value, ...] SELECT ... [FORMAT ...] +``` + +Пример: + +```sql +EXPLAIN SELECT sum(number) FROM numbers(10) UNION ALL SELECT sum(number) FROM numbers(10) ORDER BY sum(number) ASC FORMAT TSV; +``` + +```sql +Union + Expression (Projection) + Expression (Before ORDER BY and SELECT) + Aggregating + Expression (Before GROUP BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + ReadFromStorage (SystemNumbers) + Expression (Projection) + MergingSorted (Merge sorted streams for ORDER BY) + MergeSorting (Merge sorted blocks for ORDER BY) + PartialSorting (Sort each block for ORDER BY) + Expression (Before ORDER BY and SELECT) + Aggregating + Expression (Before GROUP BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + ReadFromStorage (SystemNumbers) +``` + +## Типы EXPLAIN {#explain-types} + +- `AST` — абстрактное синтаксическое дерево. +- `SYNTAX` — текст запроса после оптимизации на уровне AST. +- `PLAN` — план выполнения запроса. +- `PIPELINE` — конвейер выполнения запроса. + +### EXPLAIN AST {#explain-ast} + +Дамп AST запроса. + +Пример: + +```sql +EXPLAIN AST SELECT 1; +``` + +```sql +SelectWithUnionQuery (children 1) + ExpressionList (children 1) + SelectQuery (children 1) + ExpressionList (children 1) + Literal UInt64_1 +``` + +### EXPLAIN SYNTAX {#explain-syntax} + +Возвращает текст запроса после применения синтаксических оптимизаций. + +Пример: + +```sql +EXPLAIN SYNTAX SELECT * FROM system.numbers AS a, system.numbers AS b, system.numbers AS c; +``` + +```sql +SELECT + `--a.number` AS `a.number`, + `--b.number` AS `b.number`, + number AS `c.number` +FROM +( + SELECT + number AS `--a.number`, + b.number AS `--b.number` + FROM system.numbers AS a + CROSS JOIN system.numbers AS b +) AS `--.s` +CROSS JOIN system.numbers AS c +``` + +### EXPLAIN PLAN {#explain-plan} + +Дамп шагов выполнения запроса. + +Настройки: + +- `header` — выводит выходной заголовок для шага. По умолчанию: 0. +- `description` — выводит описание шага. По умолчанию: 1. +- `actions` — выводит подробную информацию о действиях, выполняемых на данном шаге. По умолчанию: 0. 
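+
+Настройки указываются через запятую сразу после типа плана. Набросок запроса, включающего вывод заголовков и действий:
+
+```sql
+EXPLAIN PLAN header = 1, actions = 1 SELECT sum(number) FROM numbers(10);
+```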
+ +Пример: + +```sql +EXPLAIN SELECT sum(number) FROM numbers(10) GROUP BY number % 4; +``` + +```sql +Union + Expression (Projection) + Expression (Before ORDER BY and SELECT) + Aggregating + Expression (Before GROUP BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + ReadFromStorage (SystemNumbers) +``` + +!!! note "Примечание" + Оценка стоимости выполнения шага и запроса не поддерживается. + +### EXPLAIN PIPELINE {#explain-pipeline} + +Настройки: + +- `header` — выводит заголовок для каждого выходного порта. По умолчанию: 0. +- `graph` — выводит граф, описанный на языке [DOT](https://ru.wikipedia.org/wiki/DOT_(язык)). По умолчанию: 0. +- `compact` — выводит граф в компактном режиме, если включена настройка `graph`. По умолчанию: 1. +- `indexes` — показывает используемые индексы, количество отфильтрованных кусков и гранул для каждого примененного индекса. По умолчанию: 0. Поддерживается для таблиц семейства [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). + +Пример: + +```sql +EXPLAIN PIPELINE SELECT sum(number) FROM numbers_mt(100000) GROUP BY number % 4; +``` + +```sql +(Union) +(Expression) +ExpressionTransform + (Expression) + ExpressionTransform + (Aggregating) + Resize 2 → 1 + AggregatingTransform × 2 + (Expression) + ExpressionTransform × 2 + (SettingQuotaAndLimits) + (ReadFromStorage) + NumbersMt × 2 0 → 1 +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/explain/) diff --git a/docs/ru/sql-reference/syntax.md b/docs/ru/sql-reference/syntax.md index 6a923fd6b58..dbbf5f92612 100644 --- a/docs/ru/sql-reference/syntax.md +++ b/docs/ru/sql-reference/syntax.md @@ -128,7 +128,7 @@ expr AS alias Например, `SELECT table_name_alias.column_name FROM table_name table_name_alias`. - В функции [CAST](sql_reference/syntax.md#type_conversion_function-cast), ключевое слово `AS` имеет другое значение. Смотрите описание функции. + В функции [CAST](../sql_reference/syntax.md#type_conversion_function-cast), ключевое слово `AS` имеет другое значение. Смотрите описание функции. - `expr` — любое выражение, которое поддерживает ClickHouse. @@ -138,7 +138,7 @@ expr AS alias Например, `SELECT "table t".column_name FROM table_name AS "table t"`. -### Примечания по использованию {#primechaniia-po-ispolzovaniiu} +### Примечания по использованию {#notes-on-usage} Синонимы являются глобальными для запроса или подзапроса, и вы можете определить синоним в любой части запроса для любого выражения. Например, `SELECT (1 AS n) + 2, n`. @@ -169,9 +169,9 @@ Received exception from server (version 18.14.17): Code: 184. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: Aggregate function sum(b) is found inside another aggregate function in query. ``` -В этом примере мы объявили таблицу `t` со столбцом `b`. Затем, при выборе данных, мы определили синоним `sum(b) AS b`. Поскольку синонимы глобальные, то ClickHouse заменил литерал `b` в выражении `argMax(a, b)` выражением `sum(b)`. Эта замена вызвала исключение. +В этом примере мы объявили таблицу `t` со столбцом `b`. Затем, при выборе данных, мы определили синоним `sum(b) AS b`. Поскольку синонимы глобальные, то ClickHouse заменил литерал `b` в выражении `argMax(a, b)` выражением `sum(b)`. Эта замена вызвала исключение. Можно изменить это поведение, включив настройку [prefer_column_name_to_alias](../operations/settings/settings.md#prefer_column_name_to_alias), для этого нужно установить ее в значение `1`. 
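+
+Набросок запроса к той же таблице `t` с включенной настройкой (результат зависит от данных и поэтому не приводится):
+
+```sql
+SET prefer_column_name_to_alias = 1;
+SELECT argMax(a, b), sum(b) AS b FROM t;
+```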
-## Звёздочка {#zviozdochka} +## Звёздочка {#asterisk} В запросе `SELECT`, вместо выражения может стоять звёздочка. Подробнее смотрите раздел «SELECT». @@ -180,4 +180,3 @@ Code: 184. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception Выражение представляет собой функцию, идентификатор, литерал, применение оператора, выражение в скобках, подзапрос, звёздочку. А также может содержать синоним. Список выражений - одно выражение или несколько выражений через запятую. Функции и операторы, в свою очередь, в качестве аргументов, могут иметь произвольные выражения. - diff --git a/docs/zh/commercial/cloud.md b/docs/zh/commercial/cloud.md index c74ffa93e9a..e0a297f51c8 100644 --- a/docs/zh/commercial/cloud.md +++ b/docs/zh/commercial/cloud.md @@ -31,7 +31,7 @@ toc_title: 云 ## 阿里云 {#alibaba-cloud} -阿里云的 ClickHouse 托管服务 [中国站](https://www.aliyun.com/product/clickhouse) (国际站于2021年5月初开放) 提供以下主要功能: +[阿里云的 ClickHouse 托管服务](https://www.alibabacloud.com/zh/product/clickhouse) 提供以下主要功能: - 基于阿里飞天分布式系统的高可靠云盘存储引擎 - 按需扩容,无需手动进行数据搬迁 diff --git a/docs/zh/getting-started/example-datasets/ontime.md b/docs/zh/getting-started/example-datasets/ontime.md index 3921f71fc7e..6d888b2196c 100644 --- a/docs/zh/getting-started/example-datasets/ontime.md +++ b/docs/zh/getting-started/example-datasets/ontime.md @@ -29,126 +29,127 @@ done 创建表结构: ``` sql -CREATE TABLE `ontime` ( - `Year` UInt16, - `Quarter` UInt8, - `Month` UInt8, - `DayofMonth` UInt8, - `DayOfWeek` UInt8, - `FlightDate` Date, - `UniqueCarrier` FixedString(7), - `AirlineID` Int32, - `Carrier` FixedString(2), - `TailNum` String, - `FlightNum` String, - `OriginAirportID` Int32, - `OriginAirportSeqID` Int32, - `OriginCityMarketID` Int32, - `Origin` FixedString(5), - `OriginCityName` String, - `OriginState` FixedString(2), - `OriginStateFips` String, - `OriginStateName` String, - `OriginWac` Int32, - `DestAirportID` Int32, - `DestAirportSeqID` Int32, - `DestCityMarketID` Int32, - `Dest` FixedString(5), - `DestCityName` String, - `DestState` FixedString(2), - `DestStateFips` String, - `DestStateName` String, - `DestWac` Int32, - `CRSDepTime` Int32, - `DepTime` Int32, - `DepDelay` Int32, - `DepDelayMinutes` Int32, - `DepDel15` Int32, - `DepartureDelayGroups` String, - `DepTimeBlk` String, - `TaxiOut` Int32, - `WheelsOff` Int32, - `WheelsOn` Int32, - `TaxiIn` Int32, - `CRSArrTime` Int32, - `ArrTime` Int32, - `ArrDelay` Int32, - `ArrDelayMinutes` Int32, - `ArrDel15` Int32, - `ArrivalDelayGroups` Int32, - `ArrTimeBlk` String, - `Cancelled` UInt8, - `CancellationCode` FixedString(1), - `Diverted` UInt8, - `CRSElapsedTime` Int32, - `ActualElapsedTime` Int32, - `AirTime` Int32, - `Flights` Int32, - `Distance` Int32, - `DistanceGroup` UInt8, - `CarrierDelay` Int32, - `WeatherDelay` Int32, - `NASDelay` Int32, - `SecurityDelay` Int32, - `LateAircraftDelay` Int32, - `FirstDepTime` String, - `TotalAddGTime` String, - `LongestAddGTime` String, - `DivAirportLandings` String, - `DivReachedDest` String, - `DivActualElapsedTime` String, - `DivArrDelay` String, - `DivDistance` String, - `Div1Airport` String, - `Div1AirportID` Int32, - `Div1AirportSeqID` Int32, - `Div1WheelsOn` String, - `Div1TotalGTime` String, - `Div1LongestGTime` String, - `Div1WheelsOff` String, - `Div1TailNum` String, - `Div2Airport` String, - `Div2AirportID` Int32, - `Div2AirportSeqID` Int32, - `Div2WheelsOn` String, - `Div2TotalGTime` String, - `Div2LongestGTime` String, - `Div2WheelsOff` String, - `Div2TailNum` String, - `Div3Airport` String, - `Div3AirportID` Int32, - `Div3AirportSeqID` Int32, 
- `Div3WheelsOn` String, - `Div3TotalGTime` String, - `Div3LongestGTime` String, - `Div3WheelsOff` String, - `Div3TailNum` String, - `Div4Airport` String, - `Div4AirportID` Int32, - `Div4AirportSeqID` Int32, - `Div4WheelsOn` String, - `Div4TotalGTime` String, - `Div4LongestGTime` String, - `Div4WheelsOff` String, - `Div4TailNum` String, - `Div5Airport` String, - `Div5AirportID` Int32, - `Div5AirportSeqID` Int32, - `Div5WheelsOn` String, - `Div5TotalGTime` String, - `Div5LongestGTime` String, - `Div5WheelsOff` String, - `Div5TailNum` String +CREATE TABLE `ontime` +( + `Year` UInt16, + `Quarter` UInt8, + `Month` UInt8, + `DayofMonth` UInt8, + `DayOfWeek` UInt8, + `FlightDate` Date, + `Reporting_Airline` String, + `DOT_ID_Reporting_Airline` Int32, + `IATA_CODE_Reporting_Airline` String, + `Tail_Number` Int32, + `Flight_Number_Reporting_Airline` String, + `OriginAirportID` Int32, + `OriginAirportSeqID` Int32, + `OriginCityMarketID` Int32, + `Origin` FixedString(5), + `OriginCityName` String, + `OriginState` FixedString(2), + `OriginStateFips` String, + `OriginStateName` String, + `OriginWac` Int32, + `DestAirportID` Int32, + `DestAirportSeqID` Int32, + `DestCityMarketID` Int32, + `Dest` FixedString(5), + `DestCityName` String, + `DestState` FixedString(2), + `DestStateFips` String, + `DestStateName` String, + `DestWac` Int32, + `CRSDepTime` Int32, + `DepTime` Int32, + `DepDelay` Int32, + `DepDelayMinutes` Int32, + `DepDel15` Int32, + `DepartureDelayGroups` String, + `DepTimeBlk` String, + `TaxiOut` Int32, + `WheelsOff` Int32, + `WheelsOn` Int32, + `TaxiIn` Int32, + `CRSArrTime` Int32, + `ArrTime` Int32, + `ArrDelay` Int32, + `ArrDelayMinutes` Int32, + `ArrDel15` Int32, + `ArrivalDelayGroups` Int32, + `ArrTimeBlk` String, + `Cancelled` UInt8, + `CancellationCode` FixedString(1), + `Diverted` UInt8, + `CRSElapsedTime` Int32, + `ActualElapsedTime` Int32, + `AirTime` Nullable(Int32), + `Flights` Int32, + `Distance` Int32, + `DistanceGroup` UInt8, + `CarrierDelay` Int32, + `WeatherDelay` Int32, + `NASDelay` Int32, + `SecurityDelay` Int32, + `LateAircraftDelay` Int32, + `FirstDepTime` String, + `TotalAddGTime` String, + `LongestAddGTime` String, + `DivAirportLandings` String, + `DivReachedDest` String, + `DivActualElapsedTime` String, + `DivArrDelay` String, + `DivDistance` String, + `Div1Airport` String, + `Div1AirportID` Int32, + `Div1AirportSeqID` Int32, + `Div1WheelsOn` String, + `Div1TotalGTime` String, + `Div1LongestGTime` String, + `Div1WheelsOff` String, + `Div1TailNum` String, + `Div2Airport` String, + `Div2AirportID` Int32, + `Div2AirportSeqID` Int32, + `Div2WheelsOn` String, + `Div2TotalGTime` String, + `Div2LongestGTime` String, + `Div2WheelsOff` String, + `Div2TailNum` String, + `Div3Airport` String, + `Div3AirportID` Int32, + `Div3AirportSeqID` Int32, + `Div3WheelsOn` String, + `Div3TotalGTime` String, + `Div3LongestGTime` String, + `Div3WheelsOff` String, + `Div3TailNum` String, + `Div4Airport` String, + `Div4AirportID` Int32, + `Div4AirportSeqID` Int32, + `Div4WheelsOn` String, + `Div4TotalGTime` String, + `Div4LongestGTime` String, + `Div4WheelsOff` String, + `Div4TailNum` String, + `Div5Airport` String, + `Div5AirportID` Int32, + `Div5AirportSeqID` Int32, + `Div5WheelsOn` String, + `Div5TotalGTime` String, + `Div5LongestGTime` String, + `Div5WheelsOff` String, + `Div5TailNum` String ) ENGINE = MergeTree -PARTITION BY Year -ORDER BY (Carrier, FlightDate) -SETTINGS index_granularity = 8192; + PARTITION BY Year + ORDER BY (IATA_CODE_Reporting_Airline, FlightDate) + SETTINGS 
index_granularity = 8192; ``` 加载数据: ``` bash -$ for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done +ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'" ``` ## 下载预处理好的分区数据 {#xia-zai-yu-chu-li-hao-de-fen-qu-shu-ju} @@ -212,7 +213,7 @@ LIMIT 10; Q4. 查询2007年各航空公司延误超过10分钟以上的次数 ``` sql -SELECT Carrier, count(*) +SELECT IATA_CODE_Reporting_Airline AS Carrier, count(*) FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY Carrier @@ -226,29 +227,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3 FROM ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year=2007 GROUP BY Carrier -) +) q JOIN ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c2 FROM ontime WHERE Year=2007 GROUP BY Carrier -) USING Carrier +) qq USING Carrier ORDER BY c3 DESC; ``` 更好的查询版本: ``` sql -SELECT Carrier, avg(DepDelay>10)*100 AS c3 +SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year=2007 GROUP BY Carrier @@ -262,29 +263,29 @@ SELECT Carrier, c, c2, c*100/c2 as c3 FROM ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year>=2000 AND Year<=2008 GROUP BY Carrier -) +) q JOIN ( SELECT - Carrier, + IATA_CODE_Reporting_Airline AS Carrier, count(*) AS c2 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier -) USING Carrier +) qq USING Carrier ORDER BY c3 DESC; ``` 更好的查询版本: ``` sql -SELECT Carrier, avg(DepDelay>10)*100 AS c3 +SELECT IATA_CODE_Reporting_Airline AS Carrier, avg(DepDelay>10)*100 AS c3 FROM ontime WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier @@ -303,7 +304,7 @@ FROM from ontime WHERE DepDelay>10 GROUP BY Year -) +) q JOIN ( select @@ -311,7 +312,7 @@ JOIN count(*) as c2 from ontime GROUP BY Year -) USING (Year) +) qq USING (Year) ORDER BY Year; ``` @@ -346,7 +347,7 @@ Q10. ``` sql SELECT - min(Year), max(Year), Carrier, count(*) AS cnt, + min(Year), max(Year), IATA_CODE_Reporting_Airline AS Carrier, count(*) AS cnt, sum(ArrDelayMinutes>30) AS flights_delayed, round(sum(ArrDelayMinutes>30)/count(*),2) AS rate FROM ontime diff --git a/docs/zh/operations/system-tables/functions.md b/docs/zh/operations/system-tables/functions.md index ff716b0bc6c..8229a94cd5c 100644 --- a/docs/zh/operations/system-tables/functions.md +++ b/docs/zh/operations/system-tables/functions.md @@ -1,13 +1,30 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- +# system.functions {#system-functions} -# 系统。功能 {#system-functions} - -包含有关正常函数和聚合函数的信息。 +包含有关常规函数和聚合函数的信息。 列: - `name`(`String`) – The name of the function. - `is_aggregate`(`UInt8`) — Whether the function is aggregate. + +**举例** +``` + SELECT * FROM system.functions LIMIT 10; +``` + +``` +┌─name─────────────────────┬─is_aggregate─┬─case_insensitive─┬─alias_to─┐ +│ sumburConsistentHash │ 0 │ 0 │ │ +│ yandexConsistentHash │ 0 │ 0 │ │ +│ demangle │ 0 │ 0 │ │ +│ addressToLine │ 0 │ 0 │ │ +│ JSONExtractRaw │ 0 │ 0 │ │ +│ JSONExtractKeysAndValues │ 0 │ 0 │ │ +│ JSONExtract │ 0 │ 0 │ │ +│ JSONExtractString │ 0 │ 0 │ │ +│ JSONExtractFloat │ 0 │ 0 │ │ +│ JSONExtractInt │ 0 │ 0 │ │ +└──────────────────────────┴──────────────┴──────────────────┴──────────┘ + +10 rows in set. Elapsed: 0.002 sec. 
+```
diff --git a/docs/zh/sql-reference/functions/array-functions.md b/docs/zh/sql-reference/functions/array-functions.md
index ac5dae3a97e..4f6dbc0d87d 100644
--- a/docs/zh/sql-reference/functions/array-functions.md
+++ b/docs/zh/sql-reference/functions/array-functions.md
@@ -606,7 +606,7 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res;
 
 如果要获取数组中唯一项的列表，可以使用arrayReduce(‘groupUniqArray’,arr)。
 
-## arryjoin(arr) {#array-functions-join}
+## arrayJoin(arr) {#array-functions-join}
 
 一个特殊的功能。请参见[«ArrayJoin函数»](array-join.md#functions_arrayjoin)部分。
diff --git a/docs/zh/sql-reference/statements/select/join.md b/docs/zh/sql-reference/statements/select/join.md
index 2976484e09a..407c8ca6101 100644
--- a/docs/zh/sql-reference/statements/select/join.md
+++ b/docs/zh/sql-reference/statements/select/join.md
@@ -43,15 +43,15 @@ ClickHouse中提供的其他联接类型:
 
 Also the behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) setting.
 
-### ASOF加入使用 {#asof-join-usage}
+### ASOF JOIN使用 {#asof-join-usage}
 
 `ASOF JOIN` 当您需要连接没有完全匹配的记录时非常有用。
 
-算法需要表中的特殊列。 本专栏:
+该算法需要表中的特殊列。 该列需要满足:
 
 - 必须包含有序序列。
-- 可以是以下类型之一: [Int*,UInt*](../../../sql-reference/data-types/int-uint.md), [浮动\*](../../../sql-reference/data-types/float.md), [日期](../../../sql-reference/data-types/date.md), [日期时间](../../../sql-reference/data-types/datetime.md), [十进制\*](../../../sql-reference/data-types/decimal.md).
-- 不能是唯一的列 `JOIN`
+- 可以是以下类型之一: [Int*,UInt*](../../../sql-reference/data-types/int-uint.md), [Float\*](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal\*](../../../sql-reference/data-types/decimal.md).
+- 不能是`JOIN`子句中唯一的列
 
 语法 `ASOF JOIN ... ON`:
 
@@ -62,9 +62,9 @@ ASOF LEFT JOIN table_2
 ON equi_cond AND closest_match_cond
 ```
 
-您可以使用任意数量的相等条件和恰好一个最接近的匹配条件。 例如, `SELECT count() FROM table_1 ASOF LEFT JOIN table_2 ON table_1.a == table_2.b AND table_2.t <= table_1.t`.
+您可以使用任意数量的相等条件和一个且只有一个最接近的匹配条件。 例如, `SELECT count() FROM table_1 ASOF LEFT JOIN table_2 ON table_1.a == table_2.b AND table_2.t <= table_1.t`.
 
-支持最接近匹配的条件: `>`, `>=`, `<`, `<=`.
+支持最接近匹配的运算符: `>`, `>=`, `<`, `<=`.
 
 语法 `ASOF JOIN ... USING`:
 
@@ -75,9 +75,9 @@ ASOF JOIN table_2
 USING (equi_column1, ... equi_columnN, asof_column)
 ```
 
-`ASOF JOIN` 用途 `equi_columnX` 对于加入平等和 `asof_column` 用于加入与最接近的比赛 `table_1.asof_column >= table_2.asof_column` 条件。 该 `asof_column` 列总是在最后一个 `USING` 条款
+`table_1.asof_column >= table_2.asof_column` 中, `ASOF JOIN` 使用 `equi_columnX` 来进行条件匹配, `asof_column` 用于JOIN最接近匹配。 `asof_column` 列总是在最后一个 `USING` 条件中。
 
-例如,请考虑下表:
+例如,参考下表:
 
      table_1                           table_2
   event   | ev_time | user_id       event   | ev_time | user_id
 
   event_1_1 |  12:00 |  42          event_2_1 |  11:59 |   42
   event_1_2 |  13:00 |  42          event_2_3 |  13:00 |   42
 ...                                 ...
 
-`ASOF JOIN` 可以从用户事件的时间戳 `table_1` 并找到一个事件 `table_2` 其中时间戳最接近事件的时间戳 `table_1` 对应于最接近的匹配条件。 如果可用，则相等的时间戳值是最接近的值。 在这里，该 `user_id` 列可用于连接相等和 `ev_time` 列可用于在最接近的匹配加入。 在我们的例子中, `event_1_1` 可以加入 `event_2_1` 和 `event_1_2` 可以加入 `event_2_3`,但是 `event_2_2` 不能加入。
+`ASOF JOIN`会从 `table_2` 中的用户事件时间戳找出和 `table_1` 中用户事件时间戳中最近的一个时间戳,来满足最接近匹配的条件。如果有的话,则相等的时间戳值是最接近的值。在此例中,`user_id` 列可用于条件匹配,`ev_time` 列可用于最接近匹配。在此例中,`event_1_1` 可以 JOIN `event_2_1`,`event_1_2` 可以JOIN `event_2_3`,但是 `event_2_2` 不能被JOIN。
 
 !!!
note "注" - `ASOF` 加入是 **不** 支持在 [加入我们](../../../engines/table-engines/special/join.md) 表引擎。 + `ASOF JOIN`在 [JOIN](../../../engines/table-engines/special/join.md) 表引擎中 **不受** 支持。 ## 分布式联接 {#global-join} diff --git a/programs/bash-completion/completions/clickhouse b/programs/bash-completion/completions/clickhouse index c4b77cf3f7a..fc55398dcf1 100644 --- a/programs/bash-completion/completions/clickhouse +++ b/programs/bash-completion/completions/clickhouse @@ -23,19 +23,9 @@ function _complete_for_clickhouse_entrypoint_bin() fi util="${words[1]}" - case "$prev" in - -C|--config-file|--config) - return - ;; - # Argh... This looks like a bash bug... - # Redirections are passed to the completion function - # although it is managed by the shell directly... - '<'|'>'|'>>'|[12]'>'|[12]'>>') - return - ;; - esac - - COMPREPLY=( $(compgen -W "$(_clickhouse_get_options "$cmd" "$util")" -- "$cur") ) + if _complete_for_clickhouse_generic_bin_impl "$prev"; then + COMPREPLY=( $(compgen -W "$(_clickhouse_get_options "$cmd" "$util")" -- "$cur") ) + fi return 0 } diff --git a/programs/bash-completion/completions/clickhouse-bootstrap b/programs/bash-completion/completions/clickhouse-bootstrap index dc8dcd5ad8d..15b2140161d 100644 --- a/programs/bash-completion/completions/clickhouse-bootstrap +++ b/programs/bash-completion/completions/clickhouse-bootstrap @@ -15,6 +15,13 @@ shopt -s extglob export _CLICKHOUSE_COMPLETION_LOADED=1 +CLICKHOUSE_QueryProcessingStage=( + complete + fetch_columns + with_mergeable_state + with_mergeable_state_after_aggregation +) + function _clickhouse_bin_exist() { [ -x "$1" ] || command -v "$1" >& /dev/null; } @@ -30,6 +37,33 @@ function _clickhouse_get_options() "$@" --help 2>&1 | awk -F '[ ,=<>]' '{ for (i=1; i <= NF; ++i) { if (substr($i, 0, 1) == "-" && length($i) > 1) print $i; } }' | sort -u } +function _complete_for_clickhouse_generic_bin_impl() +{ + local prev=$1 && shift + + case "$prev" in + -C|--config-file|--config) + return 1 + ;; + --stage) + COMPREPLY=( $(compgen -W "${CLICKHOUSE_QueryProcessingStage[*]}" -- "$cur") ) + return 1 + ;; + --host) + COMPREPLY=( $(compgen -A hostname -- "$cur") ) + return 1 + ;; + # Argh... This looks like a bash bug... + # Redirections are passed to the completion function + # although it is managed by the shell directly... + '<'|'>'|'>>'|[12]'>'|[12]'>>') + return 1 + ;; + esac + + return 0 +} + function _complete_for_clickhouse_generic_bin() { local cur prev @@ -39,19 +73,9 @@ function _complete_for_clickhouse_generic_bin() COMPREPLY=() _get_comp_words_by_ref cur prev - case "$prev" in - -C|--config-file|--config) - return - ;; - # Argh... This looks like a bash bug... - # Redirections are passed to the completion function - # although it is managed by the shell directly... - '<'|'>'|'>>'|[12]'>'|[12]'>>') - return - ;; - esac - - COMPREPLY=( $(compgen -W "$(_clickhouse_get_options "$cmd")" -- "$cur") ) + if _complete_for_clickhouse_generic_bin_impl "$prev"; then + COMPREPLY=( $(compgen -W "$(_clickhouse_get_options "$cmd")" -- "$cur") ) + fi return 0 } diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 1aec3677b41..fbcdd0b8e9b 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1461,18 +1461,25 @@ private: // when `lambda()` function gets substituted into a wrong place. // To avoid dealing with these cases, run the check only for the // queries we were able to successfully execute. 
- // The final caveat is that sometimes WITH queries are not executed, + // Another caveat is that sometimes WITH queries are not executed, // if they are not referenced by the main SELECT, so they can still // have the aforementioned problems. Disable this check for such // queries, for lack of a better solution. - if (!have_error && queryHasWithClause(parsed_query.get())) + // There is also a problem that fuzzer substitutes positive Int64 + // literals or Decimal literals, which are then parsed back as + // UInt64, and suddenly duplicate alias substitition starts or stops + // working (ASTWithAlias::formatImpl) or something like that. + // So we compare not even the first and second formatting of the + // query, but second and third. + // If you have to add any more workarounds to this check, just remove + // it altogether, it's not so useful. + if (!have_error && !queryHasWithClause(parsed_query.get())) { - ASTPtr parsed_formatted_query; + ASTPtr ast_2; try { const auto * tmp_pos = query_to_send.c_str(); - parsed_formatted_query = parseQuery(tmp_pos, - tmp_pos + query_to_send.size(), + ast_2 = parseQuery(tmp_pos, tmp_pos + query_to_send.size(), false /* allow_multi_statements */); } catch (Exception & e) @@ -1483,25 +1490,31 @@ private: } } - if (parsed_formatted_query) + if (ast_2) { - const auto formatted_twice - = parsed_formatted_query->formatForErrorMessage(); + const auto text_2 = ast_2->formatForErrorMessage(); + const auto * tmp_pos = text_2.c_str(); + const auto ast_3 = parseQuery(tmp_pos, tmp_pos + text_2.size(), + false /* allow_multi_statements */); + const auto text_3 = ast_3->formatForErrorMessage(); - if (formatted_twice != query_to_send) + if (text_3 != text_2) { fmt::print(stderr, "The query formatting is broken.\n"); printChangedSettings(); - fmt::print(stderr, "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n", - formatted_twice, query_to_send); + fmt::print(stderr, + "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n", + text_3, text_2); fmt::print(stderr, "In more detail:\n"); - fmt::print(stderr, "AST-1:\n'{}'\n", parsed_query->dumpTree()); + fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", parsed_query->dumpTree()); fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_send); - fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", parsed_formatted_query->dumpTree()); - fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", formatted_twice); - fmt::print(stderr, "Text-1 must be equal to Text-2, but it is not.\n"); + fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", ast_2->dumpTree()); + fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", text_2); + fmt::print(stderr, "AST-3 (Text-2 parsed):\n'{}'\n", ast_3->dumpTree()); + fmt::print(stderr, "Text-3 (AST-3 formatted):\n'{}'\n", text_3); + fmt::print(stderr, "Text-3 must be equal to Text-2, but it is not.\n"); exit(1); } @@ -1518,6 +1531,11 @@ private: server_exception.reset(); client_exception.reset(); have_error = false; + + // We have to reinitialize connection after errors, because it + // might have gotten into a wrong state and we'll get false + // positives about "Unknown packet from server". 
+ connection->forceConnected(connection_parameters.timeouts); } else if (ast_to_process->formatForErrorMessage().size() > 500) { @@ -2252,30 +2270,27 @@ private: return; processed_rows += block.rows(); + + /// Even if all blocks are empty, we still need to initialize the output stream to write empty resultset. initBlockOutputStream(block); /// The header block containing zero rows was used to initialize /// block_out_stream, do not output it. /// Also do not output too much data if we're fuzzing. - if (block.rows() != 0 - && (query_fuzzer_runs == 0 || processed_rows < 100)) - { - block_out_stream->write(block); - written_first_block = true; - } + if (block.rows() == 0 || (query_fuzzer_runs != 0 && processed_rows >= 100)) + return; - bool clear_progress = false; if (need_render_progress) - clear_progress = std_out.offset() > 0; - - if (clear_progress) clearProgress(); + block_out_stream->write(block); + written_first_block = true; + /// Received data block is immediately displayed to the user. block_out_stream->flush(); /// Restore progress bar after data block. - if (clear_progress) + if (need_render_progress) writeProgress(); } diff --git a/programs/client/QueryFuzzer.cpp b/programs/client/QueryFuzzer.cpp index 6243e2c82ec..721e5acb991 100644 --- a/programs/client/QueryFuzzer.cpp +++ b/programs/client/QueryFuzzer.cpp @@ -27,6 +27,7 @@ #include #include + namespace DB { diff --git a/programs/client/QueryFuzzer.h b/programs/client/QueryFuzzer.h index 38714205967..7c79e683eb4 100644 --- a/programs/client/QueryFuzzer.h +++ b/programs/client/QueryFuzzer.h @@ -4,11 +4,14 @@ #include #include +#include + #include #include #include #include + namespace DB { @@ -50,7 +53,7 @@ struct QueryFuzzer // Some debug fields for detecting problematic ASTs with loops. // These are reset for each fuzzMain call. 
std::unordered_set debug_visited_nodes; - ASTPtr * debug_top_ast; + ASTPtr * debug_top_ast = nullptr; // This is the only function you have to call -- it will modify the passed diff --git a/programs/git-import/git-import.cpp b/programs/git-import/git-import.cpp index b07435dcf78..7977cfba79d 100644 --- a/programs/git-import/git-import.cpp +++ b/programs/git-import/git-import.cpp @@ -774,7 +774,7 @@ UInt128 diffHash(const CommitDiff & file_changes) } UInt128 hash_of_diff; - hasher.get128(hash_of_diff.low, hash_of_diff.high); + hasher.get128(hash_of_diff.items[0], hash_of_diff.items[1]); return hash_of_diff; } diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 739ee26cf31..96d336673d0 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -844,8 +844,8 @@ namespace fmt::print("The pidof command returned unusual output.\n"); } - WriteBufferFromFileDescriptor stderr(STDERR_FILENO); - copyData(sh->err, stderr); + WriteBufferFromFileDescriptor std_err(STDERR_FILENO); + copyData(sh->err, std_err); sh->tryWait(); } diff --git a/programs/library-bridge/SharedLibraryHandler.h b/programs/library-bridge/SharedLibraryHandler.h index 5c0334ac89f..fa476995e32 100644 --- a/programs/library-bridge/SharedLibraryHandler.h +++ b/programs/library-bridge/SharedLibraryHandler.h @@ -23,6 +23,8 @@ public: SharedLibraryHandler(const SharedLibraryHandler & other); + SharedLibraryHandler & operator=(const SharedLibraryHandler & other) = delete; + ~SharedLibraryHandler(); BlockInputStreamPtr loadAll(); diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index c92eb5c6647..fb6817fbf80 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -365,16 +365,20 @@ static void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UI } } -static void transformUUID(const UInt128 & src, UInt128 & dst, UInt64 seed) +static void transformUUID(const UUID & src_uuid, UUID & dst_uuid, UInt64 seed) { + const UInt128 & src = src_uuid.toUnderType(); + UInt128 & dst = dst_uuid.toUnderType(); + SipHash hash; hash.update(seed); - hash.update(reinterpret_cast(&src), sizeof(UInt128)); + hash.update(reinterpret_cast(&src), sizeof(UUID)); /// Saving version and variant from an old UUID hash.get128(reinterpret_cast(&dst)); - dst.high = (dst.high & 0x1fffffffffffffffull) | (src.high & 0xe000000000000000ull); - dst.low = (dst.low & 0xffffffffffff0fffull) | (src.low & 0x000000000000f000ull); + + dst.items[1] = (dst.items[1] & 0x1fffffffffffffffull) | (src.items[1] & 0xe000000000000000ull); + dst.items[0] = (dst.items[0] & 0xffffffffffff0fffull) | (src.items[0] & 0x000000000000f000ull); } class FixedStringModel : public IModel @@ -426,10 +430,10 @@ public: ColumnPtr generate(const IColumn & column) override { - const ColumnUInt128 & src_column = assert_cast(column); + const ColumnUUID & src_column = assert_cast(column); const auto & src_data = src_column.getData(); - auto res_column = ColumnUInt128::create(); + auto res_column = ColumnUUID::create(); auto & res_data = res_column->getData(); res_data.resize(src_data.size()); diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index e33858583c2..f4f575bb33d 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -109,7 +109,7 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ validateODBCConnectionString(connection_string), 
getContext()->getSettingsRef().odbc_bridge_connection_pool_size); - nanodbc::catalog catalog(*connection); + nanodbc::catalog catalog(connection->get()); std::string catalog_name; /// In XDBC tables it is allowed to pass either database_name or schema_name in table definion, but not both of them. diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.cpp b/programs/odbc-bridge/IdentifierQuoteHandler.cpp index a5a97cb8086..124a5c420f8 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.cpp +++ b/programs/odbc-bridge/IdentifierQuoteHandler.cpp @@ -46,7 +46,7 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ validateODBCConnectionString(connection_string), getContext()->getSettingsRef().odbc_bridge_connection_pool_size); - auto identifier = getIdentifierQuote(*connection); + auto identifier = getIdentifierQuote(connection->get()); WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); try diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index e24b51f6037..ffa636e8b49 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -18,13 +18,10 @@ #include #include #include -#include "ODBCConnectionFactory.h" #include #include -#include - namespace DB { @@ -133,12 +130,12 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse auto quoting_style = IdentifierQuotingStyle::None; #if USE_ODBC - quoting_style = getQuotingStyle(*connection); + quoting_style = getQuotingStyle(connection->get()); #endif auto & read_buf = request.getStream(); auto input_format = FormatFactory::instance().getInput(format, read_buf, *sample_block, getContext(), max_block_size); auto input_stream = std::make_shared(input_format); - ODBCBlockOutputStream output_stream(*connection, db_name, table_name, *sample_block, getContext(), quoting_style); + ODBCBlockOutputStream output_stream(std::move(connection), db_name, table_name, *sample_block, getContext(), quoting_style); copyData(*input_stream, output_stream); writeStringBinary("Ok.", out); } @@ -148,7 +145,7 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse LOG_TRACE(log, "Query: {}", query); BlockOutputStreamPtr writer = FormatFactory::instance().getOutputStreamParallelIfPossible(format, out, *sample_block, getContext()); - ODBCBlockInputStream inp(*connection, query, *sample_block, max_block_size); + ODBCBlockInputStream inp(std::move(connection), query, *sample_block, max_block_size); copyData(inp, *writer); } } diff --git a/programs/odbc-bridge/ODBCBlockInputStream.cpp b/programs/odbc-bridge/ODBCBlockInputStream.cpp index 3a73cb9f601..c695c8db9cf 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockInputStream.cpp @@ -21,14 +21,13 @@ namespace ErrorCodes ODBCBlockInputStream::ODBCBlockInputStream( - nanodbc::connection & connection_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_) + nanodbc::ConnectionHolderPtr connection, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_) : log(&Poco::Logger::get("ODBCBlockInputStream")) , max_block_size{max_block_size_} - , connection(connection_) , query(query_str) { description.init(sample_block); - result = execute(connection, NANODBC_TEXT(query)); + result = execute(connection->get(), NANODBC_TEXT(query)); } diff --git a/programs/odbc-bridge/ODBCBlockInputStream.h 
b/programs/odbc-bridge/ODBCBlockInputStream.h index bbd90ce4d6c..26aa766dbcc 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.h +++ b/programs/odbc-bridge/ODBCBlockInputStream.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include "ODBCConnectionFactory.h" namespace DB @@ -13,7 +13,7 @@ namespace DB class ODBCBlockInputStream final : public IBlockInputStream { public: - ODBCBlockInputStream(nanodbc::connection & connection_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_); + ODBCBlockInputStream(nanodbc::ConnectionHolderPtr connection, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_); String getName() const override { return "ODBC"; } @@ -36,7 +36,6 @@ private: const UInt64 max_block_size; ExternalResultDescription description; - nanodbc::connection & connection; nanodbc::result result; String query; bool finished = false; diff --git a/programs/odbc-bridge/ODBCBlockOutputStream.cpp b/programs/odbc-bridge/ODBCBlockOutputStream.cpp index e4614204178..dc965b3b2a7 100644 --- a/programs/odbc-bridge/ODBCBlockOutputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockOutputStream.cpp @@ -1,5 +1,6 @@ #include "ODBCBlockOutputStream.h" +#include #include #include #include @@ -37,17 +38,16 @@ namespace query.IAST::format(settings); return buf.str(); } - } -ODBCBlockOutputStream::ODBCBlockOutputStream(nanodbc::connection & connection_, +ODBCBlockOutputStream::ODBCBlockOutputStream(nanodbc::ConnectionHolderPtr connection_, const std::string & remote_database_name_, const std::string & remote_table_name_, const Block & sample_block_, ContextPtr local_context_, IdentifierQuotingStyle quoting_) : log(&Poco::Logger::get("ODBCBlockOutputStream")) - , connection(connection_) + , connection(std::move(connection_)) , db_name(remote_database_name_) , table_name(remote_table_name_) , sample_block(sample_block_) @@ -69,7 +69,7 @@ void ODBCBlockOutputStream::write(const Block & block) writer->write(block); std::string query = getInsertQuery(db_name, table_name, block.getColumnsWithTypeAndName(), quoting) + values_buf.str(); - execute(connection, query); + execute(connection->get(), query); } } diff --git a/programs/odbc-bridge/ODBCBlockOutputStream.h b/programs/odbc-bridge/ODBCBlockOutputStream.h index 0b13f7039b5..c370a0a9c7b 100644 --- a/programs/odbc-bridge/ODBCBlockOutputStream.h +++ b/programs/odbc-bridge/ODBCBlockOutputStream.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include "ODBCConnectionFactory.h" namespace DB @@ -16,7 +16,7 @@ class ODBCBlockOutputStream : public IBlockOutputStream public: ODBCBlockOutputStream( - nanodbc::connection & connection_, + nanodbc::ConnectionHolderPtr connection_, const std::string & remote_database_name_, const std::string & remote_table_name_, const Block & sample_block_, @@ -29,7 +29,7 @@ public: private: Poco::Logger * log; - nanodbc::connection & connection; + nanodbc::ConnectionHolderPtr connection; std::string db_name; std::string table_name; Block sample_block; diff --git a/programs/odbc-bridge/ODBCConnectionFactory.h b/programs/odbc-bridge/ODBCConnectionFactory.h index 56961ddb2fb..41ed5f1b31f 100644 --- a/programs/odbc-bridge/ODBCConnectionFactory.h +++ b/programs/odbc-bridge/ODBCConnectionFactory.h @@ -6,53 +6,51 @@ #include #include +namespace DB +{ +namespace ErrorCodes +{ + extern const int NO_FREE_CONNECTION; +} +} namespace nanodbc { -static constexpr inline auto ODBC_CONNECT_TIMEOUT = 100; - -using ConnectionPtr = std::shared_ptr; +using ConnectionPtr = 
std::unique_ptr<nanodbc::connection>;
 using Pool = BorrowedObjectPool<ConnectionPtr>;
 using PoolPtr = std::shared_ptr<Pool>;
 
 class ConnectionHolder
 {
-
 public:
-    ConnectionHolder(const std::string & connection_string_, PoolPtr pool_) : connection_string(connection_string_), pool(pool_) {}
+    ConnectionHolder(PoolPtr pool_, ConnectionPtr connection_) : pool(pool_), connection(std::move(connection_)) {}
 
-    ~ConnectionHolder()
+    ConnectionHolder(const ConnectionHolder & other) = delete;
+
+    ~ConnectionHolder() { pool->returnObject(std::move(connection)); }
+
+    nanodbc::connection & get() const
     {
-        if (connection)
-            pool->returnObject(std::move(connection));
-    }
-
-    nanodbc::connection & operator*()
-    {
-        if (!connection)
-        {
-            pool->borrowObject(connection, [&]()
-            {
-                return std::make_shared<nanodbc::connection>(connection_string, ODBC_CONNECT_TIMEOUT);
-            });
-        }
-
+        assert(connection != nullptr);
         return *connection;
     }
 
 private:
-    std::string connection_string;
     PoolPtr pool;
     ConnectionPtr connection;
 };
 
+using ConnectionHolderPtr = std::unique_ptr<ConnectionHolder>;
 }
 
 
 namespace DB
 {
 
+static constexpr inline auto ODBC_CONNECT_TIMEOUT = 100;
+static constexpr inline auto ODBC_POOL_WAIT_TIMEOUT = 10000;
+
 class ODBCConnectionFactory final : private boost::noncopyable
 {
 public:
@@ -62,14 +60,32 @@
         return ret;
     }
 
-    nanodbc::ConnectionHolder get(const std::string & connection_string, size_t pool_size)
+    nanodbc::ConnectionHolderPtr get(const std::string & connection_string, size_t pool_size)
     {
         std::lock_guard lock(mutex);
 
         if (!factory.count(connection_string))
             factory.emplace(std::make_pair(connection_string, std::make_shared<nanodbc::Pool>(pool_size)));
 
-        return nanodbc::ConnectionHolder(connection_string, factory[connection_string]);
+        auto & pool = factory[connection_string];
+
+        nanodbc::ConnectionPtr connection;
+        auto connection_available = pool->tryBorrowObject(connection, []() { return nullptr; }, ODBC_POOL_WAIT_TIMEOUT);
+
+        if (!connection_available)
+            throw Exception("Unable to fetch connection within the timeout", ErrorCodes::NO_FREE_CONNECTION);
+
+        try
+        {
+            if (!connection || !connection->connected())
+                connection = std::make_unique<nanodbc::connection>(connection_string, ODBC_CONNECT_TIMEOUT);
+        }
+        catch (...)
+        {
+            pool->returnObject(std::move(connection));
+            throw;
+        }
+
+        return std::make_unique<nanodbc::ConnectionHolder>(factory[connection_string], std::move(connection));
     }
 
 private:
diff --git a/programs/odbc-bridge/SchemaAllowedHandler.cpp b/programs/odbc-bridge/SchemaAllowedHandler.cpp
index 4cceaee962c..3a20148780d 100644
--- a/programs/odbc-bridge/SchemaAllowedHandler.cpp
+++ b/programs/odbc-bridge/SchemaAllowedHandler.cpp
@@ -53,7 +53,7 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer
             validateODBCConnectionString(connection_string),
             getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
 
-        bool result = isSchemaAllowed(*connection);
+        bool result = isSchemaAllowed(connection->get());
 
         WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
         try
diff --git a/programs/odbc-bridge/getIdentifierQuote.cpp b/programs/odbc-bridge/getIdentifierQuote.cpp
index 9ccad6e6e1d..586e3c4e5dd 100644
--- a/programs/odbc-bridge/getIdentifierQuote.cpp
+++ b/programs/odbc-bridge/getIdentifierQuote.cpp
@@ -3,7 +3,6 @@
 #if USE_ODBC
 
 #include
-#include
 #include
 #include
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 729dc150e07..82392b3fee0 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include <Poco/Environment.h>
 #include
 #include
 #include
@@ -385,6 +386,11 @@ void Server::initialize(Poco::Util::Application & self)
 {
     BaseDaemon::initialize(self);
     logger().information("starting up");
+
+    LOG_INFO(&logger(), "OS Name = {}, OS Version = {}, OS Architecture = {}",
+        Poco::Environment::osName(),
+        Poco::Environment::osVersion(),
+        Poco::Environment::osArchitecture());
 }
 
 std::string Server::getDefaultCorePath() const
@@ -879,7 +885,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
         global_context->setMMappedFileCache(mmap_cache_size);
 
 #if USE_EMBEDDED_COMPILER
-    size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", 500);
+    constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 1024;
+    size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default);
     CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size);
 #endif
diff --git a/programs/server/config.xml b/programs/server/config.xml
index 195b6263595..df8a5266c39 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -329,6 +329,8 @@
     -->
     <mmap_cache_size>1000</mmap_cache_size>
 
+
+    <compiled_expression_cache_size>1073741824</compiled_expression_cache_size>
 
     <path>/var/lib/clickhouse/</path>
@@ -518,6 +520,33 @@
     false
 
+    <![CDATA[
+      * [Debian/Ubuntu]
+        export MVN_URL=https://repo1.maven.org/maven2/ru/yandex/clickhouse/clickhouse-jdbc-bridge
+        export PKG_VER=$(curl -sL $MVN_URL/maven-metadata.xml | grep '<release>' | sed -e 's|.*>\(.*\)<.*|\1|')
+        wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge_$PKG_VER-1_all.deb
+        apt install --no-install-recommends -f ./clickhouse-jdbc-bridge_$PKG_VER-1_all.deb
+        clickhouse-jdbc-bridge &
+
+      * [CentOS/RHEL]
+        export MVN_URL=https://repo1.maven.org/maven2/ru/yandex/clickhouse/clickhouse-jdbc-bridge
+        export PKG_VER=$(curl -sL $MVN_URL/maven-metadata.xml | grep '<release>' | sed -e 's|.*>\(.*\)<.*|\1|')
+        wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm
+        yum localinstall -y clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm
+        clickhouse-jdbc-bridge &
+
+      Please refer to https://github.com/ClickHouse/clickhouse-jdbc-bridge#usage for more information.
+ ]]> + + diff --git a/src/Access/AccessType.h b/src/Access/AccessType.h index d5185b9931d..ea6ef0eb3ec 100644 --- a/src/Access/AccessType.h +++ b/src/Access/AccessType.h @@ -133,6 +133,7 @@ enum class AccessType M(SYSTEM_RELOAD_MODEL, "SYSTEM RELOAD MODELS, RELOAD MODEL, RELOAD MODELS", GLOBAL, SYSTEM_RELOAD) \ M(SYSTEM_RELOAD_EMBEDDED_DICTIONARIES, "RELOAD EMBEDDED DICTIONARIES", GLOBAL, SYSTEM_RELOAD) /* implicitly enabled by the grant SYSTEM_RELOAD_DICTIONARY ON *.* */\ M(SYSTEM_RELOAD, "", GROUP, SYSTEM) \ + M(SYSTEM_RESTART_DISK, "SYSTEM RESTART DISK", GLOBAL, SYSTEM) \ M(SYSTEM_MERGES, "SYSTEM STOP MERGES, SYSTEM START MERGES, STOP_MERGES, START MERGES", TABLE, SYSTEM) \ M(SYSTEM_TTL_MERGES, "SYSTEM STOP TTL MERGES, SYSTEM START TTL MERGES, STOP TTL MERGES, START TTL MERGES", TABLE, SYSTEM) \ M(SYSTEM_FETCHES, "SYSTEM STOP FETCHES, SYSTEM START FETCHES, STOP FETCHES, START FETCHES", TABLE, SYSTEM) \ diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 80594f66dfc..8c38cd02f9c 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -355,8 +355,9 @@ String DiskAccessStorage::getStorageParamsJSON() const std::lock_guard lock{mutex}; Poco::JSON::Object json; json.set("path", directory_path); - if (readonly) - json.set("readonly", readonly.load()); + bool readonly_loaded = readonly; + if (readonly_loaded) + json.set("readonly", Poco::Dynamic::Var{true}); std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM oss.exceptions(std::ios::failbit); Poco::JSON::Stringifier::stringify(json, oss); diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 1cade973724..0c4d2f417c9 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -77,7 +77,7 @@ auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const Str if (enable_tls_lc_str == "starttls") params.enable_tls = LDAPClient::Params::TLSEnable::YES_STARTTLS; else if (config.getBool(ldap_server_config + ".enable_tls")) - params.enable_tls = LDAPClient::Params::TLSEnable::YES; + params.enable_tls = LDAPClient::Params::TLSEnable::YES; //-V1048 else params.enable_tls = LDAPClient::Params::TLSEnable::NO; } @@ -96,7 +96,7 @@ auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const Str else if (tls_minimum_protocol_version_lc_str == "tls1.1") params.tls_minimum_protocol_version = LDAPClient::Params::TLSProtocolVersion::TLS1_1; else if (tls_minimum_protocol_version_lc_str == "tls1.2") - params.tls_minimum_protocol_version = LDAPClient::Params::TLSProtocolVersion::TLS1_2; + params.tls_minimum_protocol_version = LDAPClient::Params::TLSProtocolVersion::TLS1_2; //-V1048 else throw Exception("Bad value for 'tls_minimum_protocol_version' entry, allowed values are: 'ssl2', 'ssl3', 'tls1.0', 'tls1.1', 'tls1.2'", ErrorCodes::BAD_ARGUMENTS); } @@ -113,7 +113,7 @@ auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const Str else if (tls_require_cert_lc_str == "try") params.tls_require_cert = LDAPClient::Params::TLSRequireCert::TRY; else if (tls_require_cert_lc_str == "demand") - params.tls_require_cert = LDAPClient::Params::TLSRequireCert::DEMAND; + params.tls_require_cert = LDAPClient::Params::TLSRequireCert::DEMAND; //-V1048 else throw Exception("Bad value for 'tls_require_cert' entry, allowed values are: 'never', 'allow', 'try', 'demand'", ErrorCodes::BAD_ARGUMENTS); } diff --git a/src/Access/GrantedRoles.cpp b/src/Access/GrantedRoles.cpp index 
7930b56e44d..2659f8a3ec9 100644 --- a/src/Access/GrantedRoles.cpp +++ b/src/Access/GrantedRoles.cpp @@ -136,7 +136,7 @@ GrantedRoles::Elements GrantedRoles::getElements() const boost::range::set_difference(roles, roles_with_admin_option, std::back_inserter(element.ids)); if (!element.empty()) { - element.admin_option = false; + element.admin_option = false; //-V1048 elements.emplace_back(std::move(element)); } diff --git a/src/Access/IAccessStorage.h b/src/Access/IAccessStorage.h index 2cdd8eabf73..cc914664149 100644 --- a/src/Access/IAccessStorage.h +++ b/src/Access/IAccessStorage.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include diff --git a/src/Access/Quota.h b/src/Access/Quota.h index 430bdca29b0..b7970b2583b 100644 --- a/src/Access/Quota.h +++ b/src/Access/Quota.h @@ -45,7 +45,7 @@ struct Quota : public IAccessEntity struct ResourceTypeInfo { - const char * const raw_name; + const char * const raw_name = ""; const String name; /// Lowercased with underscores, e.g. "result_rows". const String keyword; /// Uppercased with spaces, e.g. "RESULT ROWS". const bool output_as_float = false; diff --git a/src/Access/RolesOrUsersSet.h b/src/Access/RolesOrUsersSet.h index 0d8983c2ec3..871bb0c0758 100644 --- a/src/Access/RolesOrUsersSet.h +++ b/src/Access/RolesOrUsersSet.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Access/RowPolicy.h b/src/Access/RowPolicy.h index c9b4d69152d..723db545dbe 100644 --- a/src/Access/RowPolicy.h +++ b/src/Access/RowPolicy.h @@ -2,6 +2,7 @@ #include #include +#include #include diff --git a/src/Access/ya.make.in b/src/Access/ya.make.in index 0c5692a9bfa..1f11c7d7d2a 100644 --- a/src/Access/ya.make.in +++ b/src/Access/ya.make.in @@ -8,7 +8,7 @@ PEERDIR( SRCS( - + ) END() diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h index b55e9110589..7cdef3bfe69 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/src/AggregateFunctions/AggregateFunctionAvg.h @@ -7,7 +7,7 @@ #include #include #include -#include "Core/DecimalFunctions.h" +#include <Core/DecimalFunctions.h> namespace DB diff --git a/src/AggregateFunctions/AggregateFunctionAvgWeighted.h b/src/AggregateFunctions/AggregateFunctionAvgWeighted.h index 79e8a7f3ca3..5842e7311e9 100644 --- a/src/AggregateFunctions/AggregateFunctionAvgWeighted.h +++ b/src/AggregateFunctions/AggregateFunctionAvgWeighted.h @@ -6,18 +6,19 @@ namespace DB { struct Settings; -template <class T> + +template <typename T> using AvgWeightedFieldType = std::conditional_t<IsDecimalNumber<T>, std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>, std::conditional_t<DecimalOrExtendedInt<T>, Float64, // no way to do UInt128 * UInt128, better cast to Float64 NearestFieldType<T>>>; -template <class T, class U> +template <typename T, typename U> using MaxFieldType = std::conditional_t<(sizeof(AvgWeightedFieldType<T>) > sizeof(AvgWeightedFieldType<U>)), AvgWeightedFieldType<T>, AvgWeightedFieldType<U>>; -template <class Value, class Weight> +template <typename Value, typename Weight> class AggregateFunctionAvgWeighted final : public AggregateFunctionAvgBase< MaxFieldType<Value, Weight>, AvgWeightedFieldType<Weight>, AggregateFunctionAvgWeighted<Value, Weight>> diff --git a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h index 9d142c5d535..e2073c37c63 100644 --- a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h +++ b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h @@ -2,7 +2,6 @@ #include #include -#include #include #include #include @@ -21,7 +20,7 @@ namespace ErrorCodes /** Tracks the leftmost and rightmost (x, y) data points.
*/ -struct AggregateFunctionBoundingRatioData +struct AggregateFunctionBoundingRatioData //-V730 { struct Point { diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.h b/src/AggregateFunctions/AggregateFunctionDeltaSum.h index 81860734353..31d25c559dd 100644 --- a/src/AggregateFunctions/AggregateFunctionDeltaSum.h +++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.h @@ -23,8 +23,7 @@ struct AggregationFunctionDeltaSumData T sum = 0; T last = 0; T first = 0; - bool seen_last = false; - bool seen_first = false; + bool seen = false; }; template @@ -50,18 +49,17 @@ public: { auto value = assert_cast &>(*columns[0]).getData()[row_num]; - if ((this->data(place).last < value) && this->data(place).seen_last) + if ((this->data(place).last < value) && this->data(place).seen) { this->data(place).sum += (value - this->data(place).last); } this->data(place).last = value; - this->data(place).seen_last = true; - if (!this->data(place).seen_first) + if (!this->data(place).seen) { this->data(place).first = value; - this->data(place).seen_first = true; + this->data(place).seen = true; } } @@ -70,7 +68,7 @@ public: auto place_data = &this->data(place); auto rhs_data = &this->data(rhs); - if ((place_data->last < rhs_data->first) && place_data->seen_last && rhs_data->seen_first) + if ((place_data->last < rhs_data->first) && place_data->seen && rhs_data->seen) { // If the lhs last number seen is less than the first number the rhs saw, the lhs is before // the rhs, for example [0, 2] [4, 7]. So we want to add the deltasums, but also add the @@ -80,7 +78,7 @@ public: place_data->sum += rhs_data->sum + (rhs_data->first - place_data->last); place_data->last = rhs_data->last; } - else if ((rhs_data->first < place_data->last && rhs_data->seen_last && place_data->seen_first)) + else if ((rhs_data->first < place_data->last && rhs_data->seen && place_data->seen)) { // In the opposite scenario, the lhs comes after the rhs, e.g. [4, 6] [1, 2]. Since we // assume the input interval states are sorted by time, we assume this is a counter @@ -90,16 +88,15 @@ public: place_data->sum += rhs_data->sum; place_data->last = rhs_data->last; } - else if (rhs_data->seen_first && !place_data->seen_first) + else if (rhs_data->seen && !place_data->seen) { // If we're here then the lhs is an empty state and the rhs does have some state, so // we'll just take that state. place_data->first = rhs_data->first; - place_data->seen_first = rhs_data->seen_first; place_data->last = rhs_data->last; - place_data->seen_last = rhs_data->seen_last; place_data->sum = rhs_data->sum; + place_data->seen = rhs_data->seen; } // Otherwise lhs either has data or is uninitialized, so we don't need to modify its values. 
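The consolidation above is sound because the two old flags could never diverge: every call to add() set both by the time it returned, and every merge branch copied them together, so a single `seen` bit carries the same information. A minimal self-contained sketch of the resulting add/merge semantics follows; it is illustrative only (not the ClickHouse classes), with plain `long long` standing in for the templated value type.

#include <cassert>

struct DeltaSumState
{
    long long sum = 0;
    long long last = 0;
    long long first = 0;
    bool seen = false;

    void add(long long value)
    {
        if (seen && last < value)
            sum += value - last;   // accumulate only positive deltas
        last = value;
        if (!seen)
        {
            first = value;
            seen = true;
        }
    }

    void merge(const DeltaSumState & rhs)
    {
        if (seen && rhs.seen && last < rhs.first)
        {
            // lhs precedes rhs, e.g. [0, 2] [4, 7]: bridge the seam 2 -> 4.
            sum += rhs.sum + (rhs.first - last);
            last = rhs.last;
        }
        else if (seen && rhs.seen && rhs.first < last)
        {
            // rhs precedes lhs, e.g. [4, 6] [1, 2]: treated as a counter
            // reset, so the seam itself contributes nothing.
            sum += rhs.sum;
            last = rhs.last;
        }
        else if (rhs.seen && !seen)
        {
            *this = rhs;   // lhs is empty: take the rhs state wholesale
        }
        // Otherwise lhs has data (or both are empty): nothing to do.
    }
};

int main()
{
    DeltaSumState a, b;
    a.add(0); a.add(2);
    b.add(4); b.add(7);
    a.merge(b);
    assert(a.sum == 7);   // (2 - 0) + (4 - 2) + (7 - 4)
}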
@@ -110,8 +107,7 @@ public: writeIntBinary(this->data(place).sum, buf); writeIntBinary(this->data(place).first, buf); writeIntBinary(this->data(place).last, buf); - writePODBinary(this->data(place).seen_first, buf); - writePODBinary(this->data(place).seen_last, buf); + writePODBinary(this->data(place).seen, buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override @@ -119,8 +115,7 @@ public: readIntBinary(this->data(place).sum, buf); readIntBinary(this->data(place).first, buf); readIntBinary(this->data(place).last, buf); - readPODBinary(this->data(place).seen_first, buf); - readPODBinary(this->data(place).seen_last, buf); + readPODBinary(this->data(place).seen, buf); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.cpp b/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.cpp new file mode 100644 index 00000000000..70a1ac660b1 --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.cpp @@ -0,0 +1,51 @@ +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +AggregateFunctionPtr createAggregateFunctionDeltaSumTimestamp( + const String & name, + const DataTypes & arguments, + const Array & params) +{ + assertNoParameters(name, params); + + if (arguments.size() != 2) + throw Exception("Incorrect number of arguments for aggregate function " + name, + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (!isInteger(arguments[0]) && !isFloat(arguments[0]) && !isDateOrDateTime(arguments[0])) + throw Exception("Illegal type " + arguments[0]->getName() + " of argument for aggregate function " + + name + ", must be Int, Float, Date, DateTime", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (!isInteger(arguments[1]) && !isFloat(arguments[1]) && !isDateOrDateTime(arguments[1])) + throw Exception("Illegal type " + arguments[1]->getName() + " of argument for aggregate function " + + name + ", must be Int, Float, Date, DateTime", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return AggregateFunctionPtr(createWithTwoNumericOrDateTypes( + *arguments[0], *arguments[1], arguments, params)); +} +} + +void registerAggregateFunctionDeltaSumTimestamp(AggregateFunctionFactory & factory) +{ + AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = true }; + + factory.registerFunction("deltaSumTimestamp", { createAggregateFunctionDeltaSumTimestamp, properties }); +} + +} diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.h b/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.h new file mode 100644 index 00000000000..b7e91e9524a --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.h @@ -0,0 +1,172 @@ +#pragma once + +#include +#include + +#include +#include + +#include +#include +#include + +#include + + +namespace DB +{ + +template +struct AggregationFunctionDeltaSumTimestampData +{ + ValueType sum = 0; + ValueType first = 0; + ValueType last = 0; + TimestampType first_ts = 0; + TimestampType last_ts = 0; + bool seen = false; +}; + +template +class AggregationFunctionDeltaSumTimestamp final + : public IAggregateFunctionDataHelper< + AggregationFunctionDeltaSumTimestampData, + AggregationFunctionDeltaSumTimestamp + > +{ +public: + AggregationFunctionDeltaSumTimestamp(const 
DataTypes & arguments, const Array & params) + : IAggregateFunctionDataHelper< + AggregationFunctionDeltaSumTimestampData, + AggregationFunctionDeltaSumTimestamp + >{arguments, params} + {} + + AggregationFunctionDeltaSumTimestamp() + : IAggregateFunctionDataHelper< + AggregationFunctionDeltaSumTimestampData, + AggregationFunctionDeltaSumTimestamp + >{} + {} + + bool allocatesMemoryInArena() const override { return false; } + + String getName() const override { return "deltaSumTimestamp"; } + + DataTypePtr getReturnType() const override { return std::make_shared>(); } + + void NO_SANITIZE_UNDEFINED ALWAYS_INLINE add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override + { + auto value = assert_cast &>(*columns[0]).getData()[row_num]; + auto ts = assert_cast &>(*columns[1]).getData()[row_num]; + + if ((this->data(place).last < value) && this->data(place).seen) + { + this->data(place).sum += (value - this->data(place).last); + } + + this->data(place).last = value; + this->data(place).last_ts = ts; + + if (!this->data(place).seen) + { + this->data(place).first = value; + this->data(place).seen = true; + this->data(place).first_ts = ts; + } + } + + // before returns true if lhs is before rhs or false if it is not or can't be determined + bool ALWAYS_INLINE before ( + const AggregationFunctionDeltaSumTimestampData * lhs, + const AggregationFunctionDeltaSumTimestampData * rhs + ) const + { + if (lhs->last_ts < rhs->first_ts) + { + return true; + } + if (lhs->last_ts == rhs->first_ts && (lhs->last_ts < rhs->last_ts || lhs->first_ts < rhs->first_ts)) + { + return true; + } + return false; + } + + void NO_SANITIZE_UNDEFINED ALWAYS_INLINE merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override + { + auto place_data = &this->data(place); + auto rhs_data = &this->data(rhs); + + if (!place_data->seen && rhs_data->seen) + { + place_data->sum = rhs_data->sum; + place_data->seen = true; + place_data->first = rhs_data->first; + place_data->first_ts = rhs_data->first_ts; + place_data->last = rhs_data->last; + place_data->last_ts = rhs_data->last_ts; + } + else if (place_data->seen && !rhs_data->seen) + return; + else if (before(place_data, rhs_data)) + { + // This state came before the rhs state + + if (rhs_data->first > place_data->last) + place_data->sum += (rhs_data->first - place_data->last); + place_data->sum += rhs_data->sum; + place_data->last = rhs_data->last; + place_data->last_ts = rhs_data->last_ts; + } + else if (before(rhs_data, place_data)) + { + // This state came after the rhs state + + if (place_data->first > rhs_data->last) + place_data->sum += (place_data->first - rhs_data->last); + place_data->sum += rhs_data->sum; + place_data->first = rhs_data->first; + place_data->first_ts = rhs_data->first_ts; + } + else + { + // If none of those conditions matched, it means both states we are merging have all + // same timestamps. We have to pick either the smaller or larger value so that the + // result is deterministic. 
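+ // Concretely, the branch below adopts the `first`/`last` of whichever state has the larger `first` value.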
+ + if (place_data->first < rhs_data->first) + { + place_data->first = rhs_data->first; + place_data->last = rhs_data->last; + } + } + } + + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override + { + writeIntBinary(this->data(place).sum, buf); + writeIntBinary(this->data(place).first, buf); + writeIntBinary(this->data(place).first_ts, buf); + writeIntBinary(this->data(place).last, buf); + writeIntBinary(this->data(place).last_ts, buf); + writePODBinary(this->data(place).seen, buf); + } + + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override + { + readIntBinary(this->data(place).sum, buf); + readIntBinary(this->data(place).first, buf); + readIntBinary(this->data(place).first_ts, buf); + readIntBinary(this->data(place).last, buf); + readIntBinary(this->data(place).last_ts, buf); + readPODBinary(this->data(place).seen, buf); + } + + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override + { + assert_cast &>(to).getData().push_back(this->data(place).sum); + } +}; + +} diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index 01d3de8dd5e..9b7853f8665 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -10,7 +10,6 @@ #include #include -#include namespace DB { diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.h b/src/AggregateFunctions/AggregateFunctionGroupArray.h index 1111ec00768..06292992a2f 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -298,12 +298,7 @@ public: if (size) { typename ColumnVector::Container & data_to = assert_cast &>(arr_to.getData()).getData(); - if constexpr (is_big_int_v) - // is data_to empty? we should probably use std::vector::insert then - for (auto it = this->data(place).value.begin(); it != this->data(place).value.end(); it++) - data_to.push_back(*it); - else - data_to.insert(this->data(place).value.begin(), this->data(place).value.end()); + data_to.insert(this->data(place).value.begin(), this->data(place).value.end()); } } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp index b4d32a3a908..cc4b2145977 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.cpp @@ -22,6 +22,7 @@ namespace ErrorCodes namespace { +/// TODO Proper support for Decimal256. template struct MovingSum { diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h index fa38c3642d1..daaa7a69baf 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h @@ -39,7 +39,7 @@ struct MovingData using Array = PODArray; Array value; /// Prefix sums. 
- T sum = 0; + T sum{}; void NO_SANITIZE_UNDEFINED add(T val, Arena * arena) { @@ -70,9 +70,9 @@ struct MovingAvgData : public MovingData T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const { if (idx < window_size) - return this->value[idx] / window_size; + return this->value[idx] / T(window_size); else - return (this->value[idx] - this->value[idx - window_size]) / window_size; + return (this->value[idx] - this->value[idx - window_size]) / T(window_size); } }; diff --git a/src/AggregateFunctions/AggregateFunctionHistogram.h b/src/AggregateFunctions/AggregateFunctionHistogram.h index 70e2430f666..5268f84e7ac 100644 --- a/src/AggregateFunctions/AggregateFunctionHistogram.h +++ b/src/AggregateFunctions/AggregateFunctionHistogram.h @@ -221,7 +221,7 @@ private: } public: - AggregateFunctionHistogramData() + AggregateFunctionHistogramData() //-V730 : size(0) , lower_bound(std::numeric_limits::max()) , upper_bound(std::numeric_limits::lowest()) diff --git a/src/AggregateFunctions/AggregateFunctionIf.h b/src/AggregateFunctions/AggregateFunctionIf.h index 3625114d8c7..5ef8e3bc75a 100644 --- a/src/AggregateFunctions/AggregateFunctionIf.h +++ b/src/AggregateFunctions/AggregateFunctionIf.h @@ -114,6 +114,16 @@ public: nested_func->merge(place, rhs, arena); } + void mergeBatch( + size_t batch_size, + AggregateDataPtr * places, + size_t place_offset, + const AggregateDataPtr * rhs, + Arena * arena) const override + { + nested_func->mergeBatch(batch_size, places, place_offset, rhs, arena); + } + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override { nested_func->serialize(place, buf); diff --git a/src/AggregateFunctions/AggregateFunctionMannWhitney.h b/src/AggregateFunctions/AggregateFunctionMannWhitney.h index 67b1310f13c..365bc45f7b7 100644 --- a/src/AggregateFunctions/AggregateFunctionMannWhitney.h +++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -21,7 +20,7 @@ #include -#include + namespace DB { struct Settings; diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 58f9bf2d47d..de1f4fad296 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -183,7 +183,7 @@ public: /** For strings. Short strings are stored in the object itself, and long strings are allocated separately. * NOTE It could also be suitable for arrays of numbers. 
*/ -struct SingleValueDataString +struct SingleValueDataString //-V730 { private: using Self = SingleValueDataString; diff --git a/src/AggregateFunctions/AggregateFunctionOrFill.h b/src/AggregateFunctions/AggregateFunctionOrFill.h index 645ec308f4e..aaafb8cc2c0 100644 --- a/src/AggregateFunctions/AggregateFunctionOrFill.h +++ b/src/AggregateFunctions/AggregateFunctionOrFill.h @@ -197,6 +197,18 @@ public: place[size_of_data] |= rhs[size_of_data]; } + void mergeBatch( + size_t batch_size, + AggregateDataPtr * places, + size_t place_offset, + const AggregateDataPtr * rhs, + Arena * arena) const override + { + nested_function->mergeBatch(batch_size, places, place_offset, rhs, arena); + for (size_t i = 0; i < batch_size; ++i) + (places[i] + place_offset)[size_of_data] |= rhs[i][size_of_data]; + } + void serialize( ConstAggregateDataPtr place, WriteBuffer & buf) const override diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.cpp b/src/AggregateFunctions/AggregateFunctionQuantile.cpp index 0771d8a7b9e..3830a8d38d0 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantile.cpp +++ b/src/AggregateFunctions/AggregateFunctionQuantile.cpp @@ -101,13 +101,14 @@ AggregateFunctionPtr createAggregateFunctionQuantile(const std::string & name, c if (which.idx == TypeIndex::Decimal32) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::Decimal64) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::Decimal128) return std::make_shared>(argument_types, params); + if (which.idx == TypeIndex::Decimal256) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::DateTime64) return std::make_shared>(argument_types, params); - //if (which.idx == TypeIndex::Decimal256) return std::make_shared>(argument_types, params); } if constexpr (supportBigInt()) { if (which.idx == TypeIndex::Int128) return std::make_shared>(argument_types, params); + if (which.idx == TypeIndex::UInt128) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::Int256) return std::make_shared>(argument_types, params); if (which.idx == TypeIndex::UInt256) return std::make_shared>(argument_types, params); } diff --git a/src/AggregateFunctions/AggregateFunctionStatistics.cpp b/src/AggregateFunctions/AggregateFunctionStatistics.cpp index 8d227e3dcae..0b4088a281f 100644 --- a/src/AggregateFunctions/AggregateFunctionStatistics.cpp +++ b/src/AggregateFunctions/AggregateFunctionStatistics.cpp @@ -37,7 +37,7 @@ AggregateFunctionPtr createAggregateFunctionStatisticsBinary(const std::string & assertNoParameters(name, parameters); assertBinary(name, argument_types); - AggregateFunctionPtr res(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], argument_types)); + AggregateFunctionPtr res(createWithTwoBasicNumericTypes(*argument_types[0], *argument_types[1], argument_types)); if (!res) throw Exception("Illegal types " + argument_types[0]->getName() + " and " + argument_types[1]->getName() + " of arguments for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/src/AggregateFunctions/AggregateFunctionStatistics.h b/src/AggregateFunctions/AggregateFunctionStatistics.h index f67da4bb1aa..97ea9ad8917 100644 --- a/src/AggregateFunctions/AggregateFunctionStatistics.h +++ b/src/AggregateFunctions/AggregateFunctionStatistics.h @@ -14,7 +14,7 @@ namespace DB { struct Settings; -namespace +namespace detail { /// This function returns true if both values are large and comparable. 
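The hunks below only qualify these call sites with the new `detail` namespace; the numerics are unchanged. For reference, the merge rule they guard combines two partial means, switching between a pooled form and a delta form that preserves precision when one count dwarfs the other. A hedged sketch follows; the comparability threshold is an invented stand-in, not the real check.

#include <cassert>
#include <cmath>

struct MeanState
{
    double mean = 0;
    unsigned long long count = 0;

    static bool areComparable(unsigned long long a, unsigned long long b)
    {
        if (a == 0 || b == 0)
            return false;
        double ratio = a > b ? double(a) / b : double(b) / a;
        return ratio <= 2.0;   // illustrative threshold only
    }

    void merge(const MeanState & source)
    {
        unsigned long long total_count = count + source.count;
        if (source.count == 0)
            return;   // nothing to fold in
        double delta = mean - source.mean;
        if (areComparable(count, source.count))
            mean = (source.count * source.mean + count * mean) / total_count;   // pooled form
        else
            mean = source.mean + delta * (double(count) / total_count);   // delta form: stable for lopsided counts
        count = total_count;
    }
};

int main()
{
    MeanState a{10.0, 4}, b{20.0, 4};
    a.merge(b);
    assert(std::abs(a.mean - 15.0) < 1e-9);
}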
@@ -73,7 +73,7 @@ public: Float64 factor = static_cast(count * source.count) / total_count; Float64 delta = mean - source.mean; - if (areComparable(count, source.count)) + if (detail::areComparable(count, source.count)) mean = (source.count * source.mean + count * mean) / total_count; else mean = source.mean + delta * (static_cast(count) / total_count); @@ -303,7 +303,7 @@ public: Float64 left_delta = left_mean - source.left_mean; Float64 right_delta = right_mean - source.right_mean; - if (areComparable(count, source.count)) + if (detail::areComparable(count, source.count)) { left_mean = (source.count * source.left_mean + count * left_mean) / total_count; right_mean = (source.count * source.right_mean + count * right_mean) / total_count; diff --git a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.cpp b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.cpp index 8989244c09a..dc040921f53 100644 --- a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.cpp +++ b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.cpp @@ -42,7 +42,7 @@ AggregateFunctionPtr createAggregateFunctionStatisticsBinary(const std::string & assertNoParameters(name, parameters); assertBinary(name, argument_types); - AggregateFunctionPtr res(createWithTwoNumericTypes(*argument_types[0], *argument_types[1], argument_types)); + AggregateFunctionPtr res(createWithTwoBasicNumericTypes(*argument_types[0], *argument_types[1], argument_types)); if (!res) throw Exception("Illegal types " + argument_types[0]->getName() + " and " + argument_types[1]->getName() + " of arguments for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h index 0ff6ff2e65a..e57781fac33 100644 --- a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h +++ b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h @@ -132,11 +132,10 @@ public: static_cast(static_cast(*columns[1]).getData()[row_num])); else { - if constexpr (std::is_same_v) + if constexpr (IsDecimalNumber) { this->data(place).add(static_cast( - static_cast(*columns[0]).getData()[row_num].value - )); + static_cast(*columns[0]).getData()[row_num].value)); } else this->data(place).add( diff --git a/src/AggregateFunctions/AggregateFunctionTopK.cpp b/src/AggregateFunctions/AggregateFunctionTopK.cpp index 985773b92d2..ba26247ce31 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.cpp +++ b/src/AggregateFunctions/AggregateFunctionTopK.cpp @@ -2,9 +2,10 @@ #include #include #include +#include #include #include -#include "registerAggregateFunctions.h" + #define TOP_K_MAX_SIZE 0xFFFFFF diff --git a/src/AggregateFunctions/AggregateFunctionTopK.h b/src/AggregateFunctions/AggregateFunctionTopK.h index 3b8cfef6f2e..30d69b8ca7b 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/src/AggregateFunctions/AggregateFunctionTopK.h @@ -10,7 +10,6 @@ #include #include -#include #include #include diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index 39568722523..1ed5daaa10a 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -70,7 +70,7 @@ struct AggregateFunctionUniqHLL12Data }; template <> -struct AggregateFunctionUniqHLL12Data +struct AggregateFunctionUniqHLL12Data { using Set = HyperLogLogWithSmallSetOptimization; Set set; @@ -134,16 +134,14 @@ template struct 
AggregateFunctionUniqTraits { static UInt64 hash(T x) { - if constexpr (std::is_same_v) - { - return sipHash64(x); - } - else if constexpr (std::is_same_v || std::is_same_v) + if constexpr (std::is_same_v || std::is_same_v) { return ext::bit_cast(x); } else if constexpr (sizeof(T) <= sizeof(UInt64)) + { return x; + } else return DefaultHash64(x); } @@ -185,7 +183,7 @@ struct OneAdder UInt128 key; SipHash hash; hash.update(value.data, value.size); - hash.get128(key.low, key.high); + hash.get128(key); data.set.insert(key); } diff --git a/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp b/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp index b989498b0d0..205ac015a68 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp +++ b/src/AggregateFunctions/AggregateFunctionUniqCombined.cpp @@ -3,11 +3,13 @@ #include #include +#include + #include #include #include -#include "registerAggregateFunctions.h" + namespace DB { diff --git a/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/src/AggregateFunctions/AggregateFunctionUniqCombined.h index 84b14267854..cb623bcb761 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include diff --git a/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp b/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp index dcca9969c62..4305139b587 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp +++ b/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp @@ -1,11 +1,11 @@ #include #include #include +#include #include #include #include #include -#include "registerAggregateFunctions.h" namespace DB diff --git a/src/AggregateFunctions/AggregateFunctionUniqUpTo.h b/src/AggregateFunctions/AggregateFunctionUniqUpTo.h index e55a952473a..a1599a0935b 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqUpTo.h +++ b/src/AggregateFunctions/AggregateFunctionUniqUpTo.h @@ -1,6 +1,7 @@ #pragma once -#include +#include + #include #include @@ -36,7 +37,7 @@ struct Settings; */ template -struct __attribute__((__packed__)) AggregateFunctionUniqUpToData +struct AggregateFunctionUniqUpToData { /** If count == threshold + 1 - this means that it is "overflowed" (values greater than threshold). * In this case (for example, after calling the merge function), the `data` array does not necessarily contain the initialized values @@ -44,7 +45,17 @@ struct __attribute__((__packed__)) AggregateFunctionUniqUpToData * then set count to `threshold + 1`, and values from another state are not copied. */ UInt8 count = 0; - T data[0]; + char data_ptr[0]; + + T load(size_t i) const + { + return unalignedLoad(data_ptr + i * sizeof(T)); + } + + void store(size_t i, const T & x) + { + unalignedStore(data_ptr + i * sizeof(T), x); + } size_t size() const { @@ -61,12 +72,12 @@ struct __attribute__((__packed__)) AggregateFunctionUniqUpToData /// Linear search for the matching element. for (size_t i = 0; i < count; ++i) - if (data[i] == x) + if (load(i) == x) return; /// Did not find the matching element. If there is room for one more element, insert it. if (count < threshold) - data[count] = x; + store(count, x); /// After increasing count, the state may be overflowed. 
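/// (That is, `count` may reach threshold + 1, the sentinel meaning "more than threshold distinct values" described in the struct comment above.)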
++count; @@ -85,7 +96,7 @@ struct __attribute__((__packed__)) AggregateFunctionUniqUpToData } for (size_t i = 0; i < rhs.count; ++i) - insert(rhs.data[i], threshold); + insert(rhs.load(i), threshold); } void write(WriteBuffer & wb, UInt8 threshold) const @@ -94,7 +105,7 @@ struct __attribute__((__packed__)) AggregateFunctionUniqUpToData /// Write values only if the state is not overflowed. Otherwise, they are not needed, and only the fact that the state is overflowed is important. if (count <= threshold) - wb.write(reinterpret_cast(data), count * sizeof(data[0])); + wb.write(data_ptr, count * sizeof(T)); } void read(ReadBuffer & rb, UInt8 threshold) @@ -102,7 +113,7 @@ struct __attribute__((__packed__)) AggregateFunctionUniqUpToData readBinary(count, rb); if (count <= threshold) - rb.read(reinterpret_cast(data), count * sizeof(data[0])); + rb.read(data_ptr, count * sizeof(T)); } /// ALWAYS_INLINE is required to have better code layout for uniqUpTo function diff --git a/src/AggregateFunctions/CMakeLists.txt b/src/AggregateFunctions/CMakeLists.txt index 72cae5428a0..b6dbf2b4eb0 100644 --- a/src/AggregateFunctions/CMakeLists.txt +++ b/src/AggregateFunctions/CMakeLists.txt @@ -24,6 +24,6 @@ list(REMOVE_ITEM clickhouse_aggregate_functions_headers add_library(clickhouse_aggregate_functions ${clickhouse_aggregate_functions_sources}) target_link_libraries(clickhouse_aggregate_functions PRIVATE dbms PUBLIC ${CITYHASH_LIBRARIES}) -if(ENABLE_TESTS) - add_subdirectory(tests) +if(ENABLE_EXAMPLES) + add_subdirectory(examples) endif() diff --git a/src/AggregateFunctions/Helpers.h b/src/AggregateFunctions/Helpers.h index 4717efc03d2..7e512058deb 100644 --- a/src/AggregateFunctions/Helpers.h +++ b/src/AggregateFunctions/Helpers.h @@ -15,12 +15,12 @@ M(Float32) \ M(Float64) -// No UInt128 here because of the name conflict #define FOR_NUMERIC_TYPES(M) \ M(UInt8) \ M(UInt16) \ M(UInt32) \ M(UInt64) \ + M(UInt128) \ M(UInt256) \ M(Int8) \ M(Int16) \ @@ -110,6 +110,8 @@ static IAggregateFunction * createWithUnsignedIntegerType(const IDataType & argu if (which.idx == TypeIndex::UInt16) return new AggregateFunctionTemplate>(std::forward(args)...); if (which.idx == TypeIndex::UInt32) return new AggregateFunctionTemplate>(std::forward(args)...); if (which.idx == TypeIndex::UInt64) return new AggregateFunctionTemplate>(std::forward(args)...); + if (which.idx == TypeIndex::UInt128) return new AggregateFunctionTemplate>(std::forward(args)...); + if (which.idx == TypeIndex::UInt256) return new AggregateFunctionTemplate>(std::forward(args)...); return nullptr; } @@ -120,11 +122,11 @@ static IAggregateFunction * createWithNumericBasedType(const IDataType & argumen if (f) return f; - /// expects that DataTypeDate based on UInt16, DataTypeDateTime based on UInt32 and UUID based on UInt128 + /// expects that DataTypeDate based on UInt16, DataTypeDateTime based on UInt32 WhichDataType which(argument_type); if (which.idx == TypeIndex::Date) return new AggregateFunctionTemplate(std::forward(args)...); if (which.idx == TypeIndex::DateTime) return new AggregateFunctionTemplate(std::forward(args)...); - if (which.idx == TypeIndex::UUID) return new AggregateFunctionTemplate(std::forward(args)...); + if (which.idx == TypeIndex::UUID) return new AggregateFunctionTemplate(std::forward(args)...); return nullptr; } @@ -185,6 +187,69 @@ static IAggregateFunction * createWithTwoNumericTypes(const IDataType & first_ty return nullptr; } +template class AggregateFunctionTemplate, typename... 
TArgs> +static IAggregateFunction * createWithTwoBasicNumericTypesSecond(const IDataType & second_type, TArgs && ... args) +{ + WhichDataType which(second_type); +#define DISPATCH(TYPE) \ + if (which.idx == TypeIndex::TYPE) return new AggregateFunctionTemplate(std::forward(args)...); + FOR_BASIC_NUMERIC_TYPES(DISPATCH) +#undef DISPATCH + return nullptr; +} + +template
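The helper added above follows the factory dispatch pattern used throughout this file: a type-list macro expands into one branch per supported type, each returning a concrete template instantiation, with nullptr signalling an unsupported argument type. The sketch below shows that pattern in miniature, with an invented one-parameter variant and a shortened type list (not the real FOR_BASIC_NUMERIC_TYPES contents).

#include <cstdint>

enum class TypeIndex { UInt8, UInt16, UInt32, UInt64, Float64 };

struct IAggregateFunction { virtual ~IAggregateFunction() = default; };

template <typename T>
struct ExampleFunction : IAggregateFunction {};   // stand-in for a real aggregate function template

#define FOR_BASIC_NUMERIC_TYPES_SKETCH(M) \
    M(UInt8, uint8_t) \
    M(UInt16, uint16_t) \
    M(UInt32, uint32_t) \
    M(UInt64, uint64_t) \
    M(Float64, double)

template <template <typename> class FunctionTemplate>
IAggregateFunction * createWithBasicNumericType(TypeIndex which)
{
#define DISPATCH(INDEX, TYPE) \
    if (which == TypeIndex::INDEX) return new FunctionTemplate<TYPE>();
    FOR_BASIC_NUMERIC_TYPES_SKETCH(DISPATCH)
#undef DISPATCH
    return nullptr;   // unsupported type: the caller reports ILLEGAL_TYPE_OF_ARGUMENT
}

int main()
{
    IAggregateFunction * f = createWithBasicNumericType<ExampleFunction>(TypeIndex::UInt32);
    delete f;
}

The two-type helpers in this file nest the same trick: the outer function dispatches on the first type and delegates to a *Second helper that dispatches on the second, yielding one instantiation per type pair.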