Merge branch 'ClickHouse:master' into jsonpath

This commit is contained in:
l1tsolaiki 2021-06-17 18:25:26 +03:00 committed by GitHub
commit f2d451e0ec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
628 changed files with 9676 additions and 7660 deletions

6
.gitmodules vendored
View File

@ -210,9 +210,6 @@
[submodule "contrib/fast_float"]
path = contrib/fast_float
url = https://github.com/fastfloat/fast_float
[submodule "contrib/libpqxx"]
path = contrib/libpqxx
url = https://github.com/jtv/libpqxx
[submodule "contrib/libpq"]
path = contrib/libpq
url = https://github.com/ClickHouse-Extras/libpq
@ -231,3 +228,6 @@
[submodule "contrib/yaml-cpp"]
path = contrib/yaml-cpp
url = https://github.com/ClickHouse-Extras/yaml-cpp.git
[submodule "contrib/libpqxx"]
path = contrib/libpqxx
url = https://github.com/ClickHouse-Extras/libpqxx.git

View File

@ -1,14 +1,22 @@
#include "IBridge.h"
#include <IO/ReadHelpers.h>
#include <boost/program_options.hpp>
#include <Poco/Net/NetException.h>
#include <Poco/Util/HelpFormatter.h>
#include <Common/StringUtils/StringUtils.h>
#include <Formats/registerFormats.h>
#include <common/logger_useful.h>
#include <common/range.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/SensitiveDataMasker.h>
#include <common/errnoToString.h>
#include <IO/ReadHelpers.h>
#include <Formats/registerFormats.h>
#include <Server/HTTP/HTTPServer.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <sys/time.h>
#include <sys/resource.h>
#if USE_ODBC
# include <Poco/Data/ODBC/Connector.h>
@ -163,6 +171,31 @@ void IBridge::initialize(Application & self)
max_server_connections = config().getUInt("max-server-connections", 1024);
keep_alive_timeout = config().getUInt64("keep-alive-timeout", 10);
struct rlimit limit;
const UInt64 gb = 1024 * 1024 * 1024;
/// Set maximum RSS to 1 GiB.
limit.rlim_max = limit.rlim_cur = gb;
if (setrlimit(RLIMIT_RSS, &limit))
LOG_WARNING(log, "Unable to set maximum RSS to 1GB: {} (current rlim_cur={}, rlim_max={})",
errnoToString(errno), limit.rlim_cur, limit.rlim_max);
if (!getrlimit(RLIMIT_RSS, &limit))
LOG_INFO(log, "RSS limit: cur={}, max={}", limit.rlim_cur, limit.rlim_max);
try
{
const auto oom_score = toString(config().getUInt64("bridge_oom_score", 500));
WriteBufferFromFile buf("/proc/self/oom_score_adj");
buf.write(oom_score.data(), oom_score.size());
buf.close();
LOG_INFO(log, "OOM score is set to {}", oom_score);
}
catch (const Exception & e)
{
LOG_WARNING(log, "Failed to set OOM score, error: {}", e.what());
}
initializeTerminationAndSignalProcessing();
ServerApplication::initialize(self); // NOLINT
@ -214,7 +247,7 @@ int IBridge::main(const std::vector<std::string> & /*args*/)
server.stop();
for (size_t count : ext::range(1, 6))
for (size_t count : collections::range(1, 6))
{
if (server.currentConnections() == 0)
break;

View File

@ -91,10 +91,12 @@ struct DecomposedFloat
/// Compare float with integer of arbitrary width (both signed and unsigned are supported). Assuming two's complement arithmetic.
/// This function is generic, big integers (128, 256 bit) are supported as well.
/// Infinities are compared correctly. NaNs are treated similarly to infinities, so they can be less than all numbers.
/// (note that we need total order)
/// Returns -1, 0 or 1.
template <typename Int>
int compare(Int rhs)
int compare(Int rhs) const
{
if (rhs == 0)
return sign();
@ -137,10 +139,11 @@ struct DecomposedFloat
if (normalized_exponent() >= static_cast<int16_t>(8 * sizeof(Int) - is_signed_v<Int>))
return is_negative() ? -1 : 1;
using UInt = make_unsigned_t<Int>;
using UInt = std::conditional_t<(sizeof(Int) > sizeof(typename Traits::UInt)), make_unsigned_t<Int>, typename Traits::UInt>;
UInt uint_rhs = rhs < 0 ? -rhs : rhs;
/// Smaller octave: abs(rhs) < abs(float)
/// FYI, TIL: octave is also called "binade", https://en.wikipedia.org/wiki/Binade
if (uint_rhs < (static_cast<UInt>(1) << normalized_exponent()))
return is_negative() ? -1 : 1;
@ -154,11 +157,11 @@ struct DecomposedFloat
bool large_and_always_integer = normalized_exponent() >= static_cast<int16_t>(Traits::mantissa_bits);
typename Traits::UInt a = large_and_always_integer
? mantissa() << (normalized_exponent() - Traits::mantissa_bits)
: mantissa() >> (Traits::mantissa_bits - normalized_exponent());
UInt a = large_and_always_integer
? static_cast<UInt>(mantissa()) << (normalized_exponent() - Traits::mantissa_bits)
: static_cast<UInt>(mantissa()) >> (Traits::mantissa_bits - normalized_exponent());
typename Traits::UInt b = uint_rhs - (static_cast<UInt>(1) << normalized_exponent());
UInt b = uint_rhs - (static_cast<UInt>(1) << normalized_exponent());
if (a < b)
return is_negative() ? 1 : -1;
@ -175,37 +178,37 @@ struct DecomposedFloat
template <typename Int>
bool equals(Int rhs)
bool equals(Int rhs) const
{
return compare(rhs) == 0;
}
template <typename Int>
bool notEquals(Int rhs)
bool notEquals(Int rhs) const
{
return compare(rhs) != 0;
}
template <typename Int>
bool less(Int rhs)
bool less(Int rhs) const
{
return compare(rhs) < 0;
}
template <typename Int>
bool greater(Int rhs)
bool greater(Int rhs) const
{
return compare(rhs) > 0;
}
template <typename Int>
bool lessOrEquals(Int rhs)
bool lessOrEquals(Int rhs) const
{
return compare(rhs) <= 0;
}
template <typename Int>
bool greaterOrEquals(Int rhs)
bool greaterOrEquals(Int rhs) const
{
return compare(rhs) >= 0;
}

View File

@ -1,6 +1,6 @@
#include <common/ReadlineLineReader.h>
#include <common/errnoToString.h>
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#include <errno.h>
#include <signal.h>

View File

@ -3,7 +3,7 @@
#include <map>
#include <tuple>
#include <mutex>
#include <ext/function_traits.h>
#include <common/function_traits.h>
/** The simplest cache for a free function.
@ -32,10 +32,11 @@ public:
template <typename... Args>
Result operator() (Args &&... args)
{
Key key{std::forward<Args>(args)...};
{
std::lock_guard lock(mutex);
Key key{std::forward<Args>(args)...};
auto it = cache.find(key);
if (cache.end() != it)
@ -43,7 +44,7 @@ public:
}
/// The calculations themselves are not done under mutex.
Result res = f(std::forward<Args>(args)...);
Result res = std::apply(f, key);
{
std::lock_guard lock(mutex);
@ -57,11 +58,12 @@ public:
template <typename... Args>
void update(Args &&... args)
{
Result res = f(std::forward<Args>(args)...);
Key key{std::forward<Args>(args)...};
Result res = std::apply(f, key);
{
std::lock_guard lock(mutex);
Key key{std::forward<Args>(args)...};
cache[key] = std::move(res);
}
}

7
base/common/arraySize.h Normal file
View File

@ -0,0 +1,7 @@
#pragma once
#include <cstdlib>
/** \brief Yields the element count of a built-in array passed by reference.
  * The count is taken from the array type itself, so the result is a compile-time constant.
  */
template <typename Element, std::size_t Count>
constexpr size_t arraySize(const Element (&)[Count]) noexcept
{
    return Count;
}

27
base/common/bit_cast.h Normal file
View File

@ -0,0 +1,27 @@
#pragma once
#include <string.h>
#include <algorithm>
#include <type_traits>
/** \brief Produces a `To` whose leading bytes are a byte-wise copy of `from`.
  * The result starts out as `To {}`; then min(sizeof(To), sizeof(From)) bytes are copied
  * from `from` with memcpy. When the widths differ, the extra bytes of the result keep
  * whatever `To {}` produced, or the extra bytes of `from` are ignored.
  * Use `safe_bit_cast` when equal widths must be enforced.
  */
template <typename To, typename From>
std::decay_t<To> bit_cast(const From & from)
{
    To result {};
    const auto byte_count = std::min(sizeof(result), sizeof(from));
    memcpy(static_cast<void *>(&result), &from, byte_count);
    return result;
}
/** \brief Same as `bit_cast`, but refuses to compile unless `To` and `From` have the same size.
  * The byte-wise copy itself is delegated to `bit_cast`.
  */
template <typename To, typename From>
std::decay_t<To> safe_bit_cast(const From & from)
{
    constexpr bool same_width = sizeof(To) == sizeof(From);
    static_assert(same_width, "bit cast on types of different width");
    return bit_cast<To, From>(from);
}

46
base/common/chrono_io.h Normal file
View File

@ -0,0 +1,46 @@
#pragma once
#include <chrono>
#include <string>
#include <sstream>
#include <cctz/time_zone.h>
/// Formats a std::time_t with the pattern "%Y-%m-%d %H:%M:%S",
/// using the time zone returned by cctz::local_time_zone().
inline std::string to_string(const std::time_t & time)
{
    const auto as_time_point = std::chrono::system_clock::from_time_t(time);
    return cctz::format("%Y-%m-%d %H:%M:%S", as_time_point, cctz::local_time_zone());
}
/// Renders a time point by converting it with std::chrono::system_clock::to_time_t
/// and delegating to the std::time_t overload of to_string.
/// DateLUT is deliberately not used: it shows weird characters for TimePoint::max().
/// C++20 format would be preferable, but it is not available yet.
template <typename Clock, typename Duration = typename Clock::duration>
std::string to_string(const std::chrono::time_point<Clock, Duration> & tp)
{
    return to_string(std::chrono::system_clock::to_time_t(tp));
}
/// Renders a duration as a number of seconds followed by "s".
/// If the duration is a whole number of seconds, the integer count is printed;
/// otherwise the value is converted to seconds as double and printed via std::to_string.
template <typename Rep, typename Period = std::ratio<1>>
std::string to_string(const std::chrono::duration<Rep, Period> & duration)
{
    using WholeSeconds = std::chrono::seconds;
    using FractionalSeconds = std::chrono::duration<double>;

    const auto whole = std::chrono::duration_cast<WholeSeconds>(duration);
    const bool is_whole_number = (whole == duration);

    const std::string number = is_whole_number
        ? std::to_string(whole.count())
        : std::to_string(std::chrono::duration_cast<FractionalSeconds>(duration).count());

    return number + "s";
}
/// Streams a time point using its to_string representation.
template <typename Clock, typename Duration = typename Clock::duration>
std::ostream & operator<<(std::ostream & o, const std::chrono::time_point<Clock, Duration> & tp)
{
    o << to_string(tp);
    return o;
}
/// Streams a duration using its to_string representation.
template <typename Rep, typename Period = std::ratio<1>>
std::ostream & operator<<(std::ostream & o, const std::chrono::duration<Rep, Period> & duration)
{
    o << to_string(duration);
    return o;
}

52
base/common/map.h Normal file
View File

@ -0,0 +1,52 @@
#pragma once
#include <type_traits>
#include <boost/iterator/transform_iterator.hpp>
namespace collections
{
/// \brief Strip type off top level reference and cv-qualifiers thus allowing storage in containers
template <typename T>
using unqualified_t = std::remove_cv_t<std::remove_reference_t<T>>;
/** \brief Returns collection of the same container-type as the input collection,
* with each element transformed by the application of `mapper`.
*/
template <template <typename...> class Collection, typename... Params, typename Mapper>
auto map(const Collection<Params...> & collection, Mapper && mapper)
{
using value_type = unqualified_t<decltype(mapper(*std::begin(collection)))>;
return Collection<value_type>(
boost::make_transform_iterator(std::begin(collection), std::forward<Mapper>(mapper)),
boost::make_transform_iterator(std::end(collection), std::forward<Mapper>(mapper)));
}
/** \brief Returns collection of specified container-type,
* with each element transformed by the application of `mapper`.
* Allows conversion between different container-types, e.g. std::vector to std::list
*/
template <template <typename...> class ResultCollection, typename Collection, typename Mapper>
auto map(const Collection & collection, Mapper && mapper)
{
using value_type = unqualified_t<decltype(mapper(*std::begin(collection)))>;
return ResultCollection<value_type>(
boost::make_transform_iterator(std::begin(collection), std::forward<Mapper>(mapper)),
boost::make_transform_iterator(std::end(collection), std::forward<Mapper>(mapper)));
}
/** \brief Returns collection of specified type,
* with each element transformed by the application of `mapper`.
* Allows leveraging implicit conversion between the result of applying `mapper` and R::value_type.
*/
template <typename ResultCollection, typename Collection, typename Mapper>
auto map(const Collection & collection, Mapper && mapper)
{
return ResultCollection(
boost::make_transform_iterator(std::begin(collection), std::forward<Mapper>(mapper)),
boost::make_transform_iterator(std::end(collection), std::forward<Mapper>(mapper)));
}
}

View File

@ -4,9 +4,9 @@
#include <boost/range/adaptor/transformed.hpp>
#include <type_traits>
namespace ext
namespace collections
{
namespace internal
{
template <typename ResultType, typename CountingType, typename BeginType, typename EndType>
@ -24,11 +24,11 @@ namespace internal
/// For loop adaptor which is used to iterate through a half-closed interval [begin, end).
/// The parameters `begin` and `end` can have any integral or enum types.
template <typename BeginType,
typename EndType,
typename = std::enable_if_t<
(std::is_integral_v<BeginType> || std::is_enum_v<BeginType>) &&
(std::is_integral_v<EndType> || std::is_enum_v<EndType>) &&
(!std::is_enum_v<BeginType> || !std::is_enum_v<EndType> || std::is_same_v<BeginType, EndType>), void>>
typename EndType,
typename = std::enable_if_t<
(std::is_integral_v<BeginType> || std::is_enum_v<BeginType>) &&
(std::is_integral_v<EndType> || std::is_enum_v<EndType>) &&
(!std::is_enum_v<BeginType> || !std::is_enum_v<EndType> || std::is_same_v<BeginType, EndType>), void>>
inline auto range(BeginType begin, EndType end)
{
if constexpr (std::is_integral_v<BeginType> && std::is_integral_v<EndType>)
@ -51,7 +51,7 @@ inline auto range(BeginType begin, EndType end)
/// The parameter `end` can have any integral or enum type.
/// The same as range(0, end).
template <typename Type,
typename = std::enable_if_t<std::is_integral_v<Type> || std::is_enum_v<Type>, void>>
typename = std::enable_if_t<std::is_integral_v<Type> || std::is_enum_v<Type>, void>>
inline auto range(Type end)
{
if constexpr (std::is_integral_v<Type>)
@ -59,4 +59,5 @@ inline auto range(Type end)
else
return internal::rangeImpl<Type, std::underlying_type_t<Type>>(0, end);
}
}

View File

@ -4,9 +4,6 @@
#include <memory>
#include <utility>
namespace ext
{
template <class F>
class [[nodiscard]] basic_scope_guard
{
@ -105,10 +102,9 @@ using scope_guard = basic_scope_guard<std::function<void(void)>>;
template <class F>
inline basic_scope_guard<F> make_scope_guard(F && function_) { return std::forward<F>(function_); }
}
#define SCOPE_EXIT_CONCAT(n, ...) \
const auto scope_exit##n = ext::make_scope_guard([&] { __VA_ARGS__; })
const auto scope_exit##n = make_scope_guard([&] { __VA_ARGS__; })
#define SCOPE_EXIT_FWD(n, ...) SCOPE_EXIT_CONCAT(n, __VA_ARGS__)
#define SCOPE_EXIT(...) SCOPE_EXIT_FWD(__LINE__, __VA_ARGS__)

View File

@ -1,6 +1,6 @@
#pragma once
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#include <common/logger_useful.h>
#include <Common/MemoryTracker.h>

View File

@ -2,8 +2,6 @@
#include <memory>
namespace ext
{
/** Allows to make std::shared_ptr from T with protected constructor.
*
@ -36,4 +34,3 @@ struct is_shared_ptr<std::shared_ptr<T>>
template <typename T>
inline constexpr bool is_shared_ptr_v = is_shared_ptr<T>::value;
}

View File

@ -109,10 +109,7 @@ public:
constexpr explicit operator bool() const noexcept;
template <class T>
using _integral_not_wide_integer_class = typename std::enable_if<std::is_arithmetic<T>::value, T>::type;
template <class T, class = _integral_not_wide_integer_class<T>>
template <typename T, typename = std::enable_if_t<std::is_arithmetic_v<T>, T>>
constexpr operator T() const noexcept;
constexpr operator long double() const noexcept;

View File

@ -255,13 +255,13 @@ struct integer<Bits, Signed>::_impl
set_multiplier<double>(self, alpha);
self *= max_int;
self += static_cast<uint64_t>(t - alpha * static_cast<T>(max_int)); // += b_i
self += static_cast<uint64_t>(t - floor(alpha) * static_cast<T>(max_int)); // += b_i
}
constexpr static void wide_integer_from_builtin(integer<Bits, Signed>& self, double rhs) noexcept
constexpr static void wide_integer_from_builtin(integer<Bits, Signed> & self, double rhs) noexcept
{
constexpr int64_t max_int = std::numeric_limits<int64_t>::max();
constexpr int64_t min_int = std::numeric_limits<int64_t>::min();
constexpr int64_t min_int = std::numeric_limits<int64_t>::lowest();
/// There are values in int64 that have more than 53 significant bits (in terms of double
/// representation). Such values, being promoted to double, are rounded up or down. If they are rounded up,
@ -271,14 +271,14 @@ struct integer<Bits, Signed>::_impl
/// The necessary check here is that long double has enough significant (mantissa) bits to store the
/// int64_t max value precisely.
//TODO Be compatible with Apple aarch64
// TODO Be compatible with Apple aarch64
#if not (defined(__APPLE__) && defined(__aarch64__))
static_assert(LDBL_MANT_DIG >= 64,
"On your system long double has less than 64 precision bits,"
"On your system long double has less than 64 precision bits, "
"which may result in UB when initializing double from int64_t");
#endif
if ((rhs > 0 && rhs < static_cast<long double>(max_int)) || (rhs < 0 && rhs > static_cast<long double>(min_int)))
if (rhs > static_cast<long double>(min_int) && rhs < static_cast<long double>(max_int))
{
self = static_cast<int64_t>(rhs);
return;

View File

@ -21,7 +21,7 @@
#include <fstream>
#include <sstream>
#include <memory>
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#include <Poco/Observer.h>
#include <Poco/AutoPtr.h>

View File

@ -1,30 +0,0 @@
#pragma once
#include <string.h>
#include <algorithm>
#include <type_traits>
namespace ext
{

/** \brief Produces a `To` whose leading bytes are a byte-wise copy of `from`.
  * The result starts out as `To {}`; then min(sizeof(To), sizeof(From)) bytes are copied
  * from `from` with memcpy. When the widths differ, the extra bytes of the result keep
  * whatever `To {}` produced, or the extra bytes of `from` are ignored.
  */
template <typename To, typename From>
std::decay_t<To> bit_cast(const From & from)
{
    To result {};
    const auto byte_count = std::min(sizeof(result), sizeof(from));
    memcpy(static_cast<void *>(&result), &from, byte_count);
    return result;
}

/** \brief Same as `bit_cast`, but refuses to compile unless `To` and `From` have the same size.
  */
template <typename To, typename From>
std::decay_t<To> safe_bit_cast(const From & from)
{
    constexpr bool same_width = sizeof(To) == sizeof(From);
    static_assert(same_width, "bit cast on types of different width");
    return bit_cast<To, From>(from);
}

}

View File

@ -1,49 +0,0 @@
#pragma once
#include <chrono>
#include <string>
#include <sstream>
#include <cctz/time_zone.h>
namespace ext
{

/// Formats a std::time_t with the pattern "%Y-%m-%d %H:%M:%S",
/// using the time zone returned by cctz::local_time_zone().
inline std::string to_string(const std::time_t & time)
{
    const auto as_time_point = std::chrono::system_clock::from_time_t(time);
    return cctz::format("%Y-%m-%d %H:%M:%S", as_time_point, cctz::local_time_zone());
}

/// Renders a time point by converting it with std::chrono::system_clock::to_time_t
/// and delegating to the std::time_t overload above.
/// DateLUT is deliberately not used: it shows weird characters for TimePoint::max().
/// C++20 format would be preferable, but it is not available yet.
template <typename Clock, typename Duration = typename Clock::duration>
std::string to_string(const std::chrono::time_point<Clock, Duration> & tp)
{
    return to_string(std::chrono::system_clock::to_time_t(tp));
}

/// Renders a duration as a number of seconds followed by "s".
/// If the duration is a whole number of seconds, the integer count is printed;
/// otherwise the value is converted to seconds as double and printed via std::to_string.
template <typename Rep, typename Period = std::ratio<1>>
std::string to_string(const std::chrono::duration<Rep, Period> & duration)
{
    using WholeSeconds = std::chrono::seconds;
    using FractionalSeconds = std::chrono::duration<double>;

    const auto whole = std::chrono::duration_cast<WholeSeconds>(duration);
    const bool is_whole_number = (whole == duration);

    const std::string number = is_whole_number
        ? std::to_string(whole.count())
        : std::to_string(std::chrono::duration_cast<FractionalSeconds>(duration).count());

    return number + "s";
}

/// Streams a time point using its to_string representation.
template <typename Clock, typename Duration = typename Clock::duration>
std::ostream & operator<<(std::ostream & o, const std::chrono::time_point<Clock, Duration> & tp)
{
    o << to_string(tp);
    return o;
}

/// Streams a duration using its to_string representation.
template <typename Rep, typename Period = std::ratio<1>>
std::ostream & operator<<(std::ostream & o, const std::chrono::duration<Rep, Period> & duration)
{
    o << to_string(duration);
    return o;
}

}

View File

@ -1,24 +0,0 @@
#pragma once
#include <iterator>
namespace ext
{

/** \brief Copies the elements of `collection` into a container built from the template `ResultCollection`.
  * The element type is taken from `Collection::value_type`; the result is constructed from the
  * iterator range [std::begin(collection), std::end(collection)).
  */
template <template <typename...> class ResultCollection, typename Collection>
auto collection_cast(const Collection & collection)
{
    using Result = ResultCollection<typename Collection::value_type>;
    return Result(std::begin(collection), std::end(collection));
}

/** \brief Copies the elements of `collection` into a container of the exact type `ResultCollection`.
  * The result is constructed from the iterator range, so any element conversion is whatever
  * that range constructor performs between the source and result value types.
  */
template <typename ResultCollection, typename Collection>
auto collection_cast(const Collection & collection)
{
    auto first = std::begin(collection);
    auto last = std::end(collection);
    return ResultCollection(first, last);
}

}

View File

@ -1,60 +0,0 @@
#pragma once
#include <ext/size.h>
#include <type_traits>
#include <utility>
#include <iterator>
/** \brief Provides a wrapper view around a container, allowing to iterate over its elements and indices.
* Allow writing code like shown below:
*
* std::vector<T> v = getVector();
* for (const std::pair<const std::size_t, T &> index_and_value : ext::enumerate(v))
* std::cout << "element " << index_and_value.first << " is " << index_and_value.second << std::endl;
*/
namespace ext
{

/// Iterator adaptor that carries a running index `idx` next to the underlying iterator `it`.
/// Dereferencing yields a std::pair of the index and the underlying iterator's reference.
template <typename It>
struct enumerate_iterator
{
    using traits = typename std::iterator_traits<It>;
    using iterator_category = typename traits::iterator_category;
    using value_type = std::pair<const std::size_t, typename traits::value_type>;
    using difference_type = typename traits::difference_type;
    using reference = std::pair<const std::size_t, typename traits::reference>;

    std::size_t idx;
    It it;

    enumerate_iterator(const std::size_t idx_, It it_) : idx{idx_}, it{it_} {}

    /// The (index, element) pair is built anew on every dereference.
    auto operator*() const
    {
        return reference(idx, *it);
    }

    /// Only the underlying iterators are compared; the index takes no part.
    bool operator!=(const enumerate_iterator & other) const
    {
        return it != other.it;
    }

    /// Advances the index and the underlying iterator together.
    enumerate_iterator & operator++()
    {
        ++idx;
        ++it;
        return *this;
    }
};

/// Range wrapper holding a reference to the collection; begin()/end() return enumerate_iterator.
template <typename Collection>
struct enumerate_wrapper
{
    using underlying_iterator = decltype(std::begin(std::declval<Collection &>()));
    using iterator = enumerate_iterator<underlying_iterator>;

    Collection & collection;

    enumerate_wrapper(Collection & collection_) : collection(collection_) {}

    /// Index starts from zero.
    auto begin()
    {
        return iterator(0, std::begin(collection));
    }

    /// The end iterator carries ext::size(collection) as its index.
    auto end()
    {
        return iterator(ext::size(collection), std::end(collection));
    }
};

/// Wraps a mutable collection.
template <typename Collection>
auto enumerate(Collection & collection)
{
    return enumerate_wrapper<Collection>{collection};
}

/// Wraps a const collection.
template <typename Collection>
auto enumerate(const Collection & collection)
{
    return enumerate_wrapper<const Collection>{collection};
}

}

View File

@ -1,24 +0,0 @@
#pragma once
#include <utility>
namespace ext
{

/// \brief Pass-through function object: returns its single argument unchanged.
class identity
{
    /** \brief Pointer-to-function type that an identity instance can convert to. */
    template <typename T>
    using function_ptr_t = T &&(*)(T &&);

    /** \brief Static counterpart of operator(), so that its address can be taken. */
    template <typename T>
    static T && invoke(T && t)
    {
        return static_cast<T &&>(t);
    }

public:
    /** \brief Returns the sole argument with its value category preserved. */
    template <typename T>
    T && operator()(T && t) const
    {
        return static_cast<T &&>(t);
    }

    /** \brief Converts an identity instance to a pointer to the static `invoke`. */
    template <typename T>
    operator function_ptr_t<T>() const
    {
        return &invoke;
    }
};

}

View File

@ -1,43 +0,0 @@
#pragma once
#include <utility>
#include <type_traits>
#include <array>
/** \brief Produces std::array of specified size, containing copies of provided object.
* Copy is performed N-1 times, and the last element is being moved.
* This helper allows to initialize std::array in place.
*/
namespace ext
{

namespace detail
{

/// Builds the array from one copy of `value` per entry of `indexes`, followed by the forwarded `value`.
/// The initializers of a braced list are evaluated left to right, so the copies are made before
/// `value` may be moved from.
template <std::size_t size, typename T, std::size_t... indexes>
constexpr auto make_array_n_impl(T && value, std::index_sequence<indexes...>)
{
    using Result = std::array<std::decay_t<T>, size>;
    /// The comma expression discards the index and yields `value` as an lvalue, i.e. a copy.
    return Result{ (static_cast<void>(indexes), value)..., std::forward<T>(value) };
}

}

/// Zero-size case: returns an empty array, the value is not used.
template <typename T>
constexpr auto make_array_n(std::integral_constant<std::size_t, 0>, T &&)
{
    using Empty = std::array<std::decay_t<T>, 0>;
    return Empty{};
}

/// General case with the size given as a std::integral_constant tag: `size - 1` copies plus the forwarded value.
template <std::size_t size, typename T>
constexpr auto make_array_n(std::integral_constant<std::size_t, size>, T && value)
{
    using CopyIndexes = std::make_index_sequence<size - 1>;
    return detail::make_array_n_impl<size>(std::forward<T>(value), CopyIndexes{});
}

/// Entry point with the size given as an explicit template argument; dispatches to the tagged overloads.
template <std::size_t size, typename T>
constexpr auto make_array_n(T && value)
{
    using SizeTag = std::integral_constant<std::size_t, size>;
    return make_array_n(SizeTag{}, std::forward<T>(value));
}

}

View File

@ -1,51 +0,0 @@
#pragma once
#include <type_traits>
#include <boost/iterator/transform_iterator.hpp>
namespace ext
{
/// \brief Strip type off top level reference and cv-qualifiers thus allowing storage in containers
template <typename T>
using unqualified_t = std::remove_cv_t<std::remove_reference_t<T>>;
/** \brief Returns collection of the same container-type as the input collection,
* with each element transformed by the application of `mapper`.
*/
template <template <typename...> class Collection, typename... Params, typename Mapper>
auto map(const Collection<Params...> & collection, Mapper && mapper)
{
using value_type = unqualified_t<decltype(mapper(*std::begin(collection)))>;
return Collection<value_type>(
boost::make_transform_iterator(std::begin(collection), std::forward<Mapper>(mapper)),
boost::make_transform_iterator(std::end(collection), std::forward<Mapper>(mapper)));
}
/** \brief Returns collection of specified container-type,
* with each element transformed by the application of `mapper`.
* Allows conversion between different container-types, e.g. std::vector to std::list
*/
template <template <typename...> class ResultCollection, typename Collection, typename Mapper>
auto map(const Collection & collection, Mapper && mapper)
{
using value_type = unqualified_t<decltype(mapper(*std::begin(collection)))>;
return ResultCollection<value_type>(
boost::make_transform_iterator(std::begin(collection), std::forward<Mapper>(mapper)),
boost::make_transform_iterator(std::end(collection), std::forward<Mapper>(mapper)));
}
/** \brief Returns collection of specified type,
* with each element transformed by the application of `mapper`.
* Allows leveraging implicit conversion between the result of applying `mapper` and R::value_type.
*/
template <typename ResultCollection, typename Collection, typename Mapper>
auto map(const Collection & collection, Mapper && mapper)
{
return ResultCollection(
boost::make_transform_iterator(std::begin(collection), std::forward<Mapper>(mapper)),
boost::make_transform_iterator(std::end(collection), std::forward<Mapper>(mapper)));
}
}

View File

@ -1,25 +0,0 @@
#pragma once
#include <vector>
namespace ext
{

/// Appends every argument after the vector to the end of that vector, moving each one.
/// `push_back(vec, a1, a2, a3)` is shorthand for three consecutive single-element appends.
/// Unlike boost::range::push_back(), this also works for noncopyable types.

/// Recursion terminator: nothing left to append.
template <typename T>
void push_back(std::vector<T> &)
{
}

/// Appends `first` (moved), then recurses on the remaining arguments (also moved).
template <typename T, typename FirstArg, typename... OtherArgs>
void push_back(std::vector<T> & vec, FirstArg && first, OtherArgs &&... other)
{
    /// Room for `first` plus everything still pending.
    const auto pending_count = sizeof...(OtherArgs) + 1;
    vec.reserve(vec.size() + pending_count);

    vec.emplace_back(std::move(first));
    push_back(vec, std::move(other)...);
}

}

View File

@ -1,14 +0,0 @@
#pragma once
#include <cstdlib>
namespace ext
{

/** \brief Returns the number of elements in a built-in array passed by reference. */
template <typename T, std::size_t N>
constexpr std::size_t size(const T (&)[N]) noexcept
{
    return N;
}

/** \brief Returns whatever the size() member function of the given container returns. */
template <typename T>
constexpr auto size(const T & t)
{
    return t.size();
}

}

View File

@ -1,27 +0,0 @@
#pragma once
namespace ext
{
template <typename T>
class unlock_guard
{
public:
unlock_guard(T & mutex_) : mutex(mutex_)
{
mutex.unlock();
}
~unlock_guard()
{
mutex.lock();
}
unlock_guard(const unlock_guard &) = delete;
unlock_guard & operator=(const unlock_guard &) = delete;
private:
T & mutex;
};
}

2
contrib/libpqxx vendored

@ -1 +1 @@
Subproject commit 58d2a028d1600225ac3a478d6b3a06ba2f0c01f6
Subproject commit 357608d11b7a1961c3fb7db2ef9a5dbb2e87da77

View File

@ -64,7 +64,7 @@ set (HDRS
add_library(libpqxx ${SRCS} ${HDRS})
target_link_libraries(libpqxx PUBLIC ${LIBPQ_LIBRARY})
target_include_directories (libpqxx PRIVATE "${LIBRARY_DIR}/include")
target_include_directories (libpqxx SYSTEM PRIVATE "${LIBRARY_DIR}/include")
# crutch
set(CM_CONFIG_H_IN "${LIBRARY_DIR}/include/pqxx/config.h.in")

View File

@ -200,7 +200,7 @@ continue
# The server has died.
task_exit_code=210
echo "failure" > status.txt
if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: AddressSanitizer:.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt
if ! grep --text -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: AddressSanitizer:.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt
then
echo "Lost connection to server. See the logs." > description.txt
fi
@ -220,8 +220,8 @@ continue
# which is confusing.
task_exit_code=$fuzzer_exit_code
echo "failure" > status.txt
{ grep -o "Found error:.*" fuzzer.log \
|| grep -o "Exception.*" fuzzer.log \
{ grep --text -o "Found error:.*" fuzzer.log \
|| grep --text -o "Exception.*" fuzzer.log \
|| echo "Fuzzer failed ($fuzzer_exit_code). See the logs." ; } \
| tail -1 > description.txt
fi

View File

@ -489,7 +489,7 @@ if args.report == 'main':
text = tableStart('Test Times')
text += tableHeader(columns, attrs)
allowed_average_run_time = 1.6 # 30 seconds per test at 7 runs
allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs
for r in rows:
anchor = f'{currentTableAnchor()}.{r[0]}'
total_runs = (int(r[7]) + 1) * 2 # one prewarm run, two servers

View File

@ -112,12 +112,15 @@ timeout "$MAX_RUN_TIME" bash -c run_tests ||:
./process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||:
pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz ||:
mv /var/log/clickhouse-server/stderr.log /test_output/ ||:
if [[ -n "$WITH_COVERAGE" ]] && [[ "$WITH_COVERAGE" -eq 1 ]]; then
tar -chf /test_output/clickhouse_coverage.tar.gz /profraw ||:
fi
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server1.log ||:
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server2.log ||:
pigz < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.gz ||:
pigz < /var/log/clickhouse-server/clickhouse-server2.log > /test_output/clickhouse-server2.log.gz ||:
mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:

View File

@ -14,6 +14,8 @@ HUNG_SIGN = "Found hung queries in processlist"
NO_TASK_TIMEOUT_SIGN = "All tests have finished"
RETRIES_SIGN = "Some tests were restarted"
def process_test_log(log_path):
total = 0
skipped = 0
@ -21,6 +23,7 @@ def process_test_log(log_path):
failed = 0
success = 0
hung = False
retries = False
task_timeout = True
test_results = []
with open(log_path, 'r') as test_file:
@ -30,6 +33,8 @@ def process_test_log(log_path):
task_timeout = False
if HUNG_SIGN in line:
hung = True
if RETRIES_SIGN in line:
retries = True
if any(sign in line for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)):
test_name = line.split(' ')[2].split(':')[0]
@ -57,7 +62,7 @@ def process_test_log(log_path):
else:
success += int(OK_SIGN in line)
test_results.append((test_name, "OK", test_time))
return total, skipped, unknown, failed, success, hung, task_timeout, test_results
return total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results
def process_result(result_path):
test_results = []
@ -73,7 +78,7 @@ def process_result(result_path):
state = "error"
if result_path and os.path.exists(result_path):
total, skipped, unknown, failed, success, hung, task_timeout, test_results = process_test_log(result_path)
total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results = process_test_log(result_path)
is_flacky_check = 1 < int(os.environ.get('NUM_TRIES', 1))
# If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately)
# But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped.
@ -83,9 +88,14 @@ def process_result(result_path):
if hung:
description = "Some queries hung, "
state = "failure"
test_results.append(("Some queries hung", "FAIL", "0"))
elif task_timeout:
description = "Timeout, "
state = "failure"
test_results.append(("Timeout", "FAIL", "0"))
elif retries:
description = "Some tests restarted, "
test_results.append(("Some tests restarted", "SKIPPED", "0"))
else:
description = ""

View File

@ -103,6 +103,7 @@ timeout "$MAX_RUN_TIME" bash -c run_tests ||:
clickhouse-client -q "system flush logs" ||:
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||:
pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz &
clickhouse-client -q "select * from system.query_log format TSVWithNamesAndTypes" | pigz > /test_output/query-log.tsv.gz &
clickhouse-client -q "select * from system.query_thread_log format TSVWithNamesAndTypes" | pigz > /test_output/query-thread-log.tsv.gz &
@ -140,6 +141,8 @@ tar -chf /test_output/query_log_dump.tar /var/lib/clickhouse/data/system/query_l
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server1.log ||:
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server2.log ||:
pigz < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.gz ||:
pigz < /var/log/clickhouse-server/clickhouse-server2.log > /test_output/clickhouse-server2.log.gz ||:
mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:

0
docs/clean Normal file
View File

View File

@ -112,7 +112,7 @@ A hand-written recursive descent parser parses a query. For example, `ParserSele
Interpreters are responsible for creating the query execution pipeline from an `AST`. There are simple interpreters, such as `InterpreterExistsQuery` and `InterpreterDropQuery`, or the more sophisticated `InterpreterSelectQuery`. The query execution pipeline is a combination of block input or output streams. For example, the result of interpreting the `SELECT` query is the `IBlockInputStream` to read the result set from; the result of the INSERT query is the `IBlockOutputStream` to write data for insertion to, and the result of interpreting the `INSERT SELECT` query is the `IBlockInputStream` that returns an empty result set on the first read, but that copies data from `SELECT` to `INSERT` at the same time.
`InterpreterSelectQuery` uses `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are done. `ExpressionAnalyzer` is quite messy and should be rewritten: various query transformations and optimizations should be extracted to separate classes to allow modular transformations or query.
`InterpreterSelectQuery` uses `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are done. `ExpressionAnalyzer` is quite messy and should be rewritten: various query transformations and optimizations should be extracted to separate classes to allow modular transformations of query.
## Functions {#functions}
@ -169,7 +169,7 @@ There is no global query plan for distributed query execution. Each node has its
`MergeTree` is a family of storage engines that supports indexing by primary key. The primary key can be an arbitrary tuple of columns or expressions. Data in a `MergeTree` table is stored in “parts”. Each part stores data in the primary key order, so data is ordered lexicographically by the primary key tuple. All the table columns are stored in separate `column.bin` files in these parts. The files consist of compressed blocks. Each block is usually from 64 KB to 1 MB of uncompressed data, depending on the average value size. The blocks consist of column values placed contiguously one after the other. Column values are in the same order for each column (the primary key defines the order), so when you iterate by many columns, you get values for the corresponding rows.
The primary key itself is “sparse”. It does not address every single row, but only some ranges of data. A separate `primary.idx` file has the value of the primary key for each N-th row, where N is called `index_granularity` (usually, N = 8192). Also, for each column, we have `column.mrk` files with “marks,” which are offsets to each N-th row in the data file. Each mark is a pair: the offset in the file to the beginning of the compressed block, and the offset in the decompressed block to the beginning of data. Usually, compressed blocks are aligned by marks, and the offset in the decompressed block is zero. Data for `primary.idx` always resides in memory, and data for `column.mrk` files is cached.
The primary key itself is “sparse”. It does not address every single row, but only some ranges of data. A separate `primary.idx` file has the value of the primary key for each N-th row, where N is called `index_granularity` (usually, N = 8192). Also, for each column, we have `column.mrk` files with “marks”, which are offsets to each N-th row in the data file. Each mark is a pair: the offset in the file to the beginning of the compressed block, and the offset in the decompressed block to the beginning of data. Usually, compressed blocks are aligned by marks, and the offset in the decompressed block is zero. Data for `primary.idx` always resides in memory, and data for `column.mrk` files is cached.
When we are going to read something from a part in `MergeTree`, we look at `primary.idx` data and locate ranges that could contain requested data, then look at `column.mrk` data and calculate offsets for where to start reading those ranges. Because of sparseness, excess data may be read. ClickHouse is not suitable for a high load of simple point queries, because the entire range with `index_granularity` rows must be read for each key, and the entire compressed block must be decompressed for each column. We made the index sparse because we must be able to maintain trillions of rows per single server without noticeable memory consumption for the index. Also, because the primary key is sparse, it is not unique: it cannot check the existence of the key in the table at INSERT time. You could have many rows with the same key in a table.

View File

@ -17,7 +17,7 @@ Main features:
- Partitions can be used if the [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md) is specified.
ClickHouse supports certain operations with partitions that are more effective than general operations on the same data with the same result. ClickHouse also automatically cuts off the partition data where the partitioning key is specified in the query.
ClickHouse supports certain operations with partitions that are more efficient than general operations on the same data with the same result. ClickHouse also automatically cuts off the partition data where the partitioning key is specified in the query.
- Data replication support.
@ -83,7 +83,7 @@ For a description of parameters, see the [CREATE query description](../../../sql
Expression must have one `Date` or `DateTime` column as a result. Example:
`TTL date + INTERVAL 1 DAY`
Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`), or aggregating values in expired rows. Default type of the rule is removal (`DELETE`). List of multiple rules can specified, but there should be no more than one `DELETE` rule.
Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`), or aggregating values in expired rows. Default type of the rule is removal (`DELETE`). List of multiple rules can be specified, but there should be no more than one `DELETE` rule.
For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl)
@ -474,7 +474,7 @@ With `WHERE` clause you may specify which of the expired rows to delete or aggre
`GROUP BY` expression must be a prefix of the table primary key.
If a column is not part of the `GROUP BY` expression and is not set explicitely in the `SET` clause, in result row it contains an occasional value from the grouped rows (as if aggregate function `any` is applied to it).
If a column is not part of the `GROUP BY` expression and is not set explicitly in the `SET` clause, in result row it contains an occasional value from the grouped rows (as if aggregate function `any` is applied to it).
**Examples**

View File

@ -96,7 +96,7 @@ SELECT key, sum(value) FROM summtt GROUP BY key
When data are inserted into a table, they are saved as-is. ClickHouse merges the inserted parts of data periodically and this is when rows with the same primary key are summed and replaced with one for each resulting part of data.
ClickHouse can merge the data parts so that different resulting parts of data cat consist rows with the same primary key, i.e. the summation will be incomplete. Therefore (`SELECT`) an aggregate function [sum()](../../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum) and `GROUP BY` clause should be used in a query as described in the example above.
ClickHouse can merge the data parts so that different resulting parts of data can contain rows with the same primary key, i.e. the summation will be incomplete. Therefore (`SELECT`) an aggregate function [sum()](../../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum) and `GROUP BY` clause should be used in a query as described in the example above.
### Common Rules for Summation {#common-rules-for-summation}

View File

@ -1249,10 +1249,13 @@ The table below shows supported data types and how they match ClickHouse [data t
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `STRING` |
| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `STRING` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
Arrays can be nested and can have a value of the `Nullable` type as an argument.
ClickHouse supports configurable precision of `Decimal` type. The `INSERT` query treats the Parquet `DECIMAL` type as the ClickHouse `Decimal128` type.
Unsupported Parquet data types: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
Unsupported Parquet data types: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
Data types of ClickHouse table columns can differ from the corresponding fields of the Parquet data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [casts](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) the data to that data type which is set for the ClickHouse table column.
@ -1276,7 +1279,54 @@ To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-e
[Apache Arrow](https://arrow.apache.org/) comes with two built-in columnar storage formats. ClickHouse supports read and write operations for these formats.
`Arrow` is Apache Arrows “file mode” format. It is designed for in-memory random access.
`Arrow` is Apache Arrow's "file mode" format. It is designed for in-memory random access.
### Data Types Matching {#data_types-matching-arrow}
The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
| Arrow data type (`INSERT`) | ClickHouse data type | Arrow data type (`SELECT`) |
|----------------------------|-----------------------------------------------------|----------------------------|
| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` |
| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` |
| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | `UINT16` |
| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` |
| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | `UINT32` |
| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` |
| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` |
| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | `INT64` |
| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT32` |
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `FLOAT64` |
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` |
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `UTF8` |
| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `UTF8` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
Arrays can be nested and can have a value of the `Nullable` type as an argument.
ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the Arrow `DECIMAL` type as the ClickHouse `Decimal128` type.
Unsupported Arrow data types: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
The data types of ClickHouse table columns do not have to match the corresponding Arrow data fields. When inserting data, ClickHouse interprets data types according to the table above and then [casts](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) the data to the data type set for the ClickHouse table column.
### Inserting Data {#inserting-data-arrow}
You can insert Arrow data from a file into ClickHouse table by the following command:
``` bash
$ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow"
```
### Selecting Data {#selecting-data-arrow}
You can select data from a ClickHouse table and save them into some file in the Arrow format by the following command:
``` bash
$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filename.arrow}
```
## ArrowStream {#data-format-arrow-stream}
@ -1306,7 +1356,9 @@ The table below shows supported data types and how they match ClickHouse [data t
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `-` | [Array](../sql-reference/data-types/array.md) | `LIST` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
Arrays can be nested and can have a value of the `Nullable` type as an argument.
ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the ORC `DECIMAL` type as the ClickHouse `Decimal128` type.

View File

@ -191,10 +191,12 @@ Possible values:
Default value: 480.
`fsync` is not called for new parts, so for some time new parts exist only in the server's RAM (OS cache). If the server is rebooted spontaneously, new parts can be lost or damaged.
To protect data parts created by merges source parts are not deleted immediately. After merging several parts into a new part, ClickHouse marks the original parts as inactive and deletes them only after `old_parts_lifetime` seconds.
After merging several parts into a new part, ClickHouse marks the original parts as inactive and deletes them only after `old_parts_lifetime` seconds.
Inactive parts are removed if they are not used by current queries, i.e. if the `refcount` of the part is zero.
`fsync` is not called for new parts, so for some time new parts exist only in the server's RAM (OS cache). If the server is rebooted spontaneously, new parts can be lost or damaged.
To protect data, inactive parts are not deleted immediately.
During startup ClickHouse checks the integrity of the parts.
If the merged part is damaged ClickHouse returns the inactive parts to the active list, and later merges them again. Then the damaged part is renamed (the `broken_` prefix is added) and moved to the `detached` folder.
If the merged part is not damaged, then the original inactive parts are renamed (the `ignored_` prefix is added) and moved to the `detached` folder.
@ -214,7 +216,7 @@ Default value: 161061273600 (150 GB).
The merge scheduler periodically analyzes the sizes and number of parts in partitions, and if there are enough free resources in the pool, it starts background merges. Merges occur until the total size of the source parts is less than `max_bytes_to_merge_at_max_space_in_pool`.
Merges initiated by `optimize final` ignore `max_bytes_to_merge_at_max_space_in_pool` and merge parts only taking into account available resources (free disk's space) until one part remains in the partition.
Merges initiated by [OPTIMIZE FINAL](../../sql-reference/statements/optimize.md) ignore `max_bytes_to_merge_at_max_space_in_pool` and merge parts only taking into account available resources (free disk's space) until one part remains in the partition.
## max_bytes_to_merge_at_min_space_in_pool {#max-bytes-to-merge-at-min-space-in-pool}
@ -252,6 +254,7 @@ Possible values:
Default value: auto (number of CPU cores).
During startup ClickHouse reads all parts of all tables (reads files with metadata of parts) to build a list of all parts in memory. In some systems with a large number of parts this process can take a long time, and this time might be shortened by increasing `max_part_loading_threads` (if this process is not CPU and disk I/O bound).
## max_partitions_to_read {#max-partitions-to-read}
Limits the maximum number of partitions that can be accessed in one query.

View File

@ -2069,7 +2069,7 @@ Possible values:
- Any positive integer.
Default value: 16.
Default value: 128.
## background_fetches_pool_size {#background_fetches_pool_size}
@ -2549,17 +2549,6 @@ Result
└──────────────────────────┴───────┴───────────────────────────────────────────────────────┘
```
## allow_experimental_bigint_types {#allow_experimental_bigint_types}
Enables or disables integer values exceeding the range that is supported by the int data type.
Possible values:
- 1 — The bigint data type is enabled.
- 0 — The bigint data type is disabled.
Default value: `0`.
## persistent {#persistent}
Disables persistency for the [Set](../../engines/table-engines/special/set.md#set) and [Join](../../engines/table-engines/special/join.md#join) table engines.

View File

@ -116,7 +116,7 @@ Type: `UInt8`.
- `.*` — Matches any number of events. You do not need conditional arguments to match this element of the pattern.
- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` matches events that occur more than 1800 seconds from each other. An arbitrary number of any events can lay between these events. You can use the `>=`, `>`, `<`, `<=` operators.
- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` matches events that occur more than 1800 seconds from each other. An arbitrary number of any events can lie between these events. You can use the `>=`, `>`, `<`, `<=`, `==` operators.
**Examples**
@ -509,7 +509,7 @@ Same behavior as [sumMap](../../sql-reference/aggregate-functions/reference/summ
## sequenceNextNode {#sequenceNextNode}
Returns a value of next event that matched an event chain.
Returns a value of the next event that matched an event chain.
_Experimental function, `SET allow_experimental_funnel_functions = 1` to enable it._
@ -520,33 +520,36 @@ sequenceNextNode(direction, base)(timestamp, event_column, base_condition, event
```
**Parameters**
- `direction` - Used to navigate to directions.
- forward : Moving forward
- backward: Moving backward
- `base` - Used to set the base point.
- head : Set the base point to the first event
- tail : Set the base point to the last event
- first_match : Set the base point to the first matched event1
- last_match : Set the base point to the last matched event1
- `direction` — Used to navigate to directions.
- forward — Moving forward.
- backward — Moving backward.
- `base` — Used to set the base point.
- head — Set the base point to the first event.
- tail — Set the base point to the last event.
- first_match — Set the base point to the first matched `event1`.
- last_match — Set the base point to the last matched `event1`.
**Arguments**
- `timestamp` — Name of the column containing the timestamp. Data types supported: `Date`, `DateTime` and other unsigned integer types.
- `event_column` — Name of the column containing the value of the next event to be returned. Data types supported: `String` and `Nullable(String)`
- `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types.
- `event_column` — Name of the column containing the value of the next event to be returned. Data types supported: [String](../../sql-reference/data-types/string.md) and [Nullable(String)](../../sql-reference/data-types/nullable.md).
- `base_condition` — Condition that the base point must fulfill.
- `cond` — Conditions describing the chain of events. `UInt8`
- `event1`, `event2`, ... — Conditions describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md).
**Returned value**
- `event_column[next_index]` - if the pattern is matched and next value exists.
- `NULL` - if the pattern isnt matched or next value doesn't exist.
**Returned values**
Type: `Nullable(String)`.
- `event_column[next_index]` — If the pattern is matched and next value exists.
- `NULL` — If the pattern isn't matched or the next value doesn't exist.
Type: [Nullable(String)](../../sql-reference/data-types/nullable.md).
**Example**
It can be used when events are A->B->C->E->F and you want to know the event following B->C, which is E.
It can be used when events are A->B->C->D->E and you want to know the event following B->C, which is D.
The query statement searching the event following A->B :
The query statement searching the event following A->B:
``` sql
CREATE TABLE test_flow (
@ -557,7 +560,7 @@ ENGINE = MergeTree()
PARTITION BY toYYYYMMDD(dt)
ORDER BY id;
INSERT INTO test_flow VALUES (1, 1, 'A') (2, 1, 'B') (3, 1, 'C') (4, 1, 'E') (5, 1, 'F');
INSERT INTO test_flow VALUES (1, 1, 'A') (2, 1, 'B') (3, 1, 'C') (4, 1, 'D') (5, 1, 'E');
SELECT id, sequenceNextNode('forward', 'head')(dt, page, page = 'A', page = 'A', page = 'B') as next_flow FROM test_flow GROUP BY id;
```
@ -572,7 +575,7 @@ Result:
**Behavior for `forward` and `head`**
```SQL
``` sql
ALTER TABLE test_flow DELETE WHERE 1 = 1 settings mutations_sync = 1;
INSERT INTO test_flow VALUES (1, 1, 'Home') (2, 1, 'Gift') (3, 1, 'Exit');
@ -580,7 +583,7 @@ INSERT INTO test_flow VALUES (1, 2, 'Home') (2, 2, 'Home') (3, 2, 'Gift') (4, 2,
INSERT INTO test_flow VALUES (1, 3, 'Gift') (2, 3, 'Home') (3, 3, 'Gift') (4, 3, 'Basket');
```
```SQL
``` sql
SELECT id, sequenceNextNode('forward', 'head')(dt, page, page = 'Home', page = 'Home', page = 'Gift') FROM test_flow GROUP BY id;
dt id page
@ -601,7 +604,7 @@ SELECT id, sequenceNextNode('forward', 'head')(dt, page, page = 'Home', page = '
**Behavior for `backward` and `tail`**
```SQL
``` sql
SELECT id, sequenceNextNode('backward', 'tail')(dt, page, page = 'Basket', page = 'Basket', page = 'Gift') FROM test_flow GROUP BY id;
dt id page
@ -623,7 +626,7 @@ SELECT id, sequenceNextNode('backward', 'tail')(dt, page, page = 'Basket', page
**Behavior for `forward` and `first_match`**
```SQL
``` sql
SELECT id, sequenceNextNode('forward', 'first_match')(dt, page, page = 'Gift', page = 'Gift') FROM test_flow GROUP BY id;
dt id page
@ -637,12 +640,12 @@ SELECT id, sequenceNextNode('forward', 'first_match')(dt, page, page = 'Gift', p
1970-01-01 09:00:04 2 Basket The result
1970-01-01 09:00:01 3 Gift // Base point
1970-01-01 09:00:02 3 Home // Thre result
1970-01-01 09:00:02 3 Home // The result
1970-01-01 09:00:03 3 Gift
1970-01-01 09:00:04 3 Basket
```
```SQL
``` sql
SELECT id, sequenceNextNode('forward', 'first_match')(dt, page, page = 'Gift', page = 'Gift', page = 'Home') FROM test_flow GROUP BY id;
dt id page
@ -664,7 +667,7 @@ SELECT id, sequenceNextNode('forward', 'first_match')(dt, page, page = 'Gift', p
**Behavior for `backward` and `last_match`**
```SQL
``` sql
SELECT id, sequenceNextNode('backward', 'last_match')(dt, page, page = 'Gift', page = 'Gift') FROM test_flow GROUP BY id;
dt id page
@ -683,7 +686,7 @@ SELECT id, sequenceNextNode('backward', 'last_match')(dt, page, page = 'Gift', p
1970-01-01 09:00:04 3 Basket
```
```SQL
``` sql
SELECT id, sequenceNextNode('backward', 'last_match')(dt, page, page = 'Gift', page = 'Gift', page = 'Home') FROM test_flow GROUP BY id;
dt id page
@ -705,7 +708,7 @@ SELECT id, sequenceNextNode('backward', 'last_match')(dt, page, page = 'Gift', p
**Behavior for `base_condition`**
```SQL
``` sql
CREATE TABLE test_flow_basecond
(
`dt` DateTime,
@ -715,47 +718,47 @@ CREATE TABLE test_flow_basecond
)
ENGINE = MergeTree
PARTITION BY toYYYYMMDD(dt)
ORDER BY id
ORDER BY id;
INSERT INTO test_flow_basecond VALUES (1, 1, 'A', 'ref4') (2, 1, 'A', 'ref3') (3, 1, 'B', 'ref2') (4, 1, 'B', 'ref1');
```
```SQL
``` sql
SELECT id, sequenceNextNode('forward', 'head')(dt, page, ref = 'ref1', page = 'A') FROM test_flow_basecond GROUP BY id;
dt id page ref
1970-01-01 09:00:01 1 A ref4 // The head can't be base point becasue the ref column of the head unmatched with 'ref1'.
1970-01-01 09:00:01 1 A ref4 // The head can not be base point because the ref column of the head unmatched with 'ref1'.
1970-01-01 09:00:02 1 A ref3
1970-01-01 09:00:03 1 B ref2
1970-01-01 09:00:04 1 B ref1
```
```SQL
``` sql
SELECT id, sequenceNextNode('backward', 'tail')(dt, page, ref = 'ref4', page = 'B') FROM test_flow_basecond GROUP BY id;
dt id page ref
1970-01-01 09:00:01 1 A ref4
1970-01-01 09:00:02 1 A ref3
1970-01-01 09:00:03 1 B ref2
1970-01-01 09:00:04 1 B ref1 // The tail can't be base point becasue the ref column of the tail unmatched with 'ref4'.
1970-01-01 09:00:04 1 B ref1 // The tail can not be base point because the ref column of the tail unmatched with 'ref4'.
```
```SQL
``` sql
SELECT id, sequenceNextNode('forward', 'first_match')(dt, page, ref = 'ref3', page = 'A') FROM test_flow_basecond GROUP BY id;
dt id page ref
1970-01-01 09:00:01 1 A ref4 // This row can't be base point becasue the ref column unmatched with 'ref3'.
1970-01-01 09:00:01 1 A ref4 // This row can not be base point because the ref column unmatched with 'ref3'.
1970-01-01 09:00:02 1 A ref3 // Base point
1970-01-01 09:00:03 1 B ref2 // The result
1970-01-01 09:00:04 1 B ref1
```
```SQL
``` sql
SELECT id, sequenceNextNode('backward', 'last_match')(dt, page, ref = 'ref2', page = 'B') FROM test_flow_basecond GROUP BY id;
dt id page ref
1970-01-01 09:00:01 1 A ref4
1970-01-01 09:00:02 1 A ref3 // The result
1970-01-01 09:00:03 1 B ref2 // Base point
1970-01-01 09:00:04 1 B ref1 // This row can't be base point becasue the ref column unmatched with 'ref2'.
1970-01-01 09:00:04 1 B ref1 // This row can not be base point because the ref column unmatched with 'ref2'.
```

View File

@ -27,17 +27,17 @@ SELECT
Returns the timezone of the server.
**Syntax**
**Syntax**
``` sql
timeZone()
```
Alias: `timezone`.
Alias: `timezone`.
**Returned value**
- Timezone.
- Timezone.
Type: [String](../../sql-reference/data-types/string.md).
@ -45,7 +45,7 @@ Type: [String](../../sql-reference/data-types/string.md).
Converts time or date and time to the specified time zone. The time zone is an attribute of the `Date` and `DateTime` data types. The internal value (number of seconds) of the table field or of the resultset's column does not change, the column's type changes and its string representation changes accordingly.
**Syntax**
**Syntax**
``` sql
toTimezone(value, timezone)
@ -53,14 +53,14 @@ toTimezone(value, timezone)
Alias: `toTimezone`.
**Arguments**
**Arguments**
- `value` — Time or date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — Timezone for the returned value. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Date and time.
- Date and time.
Type: [DateTime](../../sql-reference/data-types/datetime.md).
@ -102,21 +102,21 @@ int32samoa: 1546300800
Returns the timezone name of [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md) data types.
**Syntax**
**Syntax**
``` sql
timeZoneOf(value)
```
Alias: `timezoneOf`.
Alias: `timezoneOf`.
**Arguments**
- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Returned value**
- Timezone name.
- Timezone name.
Type: [String](../../sql-reference/data-types/string.md).
@ -149,11 +149,11 @@ Alias: `timezoneOffset`.
**Arguments**
- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Returned value**
- Offset from UTC in seconds.
- Offset from UTC in seconds.
Type: [Int32](../../sql-reference/data-types/int-uint.md).
@ -599,7 +599,7 @@ Aliases: `dateAdd`, `DATE_ADD`.
- `quarter`
- `year`
- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md).
- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md).
- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
**Returned value**
@ -704,7 +704,7 @@ Aliases: `dateSub`, `DATE_SUB`.
- `quarter`
- `year`
- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md).
- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md).
- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
**Returned value**
@ -805,7 +805,7 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`.
- `quarter`
- `year`
- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md).
- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md).
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
**Returned value**
@ -963,7 +963,7 @@ formatDateTime(Time, Format\[, Timezone\])
**Returned value(s)**
Returnes time and date values according to the determined format.
Returns time and date values according to the determined format.
**Replacement fields**
Using replacement fields, you can define a pattern for the resulting string. “Example” column shows formatting result for `2018-01-02 22:33:44`.
@ -1012,6 +1012,45 @@ Result:
└────────────────────────────────────────────┘
```
## dateName {#dataname}
Returns the specified part of a date.
**Syntax**
``` sql
dateName(date_part, date)
```
**Arguments**
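- `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md#string).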
- `date` — Date. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Returned value**
- Specified date part of date.
Type: [String](../../sql-reference/data-types/string.md#string)
**Example**
Query:
```sql
WITH toDateTime('2021-04-14 11:22:33') AS date_value
SELECT dateName('year', date_value), dateName('month', date_value), dateName('day', date_value);
```
Result:
```text
┌─dateName('year', date_value)─┬─dateName('month', date_value)─┬─dateName('day', date_value)─┐
│ 2021                         │ April                         │ 14                          │
└──────────────────────────────┴───────────────────────────────┴─────────────────────────────┘
```
## FROM\_UNIXTIME {#fromunixfime}
Converts a Unix timestamp to a calendar date and a time of day. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and returns a value of the [DateTime](../../sql-reference/data-types/datetime.md) type.

View File

@ -188,6 +188,24 @@ SELECT now() AS current_date_time, current_date_time + INTERVAL '4' day + INTERV
└─────────────────────┴────────────────────────────────────────────────────────────┘
```
You can work with dates without using `INTERVAL`, just by adding or subtracting seconds, minutes, and hours. For example, an interval of one day can be set by adding `60*60*24`.
!!! note "Note"
The `INTERVAL` syntax or the `addDays` function is always preferred. Simple addition or subtraction (syntax like `now() + ...`) doesn't consider time settings, such as daylight saving time.
Examples:
``` sql
SELECT toDateTime('2014-10-26 00:00:00', 'Europe/Moscow') AS time, time + 60 * 60 * 24 AS time_plus_24_hours, time + toIntervalDay(1) AS time_plus_1_day;
```
``` text
┌────────────────time─┬──time_plus_24_hours─┬─────time_plus_1_day─┐
│ 2014-10-26 00:00:00 │ 2014-10-26 23:00:00 │ 2014-10-27 00:00:00 │
└─────────────────────┴─────────────────────┴─────────────────────┘
```
**See Also**
- [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type
@ -295,4 +313,3 @@ SELECT * FROM t_null WHERE y IS NOT NULL
│ 2 │ 3 │
└───┴───┘
```

View File

@ -366,9 +366,9 @@ Returns a list of clusters. All available clusters are listed in the [system.clu
``` sql
SHOW CLUSTER '<name>'
SWOW CLUSTERS [LIKE|NOT LIKE '<pattern>'] [LIMIT <N>]
SHOW CLUSTERS [LIKE|NOT LIKE '<pattern>'] [LIMIT <N>]
```
### Examples
### Examples {#show-cluster-examples}
Query:

View File

@ -113,7 +113,7 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...)
- `.*` — Matches any number of events. You don't need conditional arguments to match this element of the pattern.
- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` お互いから1800秒以上発生するイベントと一致します。 これらのイベントの間に任意の数のイベントを配置できます。 を使用することができます `>=`, `>`, `<`, `<=` 演算子。
- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` お互いから1800秒以上発生するイベントと一致します。 これらのイベントの間に任意の数のイベントを配置できます。 を使用することができます `>=`, `>`, `<`, `<=`, `==` 演算子。
**例**

View File

@ -1168,12 +1168,15 @@ SELECT * FROM topic1_stream;
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `STRING` |
| — | [FixedString](../sql-reference/data-types/fixedstring.md) | `STRING` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При обработке запроса `INSERT`, ClickHouse обрабатывает тип данных Parquet `DECIMAL` как `Decimal128`.
Массивы могут быть вложенными и иметь в качестве аргумента значение типа `Nullable`.
Неподдержанные типы данных Parquet: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При выполнении запроса `INSERT` ClickHouse обрабатывает тип данных Parquet `DECIMAL` как `Decimal128`.
Типы данных столбцов в ClickHouse могут отличаться от типов данных соответствующих полей файла в формате Parquet. При вставке данных, ClickHouse интерпретирует типы данных в соответствии с таблицей выше, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к тому типу, который установлен для столбца таблицы.
Неподдерживаемые типы данных Parquet: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
Типы данных столбцов в ClickHouse могут отличаться от типов данных соответствующих полей файла в формате Parquet. При вставке данных ClickHouse интерпретирует типы данных в соответствии с таблицей выше, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к тому типу, который установлен для столбца таблицы.
### Вставка и выборка данных {#vstavka-i-vyborka-dannykh}
@ -1197,6 +1200,53 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_
`Arrow` — это Apache Arrow's "file mode" формат. Он предназначен для произвольного доступа в памяти.
### Соответствие типов данных {#data_types-matching-arrow}
Таблица ниже содержит поддерживаемые типы данных и их соответствие [типам данных](../sql-reference/data-types/index.md) ClickHouse для запросов `INSERT` и `SELECT`.
| Тип данных Arrow (`INSERT`) | Тип данных ClickHouse | Тип данных Arrow (`SELECT`) |
|-----------------------------|-----------------------------------------------------|-----------------------------|
| `UINT8`, `BOOL` | [UInt8](../sql-reference/data-types/int-uint.md) | `UINT8` |
| `INT8` | [Int8](../sql-reference/data-types/int-uint.md) | `INT8` |
| `UINT16` | [UInt16](../sql-reference/data-types/int-uint.md) | `UINT16` |
| `INT16` | [Int16](../sql-reference/data-types/int-uint.md) | `INT16` |
| `UINT32` | [UInt32](../sql-reference/data-types/int-uint.md) | `UINT32` |
| `INT32` | [Int32](../sql-reference/data-types/int-uint.md) | `INT32` |
| `UINT64` | [UInt64](../sql-reference/data-types/int-uint.md) | `UINT64` |
| `INT64` | [Int64](../sql-reference/data-types/int-uint.md) | `INT64` |
| `FLOAT`, `HALF_FLOAT` | [Float32](../sql-reference/data-types/float.md) | `FLOAT32` |
| `DOUBLE` | [Float64](../sql-reference/data-types/float.md) | `FLOAT64` |
| `DATE32` | [Date](../sql-reference/data-types/date.md) | `UINT16` |
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `UTF8` |
| `STRING`, `BINARY` | [FixedString](../sql-reference/data-types/fixedstring.md) | `UTF8` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
Массивы могут быть вложенными и иметь в качестве аргумента значение типа `Nullable`.
ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При выполнении запроса `INSERT` ClickHouse обрабатывает тип данных Arrow `DECIMAL` как `Decimal128`.
Неподдерживаемые типы данных Arrow: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
Типы данных столбцов в ClickHouse могут отличаться от типов данных соответствующих полей файла в формате Arrow. При вставке данных ClickHouse интерпретирует типы данных в соответствии с таблицей выше, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к тому типу, который установлен для столбца таблицы.
### Вставка данных {#inserting-data-arrow}
Чтобы вставить в ClickHouse данные из файла в формате Arrow, используйте команду следующего вида:
``` bash
$ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow"
```
### Вывод данных {#selecting-data-arrow}
Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Arrow, используйте команду следующего вида:
``` bash
$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filename.arrow}
```
## ArrowStream {#data-format-arrow-stream}
`ArrowStream` — это Apache Arrow's "stream mode" формат. Он предназначен для обработки потоков в памяти.
@ -1225,9 +1275,11 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_
| `DATE64`, `TIMESTAMP` | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP` |
| `STRING`, `BINARY` | [String](../sql-reference/data-types/string.md) | `BINARY` |
| `DECIMAL` | [Decimal](../sql-reference/data-types/decimal.md) | `DECIMAL` |
| `-` | [Array](../sql-reference/data-types/array.md) | `LIST` |
| `LIST` | [Array](../sql-reference/data-types/array.md) | `LIST` |
ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При обработке запроса `INSERT`, ClickHouse обрабатывает тип данных ORC `DECIMAL` как `Decimal128`.
Массивы могут быть вложенными и иметь в качестве аргумента значение типа `Nullable`.
ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При выполнении запроса `INSERT` ClickHouse обрабатывает тип данных ORC `DECIMAL` как `Decimal128`.
Неподдерживаемые типы данных ORC: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
@ -1397,4 +1449,3 @@ $ clickhouse-client --query "SELECT * FROM {some_table} FORMAT RawBLOB" | md5sum
``` text
f9725a22f9191e064120d718e26862a9 -
```

View File

@ -1,6 +1,6 @@
# Настройки MergeTree таблиц {#merge-tree-settings}
Значения настроек для всех MergeTree таблиц можно посмотреть в таблице `system.merge_tree_settings`, их можно переопределить в `config.xml` в секции `merge_tree`, или задать в секции `SETTINGS` у каждой таблицы.
Значения настроек всех MergeTree таблиц собраны в таблице `system.merge_tree_settings`. Их можно переопределить в разделе `merge_tree` файла `config.xml` или задать в секции `SETTINGS` каждой таблицы.
Пример переопределения в `config.xml`:
@ -10,7 +10,7 @@
</merge_tree>
```
Пример для определения в `SETTINGS` у конкретной таблицы:
Пример установки `SETTINGS` для конкретной таблицы:
``` sql
CREATE TABLE foo
@ -22,7 +22,7 @@ ORDER BY tuple()
SETTINGS max_suspicious_broken_parts = 500;
```
Пример изменения настроек у конкретной таблицы командой `ALTER TABLE ... MODIFY SETTING`:
Пример изменения настроек для конкретной таблицы при помощи команды `ALTER TABLE ... MODIFY SETTING`:
``` sql
ALTER TABLE foo
@ -31,7 +31,7 @@ ALTER TABLE foo
## parts_to_throw_insert {#parts-to-throw-insert}
Eсли число кусков в партиции превышает значение `parts_to_throw_insert`, INSERT прерывается с исключением `Too many parts (N). Merges are processing significantly slower than inserts`.
Если число активных кусков в партиции больше значения `parts_to_throw_insert`, то INSERT прерывается с исключением: `Too many parts (N). Merges are processing significantly slower than inserts`.
Возможные значения:
@ -39,13 +39,13 @@ Eсли число кусков в партиции превышает знач
Значение по умолчанию: 300.
Для достижения максимальной производительности запросов `SELECT` необходимо минимизировать количество обрабатываемых кусков, см. [Дизайн MergeTree](../../development/architecture.md#merge-tree).
Чтобы производительность запросов `SELECT` стала максимальной, необходимо минимизировать количество обрабатываемых кусков, см. [Дизайн MergeTree](../../development/architecture.md#merge-tree).
Можно установить большее значение 600 (1200), это уменьшит вероятность возникновения ошибки `Too many parts`, но в тоже время вы позже обнаружите возможную проблему со слияниями (например, из-за недостатка места на диске) и деградацию производительности `SELECT`.
Можно установить значение больше — 600 (1200) кусков. Тогда ошибка `Too many parts` будет появляться реже, но при этом могут возникнуть проблемы с фоновыми слияниями и производительностью `SELECT`-запросов.
## parts_to_delay_insert {#parts-to-delay-insert}
Eсли число кусков в партиции превышает значение `parts_to_delay_insert`, `INSERT` искусственно замедляется.
Если число кусков в партиции больше значения `parts_to_delay_insert`, то `INSERT` искусственно замедляется.
Возможные значения:
@ -53,31 +53,31 @@ Eсли число кусков в партиции превышает знач
Значение по умолчанию: 150.
ClickHouse искусственно выполняет `INSERT` дольше (добавляет sleep), чтобы фоновый механизм слияния успевал слиять куски быстрее, чем они добавляются.
ClickHouse искусственно выполняет `INSERT` дольше (добавляет sleep) так, чтобы куски сливались в фоновом процессе быстрее, чем добавляются.
## inactive_parts_to_throw_insert {#inactive-parts-to-throw-insert}
Если число неактивных кусков в партиции превышает значение `inactive_parts_to_throw_insert`, `INSERT` прерывается с исключением «Too many inactive parts (N). Parts cleaning are processing significantly slower than inserts».
Если число неактивных кусков в партиции больше значения `inactive_parts_to_throw_insert`, то `INSERT` прерывается с исключением `Too many inactive parts (N). Parts cleaning are processing significantly slower than inserts`.
Возможные значения:
- Положительное целое число.
Значение по умолчанию: 0 (не ограничено).
Значение по умолчанию: 0 (без ограничений).
## inactive_parts_to_delay_insert {#inactive-parts-to-delay-insert}
Если число неактивных кусков в партиции больше или равно значению `inactive_parts_to_delay_insert`, `INSERT` искусственно замедляется. Это полезно, когда сервер не может быстро очистить неактивные куски.
Если число неактивных кусков в партиции больше или равно значению `inactive_parts_to_delay_insert`, то `INSERT` искусственно замедляется. Это помогает, когда сервер не может быстро очистить неактивные куски.
Возможные значения:
- Положительное целое число.
Значение по умолчанию: 0 (не ограничено).
Значение по умолчанию: 0 (без ограничений).
## max_delay_to_insert {#max-delay-to-insert}
Величина в секундах, которая используется для расчета задержки `INSERT`, если число кусков в партиции превышает значение [parts_to_delay_insert](#parts-to-delay-insert).
Величина в секундах, которая используется для расчета задержки `INSERT` в случаях, когда число кусков в партиции больше значения [parts_to_delay_insert](#parts-to-delay-insert).
Возможные значения:
@ -87,17 +87,17 @@ ClickHouse искусственно выполняет `INSERT` дольше (д
Величина задержки (в миллисекундах) для `INSERT` вычисляется по формуле:
``` code
```code
max_k = parts_to_throw_insert - parts_to_delay_insert
k = 1 + parts_count_in_partition - parts_to_delay_insert
delay_milliseconds = pow(max_delay_to_insert * 1000, k / max_k)
```
Т.е. если в партиции уже 299 кусков и parts_to_throw_insert = 300, parts_to_delay_insert = 150, max_delay_to_insert = 1, `INSERT` замедлится на `pow( 1 * 1000, (1 + 299 - 150) / (300 - 150) ) = 1000` миллисекунд.
Т.е. если в партиции уже 299 кусков и parts_to_throw_insert = 300, parts_to_delay_insert = 150, а max_delay_to_insert = 1, то `INSERT` замедлится на `pow( 1 * 1000, (1 + 299 - 150) / (300 - 150) ) = 1000` миллисекунд.
## max_parts_in_total {#max-parts-in-total}
Eсли суммарное число активных кусков во всех партициях таблицы превышает значение `max_parts_in_total`, INSERT прерывается с исключением `Too many parts (N)`.
Если суммарное число активных кусков во всех партициях таблицы больше значения `max_parts_in_total`, то INSERT прерывается с исключением `Too many parts (N)`.
Возможные значения:
@ -105,20 +105,22 @@ Eсли суммарное число активных кусков во все
Значение по умолчанию: 100000.
Большое число кусков в таблице снижает производительность запросов ClickHouse и увеличивает время старта ClickHouse. Чаще всего это следствие неправильного дизайна (ошибки при выборе стратегии партиционирования -- слишком мелкие партиции).
С большим числом кусков в таблице производительность запросов ClickHouse снижается, а время старта ClickHouse — увеличивается. Чаще всего это следствие неправильного дизайна (ошибки выбора стратегии партиционирования, например, слишком мелкие партиции).
## replicated_deduplication_window {#replicated-deduplication-window}
Количество хеш-сумм последних вставленных блоков, хранящихся в Zookeeper.
Количество хеш-сумм последних вставленных блоков, которые хранятся в Zookeeper.
Возможные значения:
- Положительное целое число.
- 0 (без ограничений).
Значение по умолчанию: 100.
Команда `Insert` создает один или несколько блоков (кусков). При вставке в Replicated таблицы ClickHouse для [дедупликации вставок](../../engines/table-engines/mergetree-family/replication.md) записывает в Zookeeper хеш-суммы созданных кусков. Но хранятся хеш-суммы не всех кусков, а только последние `replicated_deduplication_window`. Наиболее старые хеш-суммы удаляются из Zookeeper.
Большое число `replicated_deduplication_window` замедляет `Insert`-ы. Хеш-сумма рассчитывается от композиции имен и типов полей, а также данных вставленного куска (потока байт).
Команда `Insert` создает один или несколько блоков (кусков). При вставке в Replicated таблицы ClickHouse для [дедупликации вставок](../../engines/table-engines/mergetree-family/replication.md) записывает в Zookeeper хеш-суммы созданных кусков. Но хранятся только последние `replicated_deduplication_window` хеш-сумм. Самые старые хеш-суммы удаляются из Zookeeper.
Большое значение `replicated_deduplication_window` замедляет `Insert`, так как приходится сравнивать большее количество хеш-сумм.
Хеш-сумма рассчитывается по названиям и типам полей, а также по данным вставленного куска (потока байт).
## non_replicated_deduplication_window {#non-replicated-deduplication-window}
@ -135,7 +137,7 @@ Eсли суммарное число активных кусков во все
## replicated_deduplication_window_seconds {#replicated-deduplication-window-seconds}
Число секунд, после которых хеш-суммы вставленных блоков удаляются из Zookeeper.
Время хранения (в секундах) хеш-сумм вставленных блоков в Zookeeper. По истечении этого времени хеш-суммы удаляются.
Возможные значения:
@ -143,11 +145,11 @@ Eсли суммарное число активных кусков во все
Значение по умолчанию: 604800 (1 неделя).
Аналогично [replicated_deduplication_window](#replicated-deduplication-window), задает, сколько времени хранить хеш-суммы блоков для дедупликции `Insert`. Хеш-суммы старше `replicated_deduplication_window_seconds` удаляются из Zookeeper, даже если их меньше чем `replicated_deduplication_window`.
Аналогично [replicated_deduplication_window](#replicated-deduplication-window), настройка `replicated_deduplication_window_seconds` задает время хранения хеш-сумм блоков для дедупликации `Insert`. Хеш-суммы старше значения `replicated_deduplication_window_seconds` удаляются из Zookeeper, даже если количество оставшихся хеш-сумм станет меньше, чем `replicated_deduplication_window`.
## old_parts_lifetime {#old-parts-lifetime}
Время (в секундах) хранения неактивных кусков, для защиты от потери данных при спонтанной перезагрузке сервера или О.С.
Время (в секундах) хранения неактивных кусков для защиты от потери данных при спонтанной перезагрузке сервера.
Возможные значения:
@ -155,12 +157,16 @@ Eсли суммарное число активных кусков во все
Значение по умолчанию: 480.
После слияния нескольких кусков в новый кусок, ClickHouse помечает исходные куски как неактивные и удаляет их после `old_parts_lifetime` секунд.
Неактивные куски удаляются, если они не используются в текущих запросах, т.е. если счетчик ссылок куска `refcount` равен нулю.
После объединения нескольких кусков в один новый ClickHouse помечает исходные куски как неактивные и удаляет их по прошествии `old_parts_lifetime` секунд.
Неактивные куски удаляются, если они не нужны для текущих запросов, т.е. если счетчик ссылок куска `refcount` имеет нулевое значение.
Неактивные куски удаляются не сразу, потому что при записи нового куска не вызывается `fsync`, т.е. некоторое время новый кусок находится только в оперативной памяти сервера (кеше О.С.). Т.о. при спонтанной перезагрузке сервера новый (смерженный) кусок может быть потерян или испорчен. В этом случае ClickHouse в процессе старта при проверке целостности кусков обнаружит проблему, вернет неактивные куски в список активных и позже заново их смержит. Сломанный кусок в этом случае переименовывается (добавляется префикс broken_) и перемещается в папку detached. Если проверка целостности не обнаруживает проблем в смерженном куске, то исходные неактивные куски переименовываются (добавляется префикс ignored_) и перемещаются в папку detached.
При записи нового куска `fsync` не вызывается, поэтому неактивные куски удаляются позже. Это значит, что некоторое время новый кусок находится только в оперативной памяти сервера (кеш ОС). Если сервер перезагрузится спонтанно, новый слитый кусок может испортиться или потеряться.
Стандартное значение Linux dirty_expire_centisecs - 30 секунд (максимальное время, которое записанные данные хранятся только в оперативной памяти), но при больших нагрузках на дисковую систему, данные могут быть записаны намного позже. Экспериментально было найдено время - 480 секунд, за которое гарантированно новый кусок будет записан на диск.
Во время запуска сервер ClickHouse проверяет целостность кусков.
Если новый (слитый) кусок поврежден, ClickHouse возвращает неактивные куски в список активных и позже снова выполняет слияние. В этом случае испорченный кусок получает новое имя (добавляется префикс `broken_`) и попадает в каталог `detached`.
Если проверка целостности не выявляет проблем в слитом куске, то исходные неактивные куски переименовываются (добавляется префикс `ignored_`) и перемещаются в каталог `detached`.
Стандартное для Linux значение `dirty_expire_centisecs` — 30 секунд. Это максимальное время, в течение которого записанные данные хранятся только в оперативной памяти. Если нагрузка на дисковую систему большая, то данные записываются намного позже. Значение 480 секунд подобрали экспериментальным путем — это время, за которое новый кусок гарантированно запишется на диск.
## replicated_fetches_http_connection_timeout {#replicated_fetches_http_connection_timeout}
@ -197,8 +203,8 @@ Eсли суммарное число активных кусков во все
## max_bytes_to_merge_at_max_space_in_pool {#max-bytes-to-merge-at-max-space-in-pool}
Максимальный суммарный размер кусков (в байтах) в одном слиянии, при наличии свободных ресурсов в фоновом пуле.
`max_bytes_to_merge_at_max_space_in_pool` -- примерно соответствует максимально возможному размеру куска, созданного автоматическим фоновым слиянием.
Максимальный суммарный размер кусков (в байтах) в одном слиянии, если есть свободные ресурсы в фоновом пуле.
`max_bytes_to_merge_at_max_space_in_pool` примерно соответствует максимально возможному размеру куска, созданного автоматическим фоновым слиянием.
Возможные значения:
@ -206,26 +212,27 @@ Eсли суммарное число активных кусков во все
Значение по умолчанию: 161061273600 (150ГБ).
Планировщик мержей периодически анализирует размер и количество кусков в партициях, и при достаточном количестве свободных ресурсов в фоновом пуле начинает фоновое слияние. Слияния происходят до тех пор, пока суммарный размер входных кусков не достигнет `max_bytes_to_merge_at_max_space_in_pool`.
Планировщик слияний периодически анализирует размер и количество кусков в партициях, и если в пуле хватает ресурсов, то начинает фоновое слияние. Слияния выполняются до тех пор, пока суммарный размер входных кусков не достигнет `max_bytes_to_merge_at_max_space_in_pool`.
Слияния, инициированные `optimize final`, не учитывают `max_bytes_to_merge_at_max_space_in_pool` и размеры кусков и слияют куски только с учетом наличия ресурсов в фоновом пуле, пока не останется один кусок в партиции.
Слияния, начатые по [OPTIMIZE FINAL](../../sql-reference/statements/optimize.md), не учитывают `max_bytes_to_merge_at_max_space_in_pool` и объединяют куски пока есть доступные ресурсы (свободное дисковое пространство) до тех пор, пока в партиции не останется один кусок.
## max_bytes_to_merge_at_min_space_in_pool {#max-bytes-to-merge-at-min-space-in-pool}
Максимальный суммарный размер кусков (в байтах) в одном слиянии, при минимальных свободных ресурсах в фоновом пуле.
Максимальный суммарный размер кусков (в байтах) в одном слиянии при минимуме свободных ресурсов в фоновом пуле.
Возможные значения:
- Положительное целое число.
Значение по умолчанию: 1048576
Значение по умолчанию: 1048576 (1 МБ).
`max_bytes_to_merge_at_min_space_in_pool` задает максимальный суммарный размер кусков, для которых можно начать слияние, несмотря на недостаток свободных ресурсов в фоновом пуле (дискового пространства). Это необходимо, чтобы уменьшить количество маленьких кусков и вероятность ошибки `Too many parts`.
Слияния резервируют дисковое пространство, удваивая суммарный размер кусков в слиянии. Таким образом, при малом количестве свободного места на диске может сложится ситуация, что свободное место есть, но оно уже зарезервировано идущими слиянияними, поэтому другие слияния не могут начаться, и количество маленьких кусков в партиции растет с каждым инсертом.
`max_bytes_to_merge_at_min_space_in_pool` задает максимальный суммарный размер кусков, которые можно объединить несмотря на нехватку свободных ресурсов (дискового пространства) в фоновом пуле. Это нужно, чтобы уменьшить количество маленьких кусков и снизить вероятность ошибки `Too many parts`.
Слияния резервируют дисковое пространство, удваивая суммарный размер кусков в слиянии. Поэтому при малом объеме свободного места на диске может сложиться ситуация, когда свободное место есть, но оно уже зарезервировано текущими слияниями. Из-за этого другие слияния не начинаются, и количество маленьких кусков в партиции растет с каждым запросом `INSERT`.
## merge_max_block_size {#merge-max-block-size}
Количество строк в блоках, которые читаются из слияемых кусков.
Количество строк в блоках, которые читаются из объединяемых кусков.
Возможные значения:
@ -233,7 +240,7 @@ Eсли суммарное число активных кусков во все
Значение по умолчанию: 8192
Слияние читает строки из кусков блоками по `merge_max_block_size` строк, производит слияние и пишет результат в новый кусок. Читаемый блок помещается в оперативную память, т.е. `merge_max_block_size` влияет на размер оперативной памяти, необходимой для слияния. Таким образом, слияния могут потреблять большое количество оперативной памяти для таблиц, хранящих очень большие строки (если средний размер строки 100кб, то при слиянии 10 кусков будет использовано (100кб * 10 * 8192) =~ 8ГБ ОЗУ). Уменьшив `merge_max_block_size`, можно сократить размер оперативной памяти, необходимой для слияния.
Слияние читает строки из кусков блоками по `merge_max_block_size` строк, производит слияние и записывает результат в новый кусок. Читаемый блок помещается в оперативную память, т.е. `merge_max_block_size` влияет на размер оперативной памяти, необходимой для слияния. Таким образом, слияния могут потреблять большое количество оперативной памяти для таблиц, хранящих очень большие строки (если средний размер строки 100кб, то при слиянии 10 кусков будет использовано (100кб * 10 * 8192) =~ 8ГБ оперативной памяти). Уменьшив `merge_max_block_size`, можно сократить размер оперативной памяти, необходимой для слияния, но при этом процесс слияния замедлится.
## max_part_loading_threads {#max-part-loading-threads}
@ -243,9 +250,9 @@ Eсли суммарное число активных кусков во все
- Положительное целое число.
Значение по умолчанию: auto (количество ядер процессора).
Значение по умолчанию: определяется автоматически (по количеству ядер процессора).
При старте ClickHouse читает все куски всех таблиц (читает файлы с метаданными кусков), чтобы построить в ОЗУ список всех кусков. В некоторых системах с большим количеством кусков этот процесс может занимать длительное время, и это время можно сократить, увеличив `max_part_loading_threads` (если при этом процессе есть недозагруженность CPU и диска).
На старте ClickHouse читает все куски из всех таблиц (читает файлы с метаданными кусков), чтобы построить в оперативной памяти список всех кусков. В некоторых системах с большим количеством кусков этот процесс может занимать длительное время. Это время можно сократить, увеличив `max_part_loading_threads` (если при этом хватает ресурсов процессора и диска).
## max_partitions_to_read {#max-partitions-to-read}

View File

@ -2078,7 +2078,7 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;
- Положительное целое число.
Значение по умолчанию: 16.
Значение по умолчанию: 128.
## background_fetches_pool_size {#background_fetches_pool_size}
@ -2376,18 +2376,6 @@ SELECT * FROM system.events WHERE event='QueryMemoryLimitExceeded';
└──────────────────────────┴───────┴───────────────────────────────────────────────────────┘
```
## allow_experimental_bigint_types {#allow_experimental_bigint_types}
Включает или отключает поддержку целочисленных значений, превышающих максимальное значение, допустимое для типа `int`.
Возможные значения:
- 1 — большие целочисленные значения поддерживаются.
- 0 — большие целочисленные значения не поддерживаются.
Значение по умолчанию: `0`.
## lock_acquire_timeout {#lock_acquire_timeout}
Устанавливает, сколько секунд сервер ожидает возможности выполнить блокировку таблицы.

View File

@ -2,7 +2,7 @@
Содержит экземпляры трассировки стека адресов вызова, собранные с помощью семплирующего профайлера запросов.
ClickHouse создает эту таблицу когда утсановлена настройка [trace_log](../server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) в конфигурационном файле сервереа. А также настройки [query_profiler_real_time_period_ns](../settings/settings.md#query_profiler_real_time_period_ns) и [query_profiler_cpu_time_period_ns](../settings/settings.md#query_profiler_cpu_time_period_ns).
ClickHouse создает эту таблицу когда установлена настройка [trace_log](../server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) в конфигурационном файле сервера. А также настройки [query_profiler_real_time_period_ns](../settings/settings.md#query_profiler_real_time_period_ns) и [query_profiler_cpu_time_period_ns](../settings/settings.md#query_profiler_cpu_time_period_ns).
Для анализа stack traces, используйте функции интроспекции `addressToLine`, `addressToSymbol` и `demangle`.

View File

@ -116,7 +116,7 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...)
- `.*` — соответствует любому количеству событий. Для этого элемента шаблона не надо задавать условия.
- `(?t operator value)` — устанавливает время в секундах, которое должно разделять два события. Например, шаблон `(?1)(?t>1800)(?2)` соответствует событиям, которые произошли более чем через 1800 секунд друг от друга. Между этими событиями может находиться произвольное количество любых событий. Операторы могут быть `>=`, `>`, `<`, `<=`.
- `(?t operator value)` — устанавливает время в секундах, которое должно разделять два события. Например, шаблон `(?1)(?t>1800)(?2)` соответствует событиям, которые произошли более чем через 1800 секунд друг от друга. Между этими событиями может находиться произвольное количество любых событий. Операторы могут быть `>=`, `>`, `<`, `<=`, `==`.
**Примеры**
@ -496,3 +496,258 @@ FROM
Решение: пишем в запросе GROUP BY SearchPhrase HAVING uniqUpTo(4)(UserID) >= 5
```
## sequenceNextNode {#sequenceNextNode}
Возвращает значение следующего события, соответствующего цепочке событий.
_Экспериментальная функция, чтобы включить ее, выполните: `SET allow_experimental_funnel_functions = 1`._
**Синтаксис**
``` sql
sequenceNextNode(direction, base)(timestamp, event_column, base_condition, event1, event2, event3, ...)
```
**Параметры**
- `direction` — используется для навигации по направлениям.
- forward — двигаться вперед.
- backward — двигаться назад.
- `base` — используется для задания начальной точки.
- head — установить начальную точку на первое событие цепочки.
- tail — установить начальную точку на последнее событие цепочки.
- first_match — установить начальную точку на первое соответствующее событие `event1`.
- last_match — установить начальную точку на последнее соответствующее событие `event1`.
**Аргументы**
- `timestamp` — название столбца, содержащего `timestamp`. Поддерживаемые типы данных: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) и другие беззнаковые целые типы.
- `event_column` — название столбца, содержащего значение следующего возвращаемого события. Поддерживаемые типы данных: [String](../../sql-reference/data-types/string.md) и [Nullable(String)](../../sql-reference/data-types/nullable.md).
- `base_condition` — условие, которому должна соответствовать исходная точка.
- `event1`, `event2`, ... — условия, описывающие цепочку событий. [UInt8](../../sql-reference/data-types/int-uint.md).
**Возвращаемые значения**
- `event_column[next_index]` — если есть совпадение с шаблоном и существует следующее значение.
- `NULL` — если нет совпадений с шаблоном или следующего значения не существует.
Тип: [Nullable(String)](../../sql-reference/data-types/nullable.md).
**Пример**
Функцию можно использовать, если есть цепочка событий A->B->C->D->E, и вы хотите определить событие, следующее за B->C, то есть D.
Запрос ищет событие после A->B:
``` sql
CREATE TABLE test_flow (
dt DateTime,
id int,
page String)
ENGINE = MergeTree()
PARTITION BY toYYYYMMDD(dt)
ORDER BY id;
INSERT INTO test_flow VALUES (1, 1, 'A') (2, 1, 'B') (3, 1, 'C') (4, 1, 'D') (5, 1, 'E');
SELECT id, sequenceNextNode('forward', 'head')(dt, page, page = 'A', page = 'A', page = 'B') as next_flow FROM test_flow GROUP BY id;
```
Результат:
``` text
┌─id─┬─next_flow─┐
│ 1 │ C │
└────┴───────────┘
```
**Поведение для `forward` и `head`**
``` sql
ALTER TABLE test_flow DELETE WHERE 1 = 1 settings mutations_sync = 1;
INSERT INTO test_flow VALUES (1, 1, 'Home') (2, 1, 'Gift') (3, 1, 'Exit');
INSERT INTO test_flow VALUES (1, 2, 'Home') (2, 2, 'Home') (3, 2, 'Gift') (4, 2, 'Basket');
INSERT INTO test_flow VALUES (1, 3, 'Gift') (2, 3, 'Home') (3, 3, 'Gift') (4, 3, 'Basket');
```
``` sql
SELECT id, sequenceNextNode('forward', 'head')(dt, page, page = 'Home', page = 'Home', page = 'Gift') FROM test_flow GROUP BY id;
dt id page
1970-01-01 09:00:01 1 Home // Исходная точка, совпадение с Home
1970-01-01 09:00:02 1 Gift // Совпадение с Gift
1970-01-01 09:00:03 1 Exit // Результат
1970-01-01 09:00:01 2 Home // Исходная точка, совпадение с Home
1970-01-01 09:00:02 2 Home // Несовпадение с Gift
1970-01-01 09:00:03 2 Gift
1970-01-01 09:00:04 2 Basket
1970-01-01 09:00:01 3 Gift // Исходная точка, несовпадение с Home
1970-01-01 09:00:02 3 Home
1970-01-01 09:00:03 3 Gift
1970-01-01 09:00:04 3 Basket
```
**Поведение для `backward` и `tail`**
``` sql
SELECT id, sequenceNextNode('backward', 'tail')(dt, page, page = 'Basket', page = 'Basket', page = 'Gift') FROM test_flow GROUP BY id;
dt id page
1970-01-01 09:00:01 1 Home
1970-01-01 09:00:02 1 Gift
1970-01-01 09:00:03 1 Exit // Исходная точка, несовпадение с Basket
1970-01-01 09:00:01 2 Home
1970-01-01 09:00:02 2 Home // Результат
1970-01-01 09:00:03 2 Gift // Совпадение с Gift
1970-01-01 09:00:04 2 Basket // Исходная точка, совпадение с Basket
1970-01-01 09:00:01 3 Gift
1970-01-01 09:00:02 3 Home // Результат
1970-01-01 09:00:03 3 Gift // Совпадение с Gift
1970-01-01 09:00:04 3 Basket // Исходная точка, совпадение с Basket
```
**Поведение для `forward` и `first_match`**
``` sql
SELECT id, sequenceNextNode('forward', 'first_match')(dt, page, page = 'Gift', page = 'Gift') FROM test_flow GROUP BY id;
dt id page
1970-01-01 09:00:01 1 Home
1970-01-01 09:00:02 1 Gift // Исходная точка
1970-01-01 09:00:03 1 Exit // Результат
1970-01-01 09:00:01 2 Home
1970-01-01 09:00:02 2 Home
1970-01-01 09:00:03 2 Gift // Исходная точка
1970-01-01 09:00:04 2 Basket // Результат
1970-01-01 09:00:01 3 Gift // Исходная точка
1970-01-01 09:00:02 3 Home // Результат
1970-01-01 09:00:03 3 Gift
1970-01-01 09:00:04 3 Basket
```
``` sql
SELECT id, sequenceNextNode('forward', 'first_match')(dt, page, page = 'Gift', page = 'Gift', page = 'Home') FROM test_flow GROUP BY id;
dt id page
1970-01-01 09:00:01 1 Home
1970-01-01 09:00:02 1 Gift // Исходная точка
1970-01-01 09:00:03 1 Exit // Несовпадение с Home
1970-01-01 09:00:01 2 Home
1970-01-01 09:00:02 2 Home
1970-01-01 09:00:03 2 Gift // Исходная точка
1970-01-01 09:00:04 2 Basket // Несовпадение с Home
1970-01-01 09:00:01 3 Gift // Исходная точка
1970-01-01 09:00:02 3 Home // Совпадение с Home
1970-01-01 09:00:03 3 Gift // Результат
1970-01-01 09:00:04 3 Basket
```
**Поведение для `backward` и `last_match`**
``` sql
SELECT id, sequenceNextNode('backward', 'last_match')(dt, page, page = 'Gift', page = 'Gift') FROM test_flow GROUP BY id;
dt id page
1970-01-01 09:00:01 1 Home // Результат
1970-01-01 09:00:02 1 Gift // Исходная точка
1970-01-01 09:00:03 1 Exit
1970-01-01 09:00:01 2 Home
1970-01-01 09:00:02 2 Home // Результат
1970-01-01 09:00:03 2 Gift // Исходная точка
1970-01-01 09:00:04 2 Basket
1970-01-01 09:00:01 3 Gift
1970-01-01 09:00:02 3 Home // Результат
1970-01-01 09:00:03 3 Gift // Исходная точка
1970-01-01 09:00:04 3 Basket
```
``` sql
SELECT id, sequenceNextNode('backward', 'last_match')(dt, page, page = 'Gift', page = 'Gift', page = 'Home') FROM test_flow GROUP BY id;
dt id page
1970-01-01 09:00:01 1 Home // Совпадение с Home, результат `Null`
1970-01-01 09:00:02 1 Gift // Исходная точка
1970-01-01 09:00:03 1 Exit
1970-01-01 09:00:01 2 Home // Результат
1970-01-01 09:00:02 2 Home // Совпадение с Home
1970-01-01 09:00:03 2 Gift // Исходная точка
1970-01-01 09:00:04 2 Basket
1970-01-01 09:00:01 3 Gift // Результат
1970-01-01 09:00:02 3 Home // Совпадение с Home
1970-01-01 09:00:03 3 Gift // Исходная точка
1970-01-01 09:00:04 3 Basket
```
**Поведение для `base_condition`**
``` sql
CREATE TABLE test_flow_basecond
(
`dt` DateTime,
`id` int,
`page` String,
`ref` String
)
ENGINE = MergeTree
PARTITION BY toYYYYMMDD(dt)
ORDER BY id;
INSERT INTO test_flow_basecond VALUES (1, 1, 'A', 'ref4') (2, 1, 'A', 'ref3') (3, 1, 'B', 'ref2') (4, 1, 'B', 'ref1');
```
``` sql
SELECT id, sequenceNextNode('forward', 'head')(dt, page, ref = 'ref1', page = 'A') FROM test_flow_basecond GROUP BY id;
dt id page ref
1970-01-01 09:00:01 1 A ref4 // Начало не может быть исходной точкой, поскольку столбец ref не соответствует 'ref1'.
1970-01-01 09:00:02 1 A ref3
1970-01-01 09:00:03 1 B ref2
1970-01-01 09:00:04 1 B ref1
```
``` sql
SELECT id, sequenceNextNode('backward', 'tail')(dt, page, ref = 'ref4', page = 'B') FROM test_flow_basecond GROUP BY id;
dt id page ref
1970-01-01 09:00:01 1 A ref4
1970-01-01 09:00:02 1 A ref3
1970-01-01 09:00:03 1 B ref2
1970-01-01 09:00:04 1 B ref1 // Конец не может быть исходной точкой, поскольку столбец ref не соответствует 'ref4'.
```
``` sql
SELECT id, sequenceNextNode('forward', 'first_match')(dt, page, ref = 'ref3', page = 'A') FROM test_flow_basecond GROUP BY id;
dt id page ref
1970-01-01 09:00:01 1 A ref4 // Эта строка не может быть исходной точкой, поскольку столбец ref не соответствует 'ref3'.
1970-01-01 09:00:02 1 A ref3 // Исходная точка
1970-01-01 09:00:03 1 B ref2 // Результат
1970-01-01 09:00:04 1 B ref1
```
``` sql
SELECT id, sequenceNextNode('backward', 'last_match')(dt, page, ref = 'ref2', page = 'B') FROM test_flow_basecond GROUP BY id;
dt id page ref
1970-01-01 09:00:01 1 A ref4
1970-01-01 09:00:02 1 A ref3 // Результат
1970-01-01 09:00:03 1 B ref2 // Исходная точка
1970-01-01 09:00:04 1 B ref1 // Эта строка не может быть исходной точкой, поскольку столбец ref не соответствует 'ref2'.
```

View File

@ -21,7 +21,7 @@ toc_priority: 208
quantileTDigestWeighted(level)(expr, weight)
```
Алиас: `medianTDigest`.
Синоним: `medianTDigestWeighted`.
**Аргументы**

View File

@ -189,6 +189,23 @@ SELECT now() AS current_date_time, current_date_time + INTERVAL '4' day + INTERV
└─────────────────────┴────────────────────────────────────────────────────────────┘
```
Вы можете изменить дату, не используя синтаксис `INTERVAL`, а просто добавив или отняв секунды, минуты и часы. Например, чтобы передвинуть дату на один день вперед, можно прибавить к ней значение `60*60*24`.
!!! note "Примечание"
Синтаксис `INTERVAL` или функция `addDays` предпочтительнее для работы с датами. Сложение с числом (например, синтаксис `now() + ...`) не учитывает региональные настройки времени, например, переход на летнее время.
Пример:
``` sql
SELECT toDateTime('2014-10-26 00:00:00', 'Europe/Moscow') AS time, time + 60 * 60 * 24 AS time_plus_24_hours, time + toIntervalDay(1) AS time_plus_1_day;
```
``` text
┌────────────────time─┬──time_plus_24_hours─┬─────time_plus_1_day─┐
│ 2014-10-26 00:00:00 │ 2014-10-26 23:00:00 │ 2014-10-27 00:00:00 │
└─────────────────────┴─────────────────────┴─────────────────────┘
```
**Смотрите также**
- Тип данных [Interval](../../sql-reference/operators/index.md)
@ -296,4 +313,3 @@ SELECT * FROM t_null WHERE y IS NOT NULL
│ 2 │ 3 │
└───┴───┘
```

View File

@ -362,6 +362,79 @@ SHOW [CURRENT] QUOTA
SHOW ACCESS
```
## SHOW CLUSTER(s) {#show-cluster-statement}
Возвращает список кластеров. Все доступные кластеры перечислены в таблице [system.clusters](../../operations/system-tables/clusters.md).
!!! info "Note"
По запросу `SHOW CLUSTER name` вы получите содержимое таблицы system.clusters для этого кластера.
### Синтаксис {#show-cluster-syntax}
``` sql
SHOW CLUSTER '<name>'
SHOW CLUSTERS [LIKE|NOT LIKE '<pattern>'] [LIMIT <N>]
```
### Примеры {#show-cluster-examples}
Запрос:
``` sql
SHOW CLUSTERS;
```
Результат:
```text
┌─cluster──────────────────────────────────────┐
│ test_cluster_two_shards │
│ test_cluster_two_shards_internal_replication │
│ test_cluster_two_shards_localhost │
│ test_shard_localhost │
│ test_shard_localhost_secure │
│ test_unavailable_shard │
└──────────────────────────────────────────────┘
```
Запрос:
``` sql
SHOW CLUSTERS LIKE 'test%' LIMIT 1;
```
Результат:
```text
┌─cluster─────────────────┐
│ test_cluster_two_shards │
└─────────────────────────┘
```
Запрос:
``` sql
SHOW CLUSTER 'test_shard_localhost' FORMAT Vertical;
```
Результат:
```text
Row 1:
──────
cluster: test_shard_localhost
shard_num: 1
shard_weight: 1
replica_num: 1
host_name: localhost
host_address: 127.0.0.1
port: 9000
is_local: 1
user: default
default_database:
errors_count: 0
estimated_recovery_time: 0
```
## SHOW SETTINGS {#show-settings}
Возвращает список системных настроек и их значений. Использует данные из таблицы [system.settings](../../operations/system-tables/settings.md).
@ -426,4 +499,3 @@ SHOW CHANGED SETTINGS ILIKE '%MEMORY%'
**См. также**
- Таблица [system.settings](../../operations/system-tables/settings.md)

View File

@ -0,0 +1,145 @@
---
toc_priority: 11
toc_title: PostgreSQL
---
# PostgreSQL {#postgresql}
PostgreSQL 引擎允许 ClickHouse 对存储在远程 PostgreSQL 服务器上的数据执行 `SELECT` 和 `INSERT` 查询.
## 创建一张表 {#creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
...
) ENGINE = PostgreSQL('host:port', 'database', 'table', 'user', 'password'[, `schema`]);
```
<!-- 详情请见 [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) 查询. -->
表结构可以与 PostgreSQL 源表结构不同:
- 列名应与 PostgreSQL 源表中的列名相同,但您可以按任何顺序使用其中的一些列。
- 列类型可能与源表中的列类型不同。 ClickHouse尝试将数值[映射](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) 到ClickHouse的数据类型。
- 设置 `external_table_functions_use_nulls` 来定义如何处理 Nullable 列. 默认值是 1, 当设置为 0 时 - 表函数将不会使用 nullable 列,而是插入默认值来代替 null. 这同样适用于数组数据类型中的 null 值.
**引擎参数**
- `host:port` — PostgreSQL 服务器地址.
- `database` — 数据库名称.
- `table` — 表名称.
- `user` — PostgreSQL 用户.
- `password` — 用户密码.
- `schema` — 非默认的表模式. 可选.
## 实施细节 {#implementation-details}
在 PostgreSQL 上的 `SELECT` 查询以 `COPY (SELECT ...) TO STDOUT` 的方式在只读 PostgreSQL 事务中运行,每次 `SELECT` 查询后提交。
简单的 `WHERE` 子句,如 `=`、`!=`、`>`、`>=`、`<`、`<=` 和 `IN`,是在 PostgreSQL 服务器上执行的。
所有的连接、聚合、排序、`IN [ array ]`条件和`LIMIT`采样约束都是在 PostgreSQL 的查询结束后才在ClickHouse中执行的。
在 PostgreSQL 上的 `INSERT` 查询以 `COPY "table_name" (field1, field2, ... fieldN) FROM STDIN` 的方式在 PostgreSQL 事务中运行,每条 `INSERT` 语句后自动提交。
PostgreSQL 的 `Array` 类型会被转换为 ClickHouse 数组。
!!! info "Note"
要小心 - 一个在 PostgreSQL 中的数组数据,像`type_name[]`这样创建,可以在同一列的不同表行中包含不同维度的多维数组。但是在 ClickHouse 中,只允许在同一列的所有表行中包含相同维数的多维数组。
支持设置 PostgreSQL 字典源中 Replicas 的优先级。映射中的数字越大,优先级就越低。最高的优先级是 `0`。
在下面的例子中,副本`example01-1`有最高的优先级。
```xml
<source>
<postgresql>
<port>5432</port>
<user>clickhouse</user>
<password>qwerty</password>
<replica>
<host>example01-1</host>
<priority>1</priority>
</replica>
<replica>
<host>example01-2</host>
<priority>2</priority>
</replica>
<db>db_name</db>
<table>table_name</table>
<where>id=10</where>
<invalidate_query>SQL_QUERY</invalidate_query>
</postgresql>
</source>
```
## 用法示例 {#usage-example}
PostgreSQL 中的表:
``` text
postgres=# CREATE TABLE "public"."test" (
"int_id" SERIAL,
"int_nullable" INT NULL DEFAULT NULL,
"float" FLOAT NOT NULL,
"str" VARCHAR(100) NOT NULL DEFAULT '',
"float_nullable" FLOAT NULL DEFAULT NULL,
PRIMARY KEY (int_id));
CREATE TABLE
postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2);
INSERT 0 1
postgres=# SELECT * FROM test;
int_id | int_nullable | float | str | float_nullable
--------+--------------+-------+------+----------------
1 | | 2 | test |
(1 row)
```
ClickHouse 中的表, 从上面创建的 PostgreSQL 表中检索数据:
``` sql
CREATE TABLE default.postgresql_table
(
`float_nullable` Nullable(Float32),
`str` String,
`int_id` Int32
)
ENGINE = PostgreSQL('localhost:5432', 'public', 'test', 'postgres_user', 'postgres_password');
```
``` sql
SELECT * FROM postgresql_table WHERE str IN ('test');
```
``` text
┌─float_nullable─┬─str──┬─int_id─┐
│ ᴺᵁᴸᴸ │ test │ 1 │
└────────────────┴──────┴────────┘
```
使用非默认的模式:
```text
postgres=# CREATE SCHEMA "nice.schema";
postgres=# CREATE TABLE "nice.schema"."nice.table" (a integer);
postgres=# INSERT INTO "nice.schema"."nice.table" SELECT i FROM generate_series(0, 99) as t(i)
```
```sql
CREATE TABLE pg_table_schema_with_dots (a UInt32)
ENGINE PostgreSQL('localhost:5432', 'clickhouse', 'nice.table', 'postgresql_user', 'password', 'nice.schema');
```
**另请参阅**
<!-- - [`postgresql` 表函数](../../../sql-reference/table-functions/postgresql.md) -->
- [使用 PostgreSQL 作为外部字典的来源](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)
[原始文章](https://clickhouse.tech/docs/en/engines/table-engines/integrations/postgresql/) <!--hide-->

View File

@ -112,7 +112,7 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...)
- `.*` — 匹配任意数量的事件。 不需要条件参数来匹配这个模式。
- `(?t operator value)` — 分开两个事件的时间。 例如: `(?1)(?t>1800)(?2)` 匹配彼此发生超过1800秒的事件。 这些事件之间可以存在任意数量的任何事件。 您可以使用 `>=`, `>`, `<`, `<=` 运算符。
- `(?t operator value)` — 分开两个事件的时间。 例如: `(?1)(?t>1800)(?2)` 匹配彼此发生超过1800秒的事件。 这些事件之间可以存在任意数量的任何事件。 您可以使用 `>=`, `>`, `<`, `<=`, `==` 运算符。
**例**

View File

@ -21,7 +21,7 @@
#include <unordered_set>
#include <algorithm>
#include <optional>
#include <ext/scope_guard_safe.h>
#include <common/scope_guard_safe.h>
#include <boost/program_options.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <Poco/String.h>
@ -549,65 +549,6 @@ private:
/// Initialize DateLUT here to avoid counting time spent here as query execution time.
const auto local_tz = DateLUT::instance().getTimeZone();
if (!context->getSettingsRef().use_client_time_zone)
{
const auto & time_zone = connection->getServerTimezone(connection_parameters.timeouts);
if (!time_zone.empty())
{
try
{
DateLUT::setDefaultTimezone(time_zone);
}
catch (...)
{
std::cerr << "Warning: could not switch to server time zone: " << time_zone
<< ", reason: " << getCurrentExceptionMessage(/* with_stacktrace = */ false) << std::endl
<< "Proceeding with local time zone." << std::endl
<< std::endl;
}
}
else
{
std::cerr << "Warning: could not determine server time zone. "
<< "Proceeding with local time zone." << std::endl
<< std::endl;
}
}
Strings keys;
prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name.default", "{display_name} :) ");
config().keys("prompt_by_server_display_name", keys);
for (const String & key : keys)
{
if (key != "default" && server_display_name.find(key) != std::string::npos)
{
prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name." + key);
break;
}
}
/// Prompt may contain escape sequences including \e[ or \x1b[ sequences to set terminal color.
{
String unescaped_prompt_by_server_display_name;
ReadBufferFromString in(prompt_by_server_display_name);
readEscapedString(unescaped_prompt_by_server_display_name, in);
prompt_by_server_display_name = std::move(unescaped_prompt_by_server_display_name);
}
/// Prompt may contain the following substitutions in a form of {name}.
std::map<String, String> prompt_substitutions{
{"host", connection_parameters.host},
{"port", toString(connection_parameters.port)},
{"user", connection_parameters.user},
{"display_name", server_display_name},
};
/// Quite suboptimal.
for (const auto & [key, value] : prompt_substitutions)
boost::replace_all(prompt_by_server_display_name, "{" + key + "}", value);
if (is_interactive)
{
@ -805,6 +746,66 @@ private:
<< std::endl;
}
}
if (!context->getSettingsRef().use_client_time_zone)
{
const auto & time_zone = connection->getServerTimezone(connection_parameters.timeouts);
if (!time_zone.empty())
{
try
{
DateLUT::setDefaultTimezone(time_zone);
}
catch (...)
{
std::cerr << "Warning: could not switch to server time zone: " << time_zone
<< ", reason: " << getCurrentExceptionMessage(/* with_stacktrace = */ false) << std::endl
<< "Proceeding with local time zone." << std::endl
<< std::endl;
}
}
else
{
std::cerr << "Warning: could not determine server time zone. "
<< "Proceeding with local time zone." << std::endl
<< std::endl;
}
}
Strings keys;
prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name.default", "{display_name} :) ");
config().keys("prompt_by_server_display_name", keys);
for (const String & key : keys)
{
if (key != "default" && server_display_name.find(key) != std::string::npos)
{
prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name." + key);
break;
}
}
/// Prompt may contain escape sequences including \e[ or \x1b[ sequences to set terminal color.
{
String unescaped_prompt_by_server_display_name;
ReadBufferFromString in(prompt_by_server_display_name);
readEscapedString(unescaped_prompt_by_server_display_name, in);
prompt_by_server_display_name = std::move(unescaped_prompt_by_server_display_name);
}
/// Prompt may contain the following substitutions in a form of {name}.
std::map<String, String> prompt_substitutions{
{"host", connection_parameters.host},
{"port", toString(connection_parameters.port)},
{"user", connection_parameters.user},
{"display_name", server_display_name},
};
/// Quite suboptimal.
for (const auto & [key, value] : prompt_substitutions)
boost::replace_all(prompt_by_server_display_name, "{" + key + "}", value);
}
@ -1202,7 +1203,9 @@ private:
client_exception.reset();
server_exception.reset();
have_error = false;
connection->forceConnected(connection_parameters.timeouts);
if (!connection->checkConnected())
connect();
}
// Report error.
@ -1603,7 +1606,8 @@ private:
if (with_output && with_output->settings_ast)
apply_query_settings(*with_output->settings_ast);
connection->forceConnected(connection_parameters.timeouts);
if (!connection->checkConnected())
connect();
ASTPtr input_function;
if (insert && insert->select)

View File

@ -10,7 +10,7 @@
#include <Common/isLocalAddress.h>
#include <Common/DNSResolver.h>
#include <common/setTerminalEcho.h>
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#if !defined(ARCADIA_BUILD)
#include <readpassphrase.h> // Y_IGNORE

View File

@ -463,9 +463,7 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
}
else if (auto * table_expr = typeid_cast<ASTTableExpression *>(ast.get()))
{
fuzz(table_expr->database_and_table_name);
fuzz(table_expr->subquery);
fuzz(table_expr->table_function);
fuzz(table_expr->children);
}
else if (auto * expr_list = typeid_cast<ASTExpressionList *>(ast.get()))
{

View File

@ -1746,7 +1746,7 @@ void ClusterCopier::createShardInternalTables(const ConnectionTimeouts & timeout
task_shard.table_read_shard = DatabaseAndTableName(working_database_name, read_shard_prefix + task_table.table_id);
task_shard.main_table_split_shard = DatabaseAndTableName(working_database_name, split_shard_prefix + task_table.table_id);
for (const auto & piece_number : ext::range(0, task_table.number_of_splits))
for (const auto & piece_number : collections::range(0, task_table.number_of_splits))
{
task_shard.list_of_split_tables_on_shard[piece_number] =
DatabaseAndTableName(working_database_name, split_shard_prefix + task_table.table_id + "_piece_" + toString(piece_number));
@ -1776,7 +1776,7 @@ void ClusterCopier::createShardInternalTables(const ConnectionTimeouts & timeout
dropAndCreateLocalTable(create_table_split_piece_ast);
/// Create auxiliary split tables for each piece
for (const auto & piece_number : ext::range(0, task_table.number_of_splits))
for (const auto & piece_number : collections::range(0, task_table.number_of_splits))
{
const auto & storage_piece_split_ast = task_table.auxiliary_engine_split_asts[piece_number];

View File

@ -3,7 +3,7 @@
#include <Common/TerminalSize.h>
#include <IO/ConnectionTimeoutsContext.h>
#include <Formats/registerFormats.h>
#include <ext/scope_guard_safe.h>
#include <common/scope_guard_safe.h>
#include <unistd.h>
#include <filesystem>

View File

@ -6,7 +6,7 @@
#include <Core/Defines.h>
#include <ext/map.h>
#include <common/map.h>
#include <boost/algorithm/string/join.hpp>
@ -305,7 +305,7 @@ inline TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConf
main_engine_split_ast = createASTStorageDistributed(cluster_push_name, table_push.first, table_push.second,
sharding_key_ast);
for (const auto piece_number : ext::range(0, number_of_splits))
for (const auto piece_number : collections::range(0, number_of_splits))
{
auxiliary_engine_split_asts.emplace_back
(

View File

@ -75,6 +75,9 @@ namespace ErrorCodes
#define HILITE "\033[1m"
#define END_HILITE "\033[0m"
static constexpr auto CLICKHOUSE_BRIDGE_USER = "clickhouse-bridge";
static constexpr auto CLICKHOUSE_BRIDGE_GROUP = "clickhouse-bridge";
using namespace DB;
namespace po = boost::program_options;
namespace fs = std::filesystem;
@ -150,7 +153,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
<< argv[0]
<< " install [options]\n";
std::cout << desc << '\n';
return 1;
}
try
@ -324,26 +326,34 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
std::string user = options["user"].as<std::string>();
std::string group = options["group"].as<std::string>();
auto create_group = [](const String & group_name)
{
std::string command = fmt::format("groupadd -r {}", group_name);
fmt::print(" {}\n", command);
executeScript(command);
};
if (!group.empty())
{
{
fmt::print("Creating clickhouse group if it does not exist.\n");
std::string command = fmt::format("groupadd -r {}", group);
fmt::print(" {}\n", command);
executeScript(command);
}
fmt::print("Creating clickhouse group if it does not exist.\n");
create_group(group);
}
else
fmt::print("Will not create clickhouse group");
auto create_user = [](const String & user_name, const String & group_name)
{
std::string command = group_name.empty()
? fmt::format("useradd -r --shell /bin/false --home-dir /nonexistent --user-group {}", user_name)
: fmt::format("useradd -r --shell /bin/false --home-dir /nonexistent -g {} {}", group_name, user_name);
fmt::print(" {}\n", command);
executeScript(command);
};
if (!user.empty())
{
fmt::print("Creating clickhouse user if it does not exist.\n");
std::string command = group.empty()
? fmt::format("useradd -r --shell /bin/false --home-dir /nonexistent --user-group {}", user)
: fmt::format("useradd -r --shell /bin/false --home-dir /nonexistent -g {} {}", group, user);
fmt::print(" {}\n", command);
executeScript(command);
create_user(user, group);
if (group.empty())
group = user;
@ -475,12 +485,15 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
}
}
/// Chmod and chown configs
auto change_ownership = [](const String & file_name, const String & user_name, const String & group_name)
{
std::string command = fmt::format("chown --recursive {}:{} '{}'", user, group, config_dir.string());
std::string command = fmt::format("chown --recursive {}:{} '{}'", user_name, group_name, file_name);
fmt::print(" {}\n", command);
executeScript(command);
}
};
/// Chmod and chown configs
change_ownership(config_dir.string(), user, group);
/// Symlink "preprocessed_configs" is created by the server, so "write" is needed.
fs::permissions(config_dir, fs::perms::owner_all, fs::perm_options::replace);
@ -558,7 +571,19 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
/// Data directory is not accessible to anyone except clickhouse.
fs::permissions(data_path, fs::perms::owner_all, fs::perm_options::replace);
/// Set up password for default user.
fs::path odbc_bridge_path = bin_dir / "clickhouse-odbc-bridge";
fs::path library_bridge_path = bin_dir / "clickhouse-library-bridge";
if (fs::exists(odbc_bridge_path) || fs::exists(library_bridge_path))
{
create_group(CLICKHOUSE_BRIDGE_GROUP);
create_user(CLICKHOUSE_BRIDGE_USER, CLICKHOUSE_BRIDGE_GROUP);
if (fs::exists(odbc_bridge_path))
change_ownership(odbc_bridge_path, CLICKHOUSE_BRIDGE_USER, CLICKHOUSE_BRIDGE_GROUP);
if (fs::exists(library_bridge_path))
change_ownership(library_bridge_path, CLICKHOUSE_BRIDGE_USER, CLICKHOUSE_BRIDGE_GROUP);
}
bool stdin_is_a_tty = isatty(STDIN_FILENO);
bool stdout_is_a_tty = isatty(STDOUT_FILENO);
@ -573,6 +598,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
/// We can ask password even if stdin is closed/redirected but /dev/tty is available.
bool can_ask_password = !noninteractive && stdout_is_a_tty;
/// Set up password for default user.
if (has_password_for_default_user)
{
fmt::print(HILITE "Password for default user is already specified. To remind or reset, see {} and {}." END_HILITE "\n",
@ -819,15 +845,25 @@ namespace
if (fs::exists(pid_file))
{
ReadBufferFromFile in(pid_file.string());
if (tryReadIntText(pid, in))
try
{
fmt::print("{} file exists and contains pid = {}.\n", pid_file.string(), pid);
ReadBufferFromFile in(pid_file.string());
if (tryReadIntText(pid, in))
{
fmt::print("{} file exists and contains pid = {}.\n", pid_file.string(), pid);
}
else
{
fmt::print("{} file exists but damaged, ignoring.\n", pid_file.string());
fs::remove(pid_file);
}
}
else
catch (const Exception & e)
{
fmt::print("{} file exists but damaged, ignoring.\n", pid_file.string());
fs::remove(pid_file);
if (e.code() != ErrorCodes::FILE_DOESNT_EXIST)
throw;
/// If file does not exist (TOCTOU) - it's ok.
}
}

View File

@ -12,7 +12,7 @@
#include <common/defines.h>
#include <common/logger_useful.h>
#include <common/ErrorHandlers.h>
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#include <Poco/Util/HelpFormatter.h>
#include <Poco/Version.h>
#include <Poco/Environment.h>

View File

@ -51,7 +51,7 @@ namespace
void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
LOG_TRACE(log, "Request URI: {}", request.getURI());
HTMLForm params(request);
HTMLForm params(getContext()->getSettingsRef(), request);
if (!params.has("method"))
{

View File

@ -2,8 +2,8 @@
#include <Common/StringUtils/StringUtils.h>
#include <Core/Block.h>
#include <ext/bit_cast.h>
#include <ext/range.h>
#include <common/bit_cast.h>
#include <common/range.h>
#include "LibraryInterface.h"

View File

@ -1,8 +1,9 @@
#include "SharedLibraryHandler.h"
#include <ext/scope_guard.h>
#include <IO/ReadHelpers.h>
#include <common/scope_guard.h>
#include <common/bit_cast.h>
#include <common/find_symbols.h>
#include <IO/ReadHelpers.h>
namespace DB
@ -114,7 +115,7 @@ BlockInputStreamPtr SharedLibraryHandler::loadAll()
BlockInputStreamPtr SharedLibraryHandler::loadIds(const std::vector<uint64_t> & ids)
{
const ClickHouseLibrary::VectorUInt64 ids_data{ext::bit_cast<decltype(ClickHouseLibrary::VectorUInt64::data)>(ids.data()), ids.size()};
const ClickHouseLibrary::VectorUInt64 ids_data{bit_cast<decltype(ClickHouseLibrary::VectorUInt64::data)>(ids.data()), ids.size()};
auto columns_holder = std::make_unique<ClickHouseLibrary::CString[]>(attributes_names.size());
ClickHouseLibrary::CStrings columns_pass{static_cast<decltype(ClickHouseLibrary::CStrings::data)>(columns_holder.get()), attributes_names.size()};

View File

@ -22,7 +22,7 @@
#include <Common/IO.h>
#include <common/phdr_cache.h>
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
/// Universal executable for various clickhouse applications

View File

@ -27,9 +27,9 @@
#include <Core/Block.h>
#include <common/StringRef.h>
#include <common/DateLUT.h>
#include <common/bit_cast.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <ext/bit_cast.h>
#include <memory>
#include <cmath>
#include <unistd.h>
@ -254,9 +254,9 @@ Float transformFloatMantissa(Float x, UInt64 seed)
using UInt = std::conditional_t<std::is_same_v<Float, Float32>, UInt32, UInt64>;
constexpr size_t mantissa_num_bits = std::is_same_v<Float, Float32> ? 23 : 52;
UInt x_uint = ext::bit_cast<UInt>(x);
UInt x_uint = bit_cast<UInt>(x);
x_uint = feistelNetwork(x_uint, mantissa_num_bits, seed);
return ext::bit_cast<Float>(x_uint);
return bit_cast<Float>(x_uint);
}

View File

@ -13,8 +13,8 @@
#include <Poco/Net/HTTPServerResponse.h>
#include <Poco/NumberParser.h>
#include <common/logger_useful.h>
#include <common/scope_guard.h>
#include <Common/quoteString.h>
#include <ext/scope_guard.h>
#include "getIdentifierQuote.h"
#include "validateODBCConnectionString.h"
#include "ODBCConnectionFactory.h"
@ -69,7 +69,7 @@ namespace
void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
HTMLForm params(request, request.getStream());
HTMLForm params(getContext()->getSettingsRef(), request, request.getStream());
LOG_TRACE(log, "Request URI: {}", request.getURI());
auto process_error = [&response, this](const std::string & message)

View File

@ -11,7 +11,7 @@
#include <Poco/Net/HTTPServerRequest.h>
#include <Poco/Net/HTTPServerResponse.h>
#include <common/logger_useful.h>
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#include "getIdentifierQuote.h"
#include "validateODBCConnectionString.h"
#include "ODBCConnectionFactory.h"
@ -21,7 +21,7 @@ namespace DB
{
void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
HTMLForm params(request, request.getStream());
HTMLForm params(getContext()->getSettingsRef(), request, request.getStream());
LOG_TRACE(log, "Request URI: {}", request.getURI());
auto process_error = [&response, this](const std::string & message)

View File

@ -50,7 +50,7 @@ void ODBCHandler::processError(HTTPServerResponse & response, const std::string
void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
HTMLForm params(request);
HTMLForm params(getContext()->getSettingsRef(), request);
LOG_TRACE(log, "Request URI: {}", request.getURI());
if (mode == "read")

View File

@ -9,7 +9,6 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <common/logger_useful.h>
#include <ext/range.h>
namespace DB

View File

@ -29,7 +29,7 @@ namespace
void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
HTMLForm params(request, request.getStream());
HTMLForm params(getContext()->getSettingsRef(), request, request.getStream());
LOG_TRACE(log, "Request URI: {}", request.getURI());
auto process_error = [&response, this](const std::string & message)

View File

@ -14,7 +14,7 @@
#include <Poco/Net/NetException.h>
#include <Poco/Util/HelpFormatter.h>
#include <Poco/Environment.h>
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#include <common/defines.h>
#include <common/logger_useful.h>
#include <common/phdr_cache.h>

View File

@ -2,9 +2,8 @@
#include <Access/AccessType.h>
#include <common/types.h>
#include <common/range.h>
#include <Common/Exception.h>
#include <ext/range.h>
#include <ext/push_back.h>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <bitset>

View File

@ -102,6 +102,9 @@ enum class AccessType
M(KILL_QUERY, "", GLOBAL, ALL) /* allows to kill a query started by another user
(anyone can kill his own queries) */\
\
M(MOVE_PARTITION_BETWEEN_SHARDS, "", GLOBAL, ALL) /* required to be able to move a part/partition to a table
identified by its ZooKeeper path */\
\
M(CREATE_USER, "", GLOBAL, ACCESS_MANAGEMENT) \
M(ALTER_USER, "", GLOBAL, ACCESS_MANAGEMENT) \
M(DROP_USER, "", GLOBAL, ACCESS_MANAGEMENT) \

View File

@ -1,11 +1,11 @@
#include <Access/AllowedClientHosts.h>
#include <Common/Exception.h>
#include <common/SimpleCache.h>
#include <common/logger_useful.h>
#include <common/scope_guard.h>
#include <Functions/likePatternToRegexp.h>
#include <Poco/Net/SocketAddress.h>
#include <Poco/RegularExpression.h>
#include <common/logger_useful.h>
#include <ext/scope_guard.h>
#include <boost/algorithm/string/replace.hpp>
#include <ifaddrs.h>
#include <Common/DNSResolver.h>

View File

@ -4,8 +4,8 @@
#include <Access/RowPolicy.h>
#include <Interpreters/ClientInfo.h>
#include <Core/UUID.h>
#include <ext/scope_guard.h>
#include <ext/shared_ptr_helper.h>
#include <common/scope_guard.h>
#include <common/shared_ptr_helper.h>
#include <boost/container/flat_set.hpp>
#include <mutex>
@ -214,9 +214,9 @@ private:
mutable Poco::Logger * trace_log = nullptr;
mutable UserPtr user;
mutable String user_name;
mutable ext::scope_guard subscription_for_user_change;
mutable scope_guard subscription_for_user_change;
mutable std::shared_ptr<const EnabledRoles> enabled_roles;
mutable ext::scope_guard subscription_for_roles_changes;
mutable scope_guard subscription_for_roles_changes;
mutable std::shared_ptr<const EnabledRolesInfo> roles_info;
mutable std::shared_ptr<const AccessRights> access;
mutable std::shared_ptr<const AccessRights> access_with_implicit;

View File

@ -374,7 +374,7 @@ bool DiskAccessStorage::isPathEqual(const String & directory_path_) const
void DiskAccessStorage::clear()
{
entries_by_id.clear();
for (auto type : ext::range(EntityType::MAX))
for (auto type : collections::range(EntityType::MAX))
entries_by_name_and_type[static_cast<size_t>(type)].clear();
}
@ -384,7 +384,7 @@ bool DiskAccessStorage::readLists()
clear();
bool ok = true;
for (auto type : ext::range(EntityType::MAX))
for (auto type : collections::range(EntityType::MAX))
{
auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
auto file_path = getListFilePath(directory_path, type);
@ -543,7 +543,7 @@ bool DiskAccessStorage::rebuildLists()
entries_by_name[entry.name] = &entry;
}
for (auto type : ext::range(EntityType::MAX))
for (auto type : collections::range(EntityType::MAX))
types_of_lists_to_write.insert(type);
return true;
@ -786,7 +786,7 @@ void DiskAccessStorage::prepareNotifications(const UUID & id, const Entry & entr
}
ext::scope_guard DiskAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const
scope_guard DiskAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const
{
std::lock_guard lock{mutex};
auto it = entries_by_id.find(id);
@ -807,7 +807,7 @@ ext::scope_guard DiskAccessStorage::subscribeForChangesImpl(const UUID & id, con
};
}
ext::scope_guard DiskAccessStorage::subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const
scope_guard DiskAccessStorage::subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const
{
std::lock_guard lock{mutex};
auto & handlers = handlers_by_type[static_cast<size_t>(type)];

View File

@ -36,8 +36,8 @@ private:
UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) override;
void removeImpl(const UUID & id) override;
void updateImpl(const UUID & id, const UpdateFunc & update_func) override;
ext::scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override;
ext::scope_guard subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const override;
scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override;
scope_guard subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const override;
bool hasSubscriptionImpl(const UUID & id) const override;
bool hasSubscriptionImpl(EntityType type) const override;

View File

@ -2,8 +2,8 @@
#include <Access/QuotaUsage.h>
#include <Common/Exception.h>
#include <Common/quoteString.h>
#include <ext/chrono_io.h>
#include <ext/range.h>
#include <common/chrono_io.h>
#include <common/range.h>
#include <boost/smart_ptr/make_shared.hpp>
#include <boost/range/algorithm/fill.hpp>
@ -28,9 +28,9 @@ struct EnabledQuota::Impl
{
const auto & type_info = Quota::ResourceTypeInfo::get(resource_type);
throw Exception(
"Quota for user " + backQuote(user_name) + " for " + ext::to_string(duration) + " has been exceeded: "
"Quota for user " + backQuote(user_name) + " for " + to_string(duration) + " has been exceeded: "
+ type_info.outputWithAmount(used) + "/" + type_info.amountToString(max) + ". "
+ "Interval will end at " + ext::to_string(end_of_interval) + ". " + "Name of quota template: " + backQuote(quota_name),
+ "Interval will end at " + to_string(end_of_interval) + ". " + "Name of quota template: " + backQuote(quota_name),
ErrorCodes::QUOTA_EXPIRED);
}
@ -137,7 +137,7 @@ struct EnabledQuota::Impl
const Intervals & intervals,
std::chrono::system_clock::time_point current_time)
{
for (auto resource_type : ext::range(Quota::MAX_RESOURCE_TYPE))
for (auto resource_type : collections::range(Quota::MAX_RESOURCE_TYPE))
checkExceeded(user_name, intervals, resource_type, current_time);
}
};
@ -145,7 +145,7 @@ struct EnabledQuota::Impl
EnabledQuota::Interval::Interval()
{
for (auto resource_type : ext::range(MAX_RESOURCE_TYPE))
for (auto resource_type : collections::range(MAX_RESOURCE_TYPE))
{
used[resource_type].store(0);
max[resource_type] = 0;
@ -161,7 +161,7 @@ EnabledQuota::Interval & EnabledQuota::Interval::operator =(const Interval & src
randomize_interval = src.randomize_interval;
duration = src.duration;
end_of_interval.store(src.end_of_interval.load());
for (auto resource_type : ext::range(MAX_RESOURCE_TYPE))
for (auto resource_type : collections::range(MAX_RESOURCE_TYPE))
{
max[resource_type] = src.max[resource_type];
used[resource_type].store(src.used[resource_type].load());
@ -187,7 +187,7 @@ std::optional<QuotaUsage> EnabledQuota::Intervals::getUsage(std::chrono::system_
out.randomize_interval = in.randomize_interval;
bool counters_were_reset = false;
out.end_of_interval = Impl::getEndOfInterval(in, current_time, counters_were_reset);
for (auto resource_type : ext::range(MAX_RESOURCE_TYPE))
for (auto resource_type : collections::range(MAX_RESOURCE_TYPE))
{
if (in.max[resource_type])
out.max[resource_type] = in.max[resource_type];

View File

@ -20,7 +20,7 @@ std::shared_ptr<const EnabledRolesInfo> EnabledRoles::getRolesInfo() const
}
ext::scope_guard EnabledRoles::subscribeForChanges(const OnChangeHandler & handler) const
scope_guard EnabledRoles::subscribeForChanges(const OnChangeHandler & handler) const
{
std::lock_guard lock{mutex};
handlers.push_back(handler);
@ -34,7 +34,7 @@ ext::scope_guard EnabledRoles::subscribeForChanges(const OnChangeHandler & handl
}
void EnabledRoles::setRolesInfo(const std::shared_ptr<const EnabledRolesInfo> & info_, ext::scope_guard & notifications)
void EnabledRoles::setRolesInfo(const std::shared_ptr<const EnabledRolesInfo> & info_, scope_guard & notifications)
{
std::lock_guard lock{mutex};
@ -46,7 +46,7 @@ void EnabledRoles::setRolesInfo(const std::shared_ptr<const EnabledRolesInfo> &
std::vector<OnChangeHandler> handlers_to_notify;
boost::range::copy(handlers, std::back_inserter(handlers_to_notify));
notifications.join(ext::scope_guard([info = info, handlers_to_notify = std::move(handlers_to_notify)]
notifications.join(scope_guard([info = info, handlers_to_notify = std::move(handlers_to_notify)]
{
for (const auto & handler : handlers_to_notify)
handler(info);

View File

@ -1,7 +1,7 @@
#pragma once
#include <Core/UUID.h>
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#include <boost/container/flat_set.hpp>
#include <list>
#include <mutex>
@ -37,13 +37,13 @@ public:
using OnChangeHandler = std::function<void(const std::shared_ptr<const EnabledRolesInfo> & info)>;
/// Called when either the specified roles or the roles granted to the specified roles are changed.
ext::scope_guard subscribeForChanges(const OnChangeHandler & handler) const;
scope_guard subscribeForChanges(const OnChangeHandler & handler) const;
private:
friend class RoleCache;
EnabledRoles(const Params & params_);
void setRolesInfo(const std::shared_ptr<const EnabledRolesInfo> & info_, ext::scope_guard & notifications);
void setRolesInfo(const std::shared_ptr<const EnabledRolesInfo> & info_, scope_guard & notifications);
const Params params;
mutable std::shared_ptr<const EnabledRolesInfo> info;

View File

@ -1,6 +1,6 @@
#include <Access/GSSAcceptor.h>
#include <Common/Exception.h>
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#include <Poco/StringTokenizer.h>

View File

@ -377,21 +377,21 @@ std::vector<UUID> IAccessStorage::tryUpdate(const std::vector<UUID> & ids, const
}
ext::scope_guard IAccessStorage::subscribeForChanges(EntityType type, const OnChangedHandler & handler) const
scope_guard IAccessStorage::subscribeForChanges(EntityType type, const OnChangedHandler & handler) const
{
return subscribeForChangesImpl(type, handler);
}
ext::scope_guard IAccessStorage::subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const
scope_guard IAccessStorage::subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const
{
return subscribeForChangesImpl(id, handler);
}
ext::scope_guard IAccessStorage::subscribeForChanges(const std::vector<UUID> & ids, const OnChangedHandler & handler) const
scope_guard IAccessStorage::subscribeForChanges(const std::vector<UUID> & ids, const OnChangedHandler & handler) const
{
ext::scope_guard subscriptions;
scope_guard subscriptions;
for (const auto & id : ids)
subscriptions.join(subscribeForChangesImpl(id, handler));
return subscriptions;

View File

@ -3,7 +3,7 @@
#include <Access/IAccessEntity.h>
#include <Core/Types.h>
#include <Core/UUID.h>
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#include <functional>
#include <optional>
#include <vector>
@ -130,15 +130,15 @@ public:
/// Subscribes for all changes.
/// Can return nullptr if it cannot subscribe (identifier not found) or if subscribing doesn't make sense (the storage is read-only).
ext::scope_guard subscribeForChanges(EntityType type, const OnChangedHandler & handler) const;
scope_guard subscribeForChanges(EntityType type, const OnChangedHandler & handler) const;
template <typename EntityClassT>
ext::scope_guard subscribeForChanges(OnChangedHandler handler) const { return subscribeForChanges(EntityClassT::TYPE, handler); }
scope_guard subscribeForChanges(OnChangedHandler handler) const { return subscribeForChanges(EntityClassT::TYPE, handler); }
/// Subscribes for changes of a specific entry.
/// Can return nullptr if it cannot subscribe (identifier not found) or if subscribing doesn't make sense (the storage is read-only).
ext::scope_guard subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const;
ext::scope_guard subscribeForChanges(const std::vector<UUID> & ids, const OnChangedHandler & handler) const;
scope_guard subscribeForChanges(const UUID & id, const OnChangedHandler & handler) const;
scope_guard subscribeForChanges(const std::vector<UUID> & ids, const OnChangedHandler & handler) const;
bool hasSubscription(EntityType type) const;
bool hasSubscription(const UUID & id) const;
@ -161,8 +161,8 @@ protected:
virtual UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) = 0;
virtual void removeImpl(const UUID & id) = 0;
virtual void updateImpl(const UUID & id, const UpdateFunc & update_func) = 0;
virtual ext::scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const = 0;
virtual ext::scope_guard subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const = 0;
virtual scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const = 0;
virtual scope_guard subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const = 0;
virtual bool hasSubscriptionImpl(const UUID & id) const = 0;
virtual bool hasSubscriptionImpl(EntityType type) const = 0;
virtual UUID loginImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const;

View File

@ -7,7 +7,7 @@
#include <Access/LDAPClient.h>
#include <Common/Exception.h>
#include <common/logger_useful.h>
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Poco/JSON/JSON.h>
#include <Poco/JSON/Object.h>
@ -475,14 +475,14 @@ void LDAPAccessStorage::updateImpl(const UUID & id, const UpdateFunc &)
}
ext::scope_guard LDAPAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const
scope_guard LDAPAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const
{
std::scoped_lock lock(mutex);
return memory_storage.subscribeForChanges(id, handler);
}
ext::scope_guard LDAPAccessStorage::subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const
scope_guard LDAPAccessStorage::subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const
{
std::scoped_lock lock(mutex);
return memory_storage.subscribeForChanges(type, handler);

View File

@ -4,7 +4,7 @@
#include <Access/LDAPClient.h>
#include <Access/Credentials.h>
#include <common/types.h>
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#include <map>
#include <mutex>
#include <set>
@ -51,8 +51,8 @@ private: // IAccessStorage implementations.
virtual UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) override;
virtual void removeImpl(const UUID & id) override;
virtual void updateImpl(const UUID & id, const UpdateFunc & update_func) override;
virtual ext::scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override;
virtual ext::scope_guard subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const override;
virtual scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override;
virtual scope_guard subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const override;
virtual bool hasSubscriptionImpl(const UUID & id) const override;
virtual bool hasSubscriptionImpl(EntityType type) const override;
virtual UUID loginImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const override;
@ -80,7 +80,7 @@ private:
mutable std::map<String, std::set<String>> roles_per_users; // user name -> role names (...that should be granted to it; may but don't have to include common roles)
mutable std::map<UUID, String> granted_role_names; // (currently granted) role id -> its name
mutable std::map<String, UUID> granted_role_ids; // (currently granted) role name -> its id
ext::scope_guard role_change_subscription;
scope_guard role_change_subscription;
mutable MemoryAccessStorage memory_storage;
};
}

View File

@ -1,6 +1,6 @@
#include <Access/LDAPClient.h>
#include <Common/Exception.h>
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#include <common/logger_useful.h>
#include <Poco/Logger.h>

View File

@ -14,6 +14,7 @@
#endif
#include <chrono>
#include <optional>
#include <set>
#include <vector>

View File

@ -1,5 +1,5 @@
#include <Access/MemoryAccessStorage.h>
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#include <boost/container/flat_set.hpp>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/algorithm/copy.hpp>
@ -266,7 +266,7 @@ void MemoryAccessStorage::prepareNotifications(const Entry & entry, bool remove,
}
ext::scope_guard MemoryAccessStorage::subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const
scope_guard MemoryAccessStorage::subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const
{
std::lock_guard lock{mutex};
auto & handlers = handlers_by_type[static_cast<size_t>(type)];
@ -282,7 +282,7 @@ ext::scope_guard MemoryAccessStorage::subscribeForChangesImpl(EntityType type, c
}
ext::scope_guard MemoryAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const
scope_guard MemoryAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const
{
std::lock_guard lock{mutex};
auto it = entries_by_id.find(id);

View File

@ -33,8 +33,8 @@ private:
UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) override;
void removeImpl(const UUID & id) override;
void updateImpl(const UUID & id, const UpdateFunc & update_func) override;
ext::scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override;
ext::scope_guard subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const override;
scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override;
scope_guard subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const override;
bool hasSubscriptionImpl(const UUID & id) const override;
bool hasSubscriptionImpl(EntityType type) const override;

View File

@ -1,7 +1,7 @@
#include <Access/MultipleAccessStorage.h>
#include <Access/Credentials.h>
#include <Common/Exception.h>
#include <ext/range.h>
#include <common/range.h>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/adaptor/reversed.hpp>
#include <boost/range/algorithm/copy.hpp>
@ -265,7 +265,7 @@ void MultipleAccessStorage::updateImpl(const UUID & id, const UpdateFunc & updat
}
ext::scope_guard MultipleAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const
scope_guard MultipleAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const
{
auto storage = findStorage(id);
if (!storage)
@ -286,7 +286,7 @@ bool MultipleAccessStorage::hasSubscriptionImpl(const UUID & id) const
}
ext::scope_guard MultipleAccessStorage::subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const
scope_guard MultipleAccessStorage::subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const
{
std::unique_lock lock{mutex};
auto & handlers = handlers_by_type[static_cast<size_t>(type)];
@ -321,10 +321,10 @@ void MultipleAccessStorage::updateSubscriptionsToNestedStorages(std::unique_lock
{
/// lock is already locked.
std::vector<std::pair<StoragePtr, ext::scope_guard>> added_subscriptions[static_cast<size_t>(EntityType::MAX)];
std::vector<ext::scope_guard> removed_subscriptions;
std::vector<std::pair<StoragePtr, scope_guard>> added_subscriptions[static_cast<size_t>(EntityType::MAX)];
std::vector<scope_guard> removed_subscriptions;
for (auto type : ext::range(EntityType::MAX))
for (auto type : collections::range(EntityType::MAX))
{
auto & handlers = handlers_by_type[static_cast<size_t>(type)];
auto & subscriptions = subscriptions_to_nested_storages[static_cast<size_t>(type)];
@ -364,7 +364,7 @@ void MultipleAccessStorage::updateSubscriptionsToNestedStorages(std::unique_lock
lock.unlock();
removed_subscriptions.clear();
for (auto type : ext::range(EntityType::MAX))
for (auto type : collections::range(EntityType::MAX))
{
if (!added_subscriptions[static_cast<size_t>(type)].empty())
{
@ -384,7 +384,7 @@ void MultipleAccessStorage::updateSubscriptionsToNestedStorages(std::unique_lock
/// Lock the mutex again to store added subscriptions to the nested storages.
lock.lock();
for (auto type : ext::range(EntityType::MAX))
for (auto type : collections::range(EntityType::MAX))
{
if (!added_subscriptions[static_cast<size_t>(type)].empty())
{

View File

@ -44,8 +44,8 @@ protected:
UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) override;
void removeImpl(const UUID & id) override;
void updateImpl(const UUID & id, const UpdateFunc & update_func) override;
ext::scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override;
ext::scope_guard subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const override;
scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override;
scope_guard subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const override;
bool hasSubscriptionImpl(const UUID & id) const override;
bool hasSubscriptionImpl(EntityType type) const override;
UUID loginImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators) const override;
@ -59,7 +59,7 @@ private:
std::shared_ptr<const Storages> nested_storages;
mutable LRUCache<UUID, Storage> ids_cache;
mutable std::list<OnChangedHandler> handlers_by_type[static_cast<size_t>(EntityType::MAX)];
mutable std::unordered_map<StoragePtr, ext::scope_guard> subscriptions_to_nested_storages[static_cast<size_t>(EntityType::MAX)];
mutable std::unordered_map<StoragePtr, scope_guard> subscriptions_to_nested_storages[static_cast<size_t>(EntityType::MAX)];
mutable std::mutex mutex;
};

View File

@ -2,7 +2,7 @@
#include <Access/IAccessEntity.h>
#include <Access/RolesOrUsersSet.h>
#include <ext/range.h>
#include <common/range.h>
#include <boost/algorithm/string/split.hpp>
#include <boost/lexical_cast.hpp>
#include <chrono>
@ -219,7 +219,7 @@ inline const Quota::KeyTypeInfo & Quota::KeyTypeInfo::get(KeyType type)
{
for (const auto & token : tokens)
{
for (auto kt : ext::range(KeyType::MAX))
for (auto kt : collections::range(KeyType::MAX))
{
if (KeyTypeInfo::get(kt).name == token)
{

View File

@ -4,7 +4,7 @@
#include <Access/AccessControlManager.h>
#include <Common/Exception.h>
#include <Common/thread_local_rng.h>
#include <ext/range.h>
#include <common/range.h>
#include <boost/range/adaptor/map.hpp>
#include <boost/range/algorithm/copy.hpp>
#include <boost/range/algorithm/lower_bound.hpp>
@ -124,7 +124,7 @@ boost::shared_ptr<const EnabledQuota::Intervals> QuotaCache::QuotaInfo::rebuildI
if (limits.randomize_interval)
end_of_interval += randomDuration(limits.duration);
interval.end_of_interval = end_of_interval.time_since_epoch();
for (auto resource_type : ext::range(MAX_RESOURCE_TYPE))
for (auto resource_type : collections::range(MAX_RESOURCE_TYPE))
{
if (limits.max[resource_type])
interval.max[resource_type] = *limits.max[resource_type];
@ -159,7 +159,7 @@ boost::shared_ptr<const EnabledQuota::Intervals> QuotaCache::QuotaInfo::rebuildI
/// Found an interval with the same duration, we need to copy its usage information to `result`.
const auto & current_interval = *lower_bound;
for (auto resource_type : ext::range(MAX_RESOURCE_TYPE))
for (auto resource_type : collections::range(MAX_RESOURCE_TYPE))
{
new_interval.used[resource_type].store(current_interval.used[resource_type].load());
new_interval.end_of_interval.store(current_interval.end_of_interval.load());

View File

@ -1,7 +1,7 @@
#pragma once
#include <Access/EnabledQuota.h>
#include <ext/scope_guard.h>
#include <common/scope_guard.h>
#include <memory>
#include <mutex>
#include <map>
@ -60,7 +60,7 @@ private:
mutable std::mutex mutex;
std::unordered_map<UUID /* quota id */, QuotaInfo> all_quotas;
bool all_quotas_read = false;
ext::scope_guard subscription;
scope_guard subscription;
std::map<EnabledQuota::Params, std::weak_ptr<EnabledQuota>> enabled_quotas;
};
}

View File

@ -66,7 +66,7 @@ std::shared_ptr<const EnabledRoles>
RoleCache::getEnabledRoles(const std::vector<UUID> & roles, const std::vector<UUID> & roles_with_admin_option)
{
/// Declared before `lock` so that notifications are sent after the mutex is unlocked.
ext::scope_guard notifications;
scope_guard notifications;
std::lock_guard lock{mutex};
EnabledRoles::Params params;
@ -88,7 +88,7 @@ RoleCache::getEnabledRoles(const std::vector<UUID> & roles, const std::vector<UU
}
void RoleCache::collectEnabledRoles(ext::scope_guard & notifications)
void RoleCache::collectEnabledRoles(scope_guard & notifications)
{
/// `mutex` is already locked.
@ -106,7 +106,7 @@ void RoleCache::collectEnabledRoles(ext::scope_guard & notifications)
}
void RoleCache::collectEnabledRoles(EnabledRoles & enabled, ext::scope_guard & notifications)
void RoleCache::collectEnabledRoles(EnabledRoles & enabled, scope_guard & notifications)
{
/// `mutex` is already locked.
@ -146,8 +146,8 @@ RolePtr RoleCache::getRole(const UUID & role_id)
auto role = manager.tryRead<Role>(role_id);
if (role)
{
auto cache_value = Poco::SharedPtr<std::pair<RolePtr, ext::scope_guard>>(
new std::pair<RolePtr, ext::scope_guard>{role, std::move(subscription)});
auto cache_value = Poco::SharedPtr<std::pair<RolePtr, scope_guard>>(
new std::pair<RolePtr, scope_guard>{role, std::move(subscription)});
cache.add(role_id, cache_value);
return role;
}
@ -159,7 +159,7 @@ RolePtr RoleCache::getRole(const UUID & role_id)
void RoleCache::roleChanged(const UUID & role_id, const RolePtr & changed_role)
{
/// Declared before `lock` so that notifications are sent after the mutex is unlocked.
ext::scope_guard notifications;
scope_guard notifications;
std::lock_guard lock{mutex};
auto role_from_cache = cache.get(role_id);
@ -174,7 +174,7 @@ void RoleCache::roleChanged(const UUID & role_id, const RolePtr & changed_role)
void RoleCache::roleRemoved(const UUID & role_id)
{
/// Declared before `lock` so that notifications are sent after the mutex is unlocked.
ext::scope_guard notifications;
scope_guard notifications;
std::lock_guard lock{mutex};
cache.remove(role_id);

View File

@ -24,14 +24,14 @@ public:
const std::vector<UUID> & current_roles_with_admin_option);
private:
void collectEnabledRoles(ext::scope_guard & notifications);
void collectEnabledRoles(EnabledRoles & enabled, ext::scope_guard & notifications);
void collectEnabledRoles(scope_guard & notifications);
void collectEnabledRoles(EnabledRoles & enabled, scope_guard & notifications);
RolePtr getRole(const UUID & role_id);
void roleChanged(const UUID & role_id, const RolePtr & changed_role);
void roleRemoved(const UUID & role_id);
const AccessControlManager & manager;
Poco::ExpireCache<UUID, std::pair<RolePtr, ext::scope_guard>> cache;
Poco::ExpireCache<UUID, std::pair<RolePtr, scope_guard>> cache;
std::map<EnabledRoles::Params, std::weak_ptr<EnabledRoles>> enabled_roles;
mutable std::mutex mutex;
};

Some files were not shown because too many files have changed in this diff Show More