Merge branch 'master' into func_zookeeper_session_uptime

Alexander Tokmakov 2021-09-15 21:10:06 +03:00
commit 5c605189ac
1344 changed files with 6176 additions and 3168 deletions

.gitmodules vendored

@ -246,3 +246,6 @@
[submodule "contrib/bzip2"]
path = contrib/bzip2
url = https://github.com/ClickHouse-Extras/bzip2.git
[submodule "contrib/magic_enum"]
path = contrib/magic_enum
url = https://github.com/Neargye/magic_enum


@ -84,8 +84,8 @@ if (ENABLE_CHECK_HEAVY_BUILDS)
set (RLIMIT_DATA 5000000000)
# set VIRT (RLIMIT_AS) to 10G (DATA*10)
set (RLIMIT_AS 10000000000)
# set CPU time limit to 800 seconds
set (RLIMIT_CPU 800)
# set CPU time limit to 1000 seconds
set (RLIMIT_CPU 1000)
# gcc10/gcc10/clang -fsanitize=memory is too heavy
if (SANITIZE STREQUAL "memory" OR COMPILER_GCC)
@ -192,7 +192,7 @@ endif ()
# Make sure the final executable has symbols exported
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-12" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-13" "llvm-objcopy-12" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
if (NOT OBJCOPY_PATH AND OS_DARWIN)
find_program (BREW_PATH NAMES "brew")
@ -379,7 +379,7 @@ if (COMPILER_CLANG)
endif ()
# Always prefer llvm tools when using clang. For instance, we cannot use GNU ar when llvm LTO is enabled
find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-12" "llvm-ar-11" "llvm-ar-10" "llvm-ar-9" "llvm-ar-8")
find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-13" "llvm-ar-12" "llvm-ar-11" "llvm-ar-10" "llvm-ar-9" "llvm-ar-8")
if (LLVM_AR_PATH)
message(STATUS "Using llvm-ar: ${LLVM_AR_PATH}.")
@ -388,7 +388,7 @@ if (COMPILER_CLANG)
message(WARNING "Cannot find llvm-ar. System ar will be used instead. It does not work with ThinLTO.")
endif ()
find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-12" "llvm-ranlib-11" "llvm-ranlib-10" "llvm-ranlib-9" "llvm-ranlib-8")
find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-13" "llvm-ranlib-12" "llvm-ranlib-11" "llvm-ranlib-10" "llvm-ranlib-9" "llvm-ranlib-8")
if (LLVM_RANLIB_PATH)
message(STATUS "Using llvm-ranlib: ${LLVM_RANLIB_PATH}.")


@ -85,6 +85,7 @@ target_link_libraries (common
replxx
cctz
fmt
magic_enum
)
if (ENABLE_TESTS)

base/common/Decimal.h Normal file

@ -0,0 +1,157 @@
#pragma once
#include "common/extended_types.h"
#if !defined(NO_SANITIZE_UNDEFINED)
#if defined(__clang__)
#define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined")))
#else
#define NO_SANITIZE_UNDEFINED
#endif
#endif
namespace DB
{
template <class> struct Decimal;
class DateTime64;
using Decimal32 = Decimal<Int32>;
using Decimal64 = Decimal<Int64>;
using Decimal128 = Decimal<Int128>;
using Decimal256 = Decimal<Int256>;
template <class T>
concept is_decimal =
std::is_same_v<T, Decimal32>
|| std::is_same_v<T, Decimal64>
|| std::is_same_v<T, Decimal128>
|| std::is_same_v<T, Decimal256>
|| std::is_same_v<T, DateTime64>;
template <class T>
concept is_over_big_int =
std::is_same_v<T, Int128>
|| std::is_same_v<T, UInt128>
|| std::is_same_v<T, Int256>
|| std::is_same_v<T, UInt256>
|| std::is_same_v<T, Decimal128>
|| std::is_same_v<T, Decimal256>;
template <class T> struct NativeTypeT { using Type = T; };
template <is_decimal T> struct NativeTypeT<T> { using Type = typename T::NativeType; };
template <class T> using NativeType = typename NativeTypeT<T>::Type;
/// Own FieldType for Decimal.
/// It is only a "storage" for decimal.
/// To perform operations, you also have to provide a scale (number of digits after point).
template <typename T>
struct Decimal
{
using NativeType = T;
constexpr Decimal() = default;
constexpr Decimal(Decimal<T> &&) = default;
constexpr Decimal(const Decimal<T> &) = default;
constexpr Decimal(const T & value_): value(value_) {}
template <typename U>
constexpr Decimal(const Decimal<U> & x): value(x.value) {}
constexpr Decimal<T> & operator = (Decimal<T> &&) = default;
constexpr Decimal<T> & operator = (const Decimal<T> &) = default;
constexpr operator T () const { return value; }
template <typename U>
constexpr U convertTo() const
{
if constexpr (is_decimal<U>)
return convertTo<typename U::NativeType>();
else
return static_cast<U>(value);
}
const Decimal<T> & operator += (const T & x) { value += x; return *this; }
const Decimal<T> & operator -= (const T & x) { value -= x; return *this; }
const Decimal<T> & operator *= (const T & x) { value *= x; return *this; }
const Decimal<T> & operator /= (const T & x) { value /= x; return *this; }
const Decimal<T> & operator %= (const T & x) { value %= x; return *this; }
template <typename U> const Decimal<T> & operator += (const Decimal<U> & x) { value += x.value; return *this; }
template <typename U> const Decimal<T> & operator -= (const Decimal<U> & x) { value -= x.value; return *this; }
template <typename U> const Decimal<T> & operator *= (const Decimal<U> & x) { value *= x.value; return *this; }
template <typename U> const Decimal<T> & operator /= (const Decimal<U> & x) { value /= x.value; return *this; }
template <typename U> const Decimal<T> & operator %= (const Decimal<U> & x) { value %= x.value; return *this; }
/// This is to avoid UB for sumWithOverflow()
void NO_SANITIZE_UNDEFINED addOverflow(const T & x) { value += x; }
T value;
};
template <typename T> inline bool operator< (const Decimal<T> & x, const Decimal<T> & y) { return x.value < y.value; }
template <typename T> inline bool operator> (const Decimal<T> & x, const Decimal<T> & y) { return x.value > y.value; }
template <typename T> inline bool operator<= (const Decimal<T> & x, const Decimal<T> & y) { return x.value <= y.value; }
template <typename T> inline bool operator>= (const Decimal<T> & x, const Decimal<T> & y) { return x.value >= y.value; }
template <typename T> inline bool operator== (const Decimal<T> & x, const Decimal<T> & y) { return x.value == y.value; }
template <typename T> inline bool operator!= (const Decimal<T> & x, const Decimal<T> & y) { return x.value != y.value; }
template <typename T> inline Decimal<T> operator+ (const Decimal<T> & x, const Decimal<T> & y) { return x.value + y.value; }
template <typename T> inline Decimal<T> operator- (const Decimal<T> & x, const Decimal<T> & y) { return x.value - y.value; }
template <typename T> inline Decimal<T> operator* (const Decimal<T> & x, const Decimal<T> & y) { return x.value * y.value; }
template <typename T> inline Decimal<T> operator/ (const Decimal<T> & x, const Decimal<T> & y) { return x.value / y.value; }
template <typename T> inline Decimal<T> operator- (const Decimal<T> & x) { return -x.value; }
/// Distinguishable type to allow function resolution/deduction based on value type,
/// but also relatively easy to convert to/from Decimal64.
class DateTime64 : public Decimal64
{
public:
using Base = Decimal64;
using Base::Base;
using NativeType = Base::NativeType;
constexpr DateTime64(const Base & v): Base(v) {}
};
}
constexpr DB::UInt64 max_uint_mask = std::numeric_limits<DB::UInt64>::max();
namespace std
{
template <typename T>
struct hash<DB::Decimal<T>>
{
size_t operator()(const DB::Decimal<T> & x) const { return hash<T>()(x.value); }
};
template <>
struct hash<DB::Decimal128>
{
size_t operator()(const DB::Decimal128 & x) const
{
return std::hash<DB::Int64>()(x.value >> 64)
^ std::hash<DB::Int64>()(x.value & max_uint_mask);
}
};
template <>
struct hash<DB::DateTime64>
{
size_t operator()(const DB::DateTime64 & x) const
{
return std::hash<DB::DateTime64::NativeType>()(x);
}
};
template <>
struct hash<DB::Decimal256>
{
size_t operator()(const DB::Decimal256 & x) const
{
// FIXME temp solution
return std::hash<DB::Int64>()(static_cast<DB::Int64>(x.value >> 64 & max_uint_mask))
^ std::hash<DB::Int64>()(static_cast<DB::Int64>(x.value & max_uint_mask));
}
};
}
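A minimal usage sketch of the new `Decimal` wrapper (not part of the commit; the include path is hypothetical, and the scale handling shown is an assumption since the scale is tracked by the caller):

```cpp
#include <iostream>
#include <common/Decimal.h>  // hypothetical include path for the header above

int main()
{
    // Decimal stores only the scaled integer value; the scale (2 here) is up to the caller.
    DB::Decimal64 price{12345};      // represents 123.45 at scale 2
    price += DB::Decimal64{55};      // now 124.00
    std::cout << price.convertTo<double>() / 100.0 << '\n';  // prints 124
}
```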


@ -0,0 +1,38 @@
#pragma once
#include <magic_enum.hpp>
#include <fmt/format.h>
template <class T> concept is_enum = std::is_enum_v<T>;
namespace detail
{
template <is_enum E, class F, size_t ...I>
constexpr void static_for(F && f, std::index_sequence<I...>)
{
(std::forward<F>(f)(std::integral_constant<E, magic_enum::enum_value<E>(I)>()) , ...);
}
}
/**
* Iterate over enum values in compile-time (compile-time switch/case, loop unrolling).
*
* @example static_for<E>([](auto enum_value) { return template_func<enum_value>(); })
* ^ enum_value can be used as a template parameter
*/
template <is_enum E, class F>
constexpr void static_for(F && f)
{
constexpr size_t count = magic_enum::enum_count<E>();
detail::static_for<E>(std::forward<F>(f), std::make_index_sequence<count>());
}
/// Enable printing enum values as strings via fmt + magic_enum
template <is_enum T>
struct fmt::formatter<T> : fmt::formatter<std::string_view>
{
constexpr auto format(T value, auto& format_context)
{
return formatter<string_view>::format(magic_enum::enum_name(value), format_context);
}
};
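A small usage sketch of `static_for` and the fmt integration (not part of the commit; the enum and the include path are illustrative):

```cpp
#include <fmt/format.h>
#include <common/EnumReflection.h>  // hypothetical include path for the header above

enum class Color { Red, Green, Blue };

int main()
{
    // Compile-time loop: enum_value is a std::integral_constant, usable as a template parameter.
    static_for<Color>([](auto enum_value)
    {
        fmt::print("{}\n", enum_value());  // prints "Red", "Green", "Blue" via magic_enum::enum_name
    });
}
```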


@ -16,6 +16,10 @@ extern "C"
}
#endif
#if defined(__clang__) && __clang_major__ >= 13
#pragma clang diagnostic ignored "-Wreserved-identifier"
#endif
namespace
{


@ -41,22 +41,14 @@ template <> struct is_unsigned<UInt256> { static constexpr bool value = true; };
template <typename T>
inline constexpr bool is_unsigned_v = is_unsigned<T>::value;
template <class T> concept is_integer =
std::is_integral_v<T>
|| std::is_same_v<T, Int128>
|| std::is_same_v<T, UInt128>
|| std::is_same_v<T, Int256>
|| std::is_same_v<T, UInt256>;
/// TODO: is_integral includes char, char8_t and wchar_t.
template <typename T>
struct is_integer
{
static constexpr bool value = std::is_integral_v<T>;
};
template <> struct is_integer<Int128> { static constexpr bool value = true; };
template <> struct is_integer<UInt128> { static constexpr bool value = true; };
template <> struct is_integer<Int256> { static constexpr bool value = true; };
template <> struct is_integer<UInt256> { static constexpr bool value = true; };
template <typename T>
inline constexpr bool is_integer_v = is_integer<T>::value;
template <class T> concept is_floating_point = std::is_floating_point_v<T>;
template <typename T>
struct is_arithmetic


@ -36,18 +36,7 @@
namespace detail
{
template <char s0>
inline bool is_in(char x)
{
return x == s0;
}
template <char s0, char s1, char... tail>
inline bool is_in(char x)
{
return x == s0 || is_in<s1, tail...>(x);
}
template <char ...chars> constexpr bool is_in(char x) { return ((x == chars) || ...); }
#if defined(__SSE2__)
template <char s0>
@ -67,16 +56,10 @@ inline __m128i mm_is_in(__m128i bytes)
#endif
template <bool positive>
bool maybe_negate(bool x)
{
if constexpr (positive)
return x;
else
return !x;
}
constexpr bool maybe_negate(bool x) { return x == positive; }
template <bool positive>
uint16_t maybe_negate(uint16_t x)
constexpr uint16_t maybe_negate(uint16_t x)
{
if constexpr (positive)
return x;
@ -149,12 +132,13 @@ template <bool positive, ReturnMode return_mode, size_t num_chars,
char c05 = 0, char c06 = 0, char c07 = 0, char c08 = 0,
char c09 = 0, char c10 = 0, char c11 = 0, char c12 = 0,
char c13 = 0, char c14 = 0, char c15 = 0, char c16 = 0>
inline const char * find_first_symbols_sse42_impl(const char * const begin, const char * const end)
inline const char * find_first_symbols_sse42(const char * const begin, const char * const end)
{
const char * pos = begin;
#if defined(__SSE4_2__)
#define MODE (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT)
constexpr int mode = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT;
__m128i set = _mm_setr_epi8(c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, c16);
for (; pos + 15 < end; pos += 16)
@ -163,16 +147,15 @@ inline const char * find_first_symbols_sse42_impl(const char * const begin, cons
if constexpr (positive)
{
if (_mm_cmpestrc(set, num_chars, bytes, 16, MODE))
return pos + _mm_cmpestri(set, num_chars, bytes, 16, MODE);
if (_mm_cmpestrc(set, num_chars, bytes, 16, mode))
return pos + _mm_cmpestri(set, num_chars, bytes, 16, mode);
}
else
{
if (_mm_cmpestrc(set, num_chars, bytes, 16, MODE | _SIDD_NEGATIVE_POLARITY))
return pos + _mm_cmpestri(set, num_chars, bytes, 16, MODE | _SIDD_NEGATIVE_POLARITY);
if (_mm_cmpestrc(set, num_chars, bytes, 16, mode | _SIDD_NEGATIVE_POLARITY))
return pos + _mm_cmpestri(set, num_chars, bytes, 16, mode | _SIDD_NEGATIVE_POLARITY);
}
}
#undef MODE
#endif
for (; pos < end; ++pos)
@ -197,20 +180,15 @@ inline const char * find_first_symbols_sse42_impl(const char * const begin, cons
}
template <bool positive, ReturnMode return_mode, char... symbols>
inline const char * find_first_symbols_sse42(const char * begin, const char * end)
{
return find_first_symbols_sse42_impl<positive, return_mode, sizeof...(symbols), symbols...>(begin, end);
}
/// NOTE No SSE 4.2 implementation for find_last_symbols_or_null. Not worth to do.
template <bool positive, ReturnMode return_mode, char... symbols>
inline const char * find_first_symbols_dispatch(const char * begin, const char * end)
requires(0 <= sizeof...(symbols) && sizeof...(symbols) <= 16)
{
#if defined(__SSE4_2__)
if (sizeof...(symbols) >= 5)
return find_first_symbols_sse42<positive, return_mode, symbols...>(begin, end);
return find_first_symbols_sse42<positive, return_mode, sizeof...(symbols), symbols...>(begin, end);
else
#endif
return find_first_symbols_sse2<positive, return_mode, symbols...>(begin, end);
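For context, a sketch of how this header's public entry point is typically called (the `find_first_symbols<...>` wrapper is not shown in this hunk, so its exact signature is an assumption):

```cpp
#include <cstring>
#include <common/find_symbols.h>  // hypothetical include path

int main()
{
    const char * s = "key=value;rest";
    // With fewer than 5 symbols this dispatches to the SSE2 path (see find_first_symbols_dispatch).
    const char * pos = find_first_symbols<'=', ';'>(s, s + std::strlen(s));
    return pos == s + 3 ? 0 : 1;  // pos points at '='
}
```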


@ -1,3 +1,7 @@
#if defined(__clang__) && __clang_major__ >= 13
#pragma clang diagnostic ignored "-Wreserved-identifier"
#endif
/// This code was based on the code by Fedor Korotkiy (prime@yandex-team.ru) for YT product in Yandex.
#include <common/defines.h>


@ -15,15 +15,15 @@ private:
public:
using UnderlyingType = T;
template <class Enable = typename std::is_copy_constructible<T>::type>
explicit StrongTypedef(const T & t_) : t(t_) {}
constexpr explicit StrongTypedef(const T & t_) : t(t_) {}
template <class Enable = typename std::is_move_constructible<T>::type>
explicit StrongTypedef(T && t_) : t(std::move(t_)) {}
constexpr explicit StrongTypedef(T && t_) : t(std::move(t_)) {}
template <class Enable = typename std::is_default_constructible<T>::type>
StrongTypedef(): t() {}
constexpr StrongTypedef(): t() {}
StrongTypedef(const Self &) = default;
StrongTypedef(Self &&) = default;
constexpr StrongTypedef(const Self &) = default;
constexpr StrongTypedef(Self &&) = default;
Self & operator=(const Self &) = default;
Self & operator=(Self &&) = default;
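With the constructors now `constexpr`, strong typedefs can be built in constant expressions; a short sketch (the tag, alias, and include path are hypothetical):

```cpp
#include <common/strong_typedef.h>  // hypothetical include path for the header above

struct UserIdTag;
using UserId = StrongTypedef<unsigned long, UserIdTag>;  // assumes the usual <T, Tag> signature

constexpr UserId root{0UL};  // now well-formed: the forwarding constructor is constexpr
```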


@ -1,6 +1,10 @@
#pragma once
#include <cstddef>
#if defined(__clang__) && __clang_major__ >= 13
#pragma clang diagnostic ignored "-Wreserved-identifier"
#endif
constexpr size_t KiB = 1024;
constexpr size_t MiB = 1024 * KiB;
constexpr size_t GiB = 1024 * MiB;


@ -1,3 +1,7 @@
#if defined(__clang__) && __clang_major__ >= 13
#pragma clang diagnostic ignored "-Wreserved-identifier"
#endif
#include <daemon/BaseDaemon.h>
#include <daemon/SentryWriter.h>


@ -49,6 +49,8 @@ if (NOT USE_INTERNAL_MYSQL_LIBRARY AND OPENSSL_INCLUDE_DIR)
target_include_directories (mysqlxx SYSTEM PRIVATE ${OPENSSL_INCLUDE_DIR})
endif ()
target_no_warning(mysqlxx reserved-macro-identifier)
if (NOT USE_INTERNAL_MYSQL_LIBRARY AND USE_STATIC_LIBRARIES)
message(WARNING "Statically linking with system mysql/mariadb only works "
"if mysql client libraries are built with same openssl version as "


@ -79,7 +79,7 @@ PoolWithFailover PoolFactory::get(const Poco::Util::AbstractConfiguration & conf
std::lock_guard<std::mutex> lock(impl->mutex);
if (auto entry = impl->pools.find(config_name); entry != impl->pools.end())
{
return *(entry->second.get());
return *(entry->second);
}
else
{
@ -100,7 +100,7 @@ PoolWithFailover PoolFactory::get(const Poco::Util::AbstractConfiguration & conf
impl->pools.insert_or_assign(config_name, pool);
impl->pools_by_ids.insert_or_assign(entry_name, config_name);
}
return *(pool.get());
return *pool;
}
}


@ -27,6 +27,10 @@
#define _PATH_TTY "/dev/tty"
#endif
#if defined(__clang__) && __clang_major__ >= 13
#pragma clang diagnostic ignored "-Wreserved-identifier"
#endif
#include <termios.h>
#include <signal.h>
#include <ctype.h>


@ -6,7 +6,7 @@ if (ENABLE_CLANG_TIDY)
message(FATAL_ERROR "clang-tidy requires CMake version at least 3.6.")
endif()
find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-12" "clang-tidy-11" "clang-tidy-10" "clang-tidy-9" "clang-tidy-8")
find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-13" "clang-tidy-12" "clang-tidy-11" "clang-tidy-10" "clang-tidy-9" "clang-tidy-8")
if (CLANG_TIDY_PATH)
message(STATUS


@ -192,4 +192,29 @@ elseif (COMPILER_GCC)
# For some reason (bug in gcc?) macro 'GCC diagnostic ignored "-Wstringop-overflow"' doesn't help.
add_cxx_compile_options(-Wno-stringop-overflow)
endif()
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 11)
# reinterpretAs.cpp:182:31: error: void* memcpy(void*, const void*, size_t) copying an object of non-trivial type
# using ToFieldType = using FieldType = using UUID = struct StrongTypedef<wide::integer<128, unsigned int>, DB::UUIDTag>
# {aka struct StrongTypedef<wide::integer<128, unsigned int>, DB::UUIDTag>} from an array of const char8_t
add_cxx_compile_options(-Wno-error=class-memaccess)
# Maybe false positive...
# In file included from /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/memory:673,
# In function void std::__1::__libcpp_operator_delete(_Args ...) [with _Args = {void*, long unsigned int}],
# inlined from void std::__1::__do_deallocate_handle_size(void*, size_t, _Args ...) [with _Args = {}] at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/new:271:34,
# inlined from void std::__1::__libcpp_deallocate(void*, size_t, size_t) at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/new:285:41,
# inlined from constexpr void std::__1::allocator<_Tp>::deallocate(_Tp*, size_t) [with _Tp = char] at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/memory:849:39,
# inlined from static constexpr void std::__1::allocator_traits<_Alloc>::deallocate(std::__1::allocator_traits<_Alloc>::allocator_type&, std::__1::allocator_traits<_Alloc>::pointer, std::__1::allocator_traits<_Alloc>::size_type) [with _Alloc = std::__1::allocator<char>] at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/__memory/allocator_traits.h:476:24,
# inlined from std::__1::basic_string<_CharT, _Traits, _Allocator>::~basic_string() [with _CharT = char; _Traits = std::__1::char_traits<char>; _Allocator = std::__1::allocator<char>] at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/string:2219:35,
# inlined from std::__1::basic_string<_CharT, _Traits, _Allocator>::~basic_string() [with _CharT = char; _Traits = std::__1::char_traits<char>; _Allocator = std::__1::allocator<char>] at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/string:2213:1,
# inlined from DB::JSONBuilder::JSONMap::Pair::~Pair() at /home/jakalletti/ClickHouse/ClickHouse/src/Common/JSONBuilder.h:90:12,
# inlined from void DB::JSONBuilder::JSONMap::add(std::__1::string, DB::JSONBuilder::ItemPtr) at /home/jakalletti/ClickHouse/ClickHouse/src/Common/JSONBuilder.h:97:68,
# inlined from virtual void DB::ExpressionStep::describeActions(DB::JSONBuilder::JSONMap&) const at /home/jakalletti/ClickHouse/ClickHouse/src/Processors/QueryPlan/ExpressionStep.cpp:102:12:
# /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/new:247:20: error: void operator delete(void*, size_t) called on a pointer to an unallocated object 7598543875853023301 [-Werror=free-nonheap-object]
add_cxx_compile_options(-Wno-error=free-nonheap-object)
# AggregateFunctionAvg.h:203:100: error: this pointer is null [-Werror=nonnull]
add_cxx_compile_options(-Wno-error=nonnull)
endif()
endif ()


@ -33,6 +33,7 @@ endif()
set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1)
add_subdirectory (abseil-cpp-cmake)
add_subdirectory (magic-enum-cmake)
add_subdirectory (boost-cmake)
add_subdirectory (cctz-cmake)
add_subdirectory (consistent-hashing)

contrib/abseil-cpp vendored

@ -1 +1 @@
Subproject commit 4f3b686f86c3ebaba7e4e926e62a79cb1c659a54
Subproject commit b004a8a02418b83de8b686caa0b0f6e39ac2191f

contrib/fastops vendored

@ -1 +1 @@
Subproject commit 88752a5e03cf34639a4a37a4b41d8b463fffd2b5
Subproject commit 012b777df9e2d145a24800a6c8c3d4a0249bb09e

contrib/llvm vendored

@ -1 +1 @@
Subproject commit e5751459412bce1391fb7a2e9bbc01e131bf72f1
Subproject commit f30bbecef78b75b527e257c1304d0be2f2f95975


@ -0,0 +1,3 @@
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/magic_enum")
add_library (magic_enum INTERFACE)
target_include_directories(magic_enum INTERFACE ${LIBRARY_DIR}/include)

contrib/magic_enum vendored Submodule

@ -0,0 +1 @@
Subproject commit 38f86e4d093cfc9034a140d37de2168e3951bef3

contrib/rocksdb vendored

@ -1 +1 @@
Subproject commit b6480c69bf3ab6e298e0d019a07fd4f69029b26a
Subproject commit 5ea892c8673e6c5a052887653673b967d44cc59b


@ -3,10 +3,17 @@
# Provides: clickhouse-server
# Default-Start: 2 3 4 5
# Default-Stop: 0 1 6
# Required-Start: $network
# Required-Stop: $network
# Should-Start: $time $network
# Should-Stop: $network
# Short-Description: Yandex clickhouse-server daemon
### END INIT INFO
#
# NOTES:
# - Should-* -- script can start if the listed facilities are missing, unlike Required-*
#
# For documentation, see [1]:
#
# [1]: https://wiki.debian.org/LSBInitScripts
CLICKHOUSE_USER=clickhouse
CLICKHOUSE_GROUP=${CLICKHOUSE_USER}


@ -1,7 +1,12 @@
[Unit]
Description=ClickHouse Server (analytic DBMS for big data)
Requires=network-online.target
After=network-online.target
# NOTE: After/Wants=time-sync.target is not enough; you need to ensure
# that the time has already been adjusted. If you use systemd-timesyncd you are
# safe, but if you use ntp or some other daemon, you should configure it
# additionally.
After=time-sync.target network-online.target
Wants=time-sync.target
[Service]
Type=simple
@ -16,4 +21,5 @@ LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE
[Install]
# ClickHouse should not start from the rescue shell (rescue.target).
WantedBy=multi-user.target

debian/rules vendored

@ -36,8 +36,8 @@ endif
CMAKE_FLAGS += -DENABLE_UTILS=0
DEB_CC ?= $(shell which gcc-10 gcc-9 gcc | head -n1)
DEB_CXX ?= $(shell which g++-10 g++-9 g++ | head -n1)
DEB_CC ?= $(shell which gcc-11 gcc-10 gcc-9 gcc | head -n1)
DEB_CXX ?= $(shell which g++-11 g++-10 g++-9 g++ | head -n1)
ifdef DEB_CXX
DEB_BUILD_GNU_TYPE := $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE)


@ -1,6 +1,6 @@
FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=12
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list


@ -4,7 +4,7 @@ set -e
#ccache -s # uncomment to display CCache statistics
mkdir -p /server/build_docker
cd /server/build_docker
cmake -G Ninja /server "-DCMAKE_C_COMPILER=$(command -v clang-12)" "-DCMAKE_CXX_COMPILER=$(command -v clang++-12)"
cmake -G Ninja /server "-DCMAKE_C_COMPILER=$(command -v clang-13)" "-DCMAKE_CXX_COMPILER=$(command -v clang++-13)"
# Set the number of build jobs to the half of number of virtual CPU cores (rounded up).
# By default, ninja use all virtual CPU cores, that leads to very high memory consumption without much improvement in build time.


@ -1,7 +1,7 @@
# docker build -t clickhouse/binary-builder .
FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=12
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
@ -41,18 +41,14 @@ RUN apt-get update \
ccache \
cmake \
curl \
g++-10 \
gcc-10 \
gdb \
git \
gperf \
libicu-dev \
libreadline-dev \
clang-12 \
clang-tidy-12 \
lld-12 \
llvm-12 \
llvm-12-dev \
clang-${LLVM_VERSION} \
clang-tidy-${LLVM_VERSION} \
lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION} \
llvm-${LLVM_VERSION}-dev \
libicu-dev \
libreadline-dev \
moreutils \
@ -104,15 +100,10 @@ RUN wget -nv "https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.0
# Download toolchain for FreeBSD 11.3
RUN wget -nv https://clickhouse-datasets.s3.yandex.net/toolchains/toolchains/freebsd-11.3-toolchain.tar.xz
# NOTE: For some reason we have outdated version of gcc-10 in ubuntu 20.04 stable.
# Current workaround is to use latest version proposed repo. Remove as soon as
# gcc-10.2 appear in stable repo.
RUN echo 'deb http://archive.ubuntu.com/ubuntu/ focal-proposed restricted main multiverse universe' > /etc/apt/sources.list.d/proposed-repositories.list
RUN apt-get update \
&& apt-get install gcc-10 g++-10 --yes
RUN rm /etc/apt/sources.list.d/proposed-repositories.list && apt-get update
# NOTE: Seems like gcc-11 is too new for ubuntu20 repository
RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
&& apt-get update \
&& apt-get install gcc-11 g++-11 --yes
COPY build.sh /


@ -1,7 +1,7 @@
# docker build -t clickhouse/deb-builder .
FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=12
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
@ -37,17 +37,17 @@ RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \
RUN apt-get update \
&& apt-get install \
alien \
clang-12 \
clang-tidy-12 \
clang-${LLVM_VERSION} \
clang-tidy-${LLVM_VERSION} \
cmake \
debhelper \
devscripts \
gdb \
git \
gperf \
lld-12 \
llvm-12 \
llvm-12-dev \
lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION} \
llvm-${LLVM_VERSION}-dev \
moreutils \
ninja-build \
perl \
@ -57,15 +57,11 @@ RUN apt-get update \
tzdata \
--yes --no-install-recommends
# NOTE: For some reason we have outdated version of gcc-10 in ubuntu 20.04 stable.
# Current workaround is to use latest version proposed repo. Remove as soon as
# gcc-10.2 appear in stable repo.
RUN echo 'deb http://archive.ubuntu.com/ubuntu/ focal-proposed restricted main multiverse universe' > /etc/apt/sources.list.d/proposed-repositories.list
# NOTE: Seems like gcc-11 is too new for ubuntu20 repository
RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
&& apt-get update \
&& apt-get install gcc-11 g++-11 --yes
RUN apt-get update \
&& apt-get install gcc-10 g++-10 --yes --no-install-recommends
RUN rm /etc/apt/sources.list.d/proposed-repositories.list && apt-get update
# This symlink required by gcc to find lld compiler
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld


@ -205,7 +205,8 @@ if __name__ == "__main__":
parser.add_argument("--build-type", choices=("debug", ""), default="")
parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64",
"clang-12", "clang-12-darwin", "clang-12-darwin-aarch64", "clang-12-aarch64",
"clang-11-freebsd", "clang-12-freebsd", "gcc-10"), default="clang-12")
"clang-13", "clang-13-darwin", "clang-13-darwin-aarch64", "clang-13-aarch64",
"clang-11-freebsd", "clang-12-freebsd", "clang-13-freebsd", "gcc-11"), default="clang-13")
parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="")
parser.add_argument("--unbundled", action="store_true")
parser.add_argument("--split-binary", action="store_true")


@ -1,7 +1,7 @@
# docker build -t clickhouse/test-base .
FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=12
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list


@ -11,7 +11,7 @@ RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-9 libl
# https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
RUN git clone https://github.com/ClickHouse-Extras/woboq_codebrowser
RUN cd woboq_codebrowser && cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-12 -DCMAKE_C_COMPILER=clang-12 && make -j
RUN cd woboq_codebrowser && cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-13 -DCMAKE_C_COMPILER=clang-13 && make -j
ENV CODEGEN=/woboq_codebrowser/generator/codebrowser_generator
ENV CODEINDEX=/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator
@ -24,7 +24,7 @@ ENV SHA=nosha
ENV DATA="data"
CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-12 -DCMAKE_C_COMPILER=/usr/bin/clang-12 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-13 -DCMAKE_C_COMPILER=/usr/bin/clang-13 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \
mkdir -p $HTML_RESULT_DIRECTORY && \
$CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \
cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\


@ -80,7 +80,7 @@ LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "RENAM
LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-client --query "SHOW TABLES FROM test"
LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-test -j 8 --testname --shard --zookeeper --print-time --use-skip-list 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee /test_result.txt
LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-test -j 8 --testname --shard --zookeeper --print-time 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee /test_result.txt
readarray -t FAILED_TESTS < <(awk '/FAIL|TIMEOUT|ERROR/ { print substr($3, 1, length($3)-1) }' "/test_result.txt")
@ -97,7 +97,7 @@ then
echo "Going to run again: ${FAILED_TESTS[*]}"
LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-test --order=random --testname --shard --zookeeper --use-skip-list "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a /test_result.txt
LLVM_PROFILE_FILE='client_coverage_%5m.profraw' clickhouse-test --order=random --testname --shard --zookeeper "${FAILED_TESTS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a /test_result.txt
else
echo "No failed tests"
fi


@ -1,7 +1,7 @@
# docker build -t clickhouse/fasttest .
FROM ubuntu:20.04
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=12
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list


@ -9,7 +9,7 @@ trap 'kill $(jobs -pr) ||:' EXIT
stage=${stage:-}
# Compiler version, normally set by Dockerfile
export LLVM_VERSION=${LLVM_VERSION:-12}
export LLVM_VERSION=${LLVM_VERSION:-13}
# A variable to pass additional flags to CMake.
# Here we explicitly default it to nothing so that bash doesn't complain about
@ -159,6 +159,7 @@ function clone_submodules
cd "$FASTTEST_SOURCE"
SUBMODULES_TO_UPDATE=(
contrib/magic_enum
contrib/abseil-cpp
contrib/boost
contrib/zlib-ng
@ -261,153 +262,8 @@ function run_tests
start_server
TESTS_TO_SKIP=(
00105_shard_collations
00109_shard_totals_after_having
00110_external_sort
00302_http_compression
00417_kill_query
00436_convert_charset
00490_special_line_separators_and_characters_outside_of_bmp
00652_replicated_mutations_zookeeper
00682_empty_parts_merge
00701_rollup
00834_cancel_http_readonly_queries_on_client_close
00911_tautological_compare
# Hyperscan
00926_multimatch
00929_multi_match_edit_distance
01681_hyperscan_debug_assertion
02004_max_hyperscan_regex_length
01176_mysql_client_interactive # requires mysql client
01031_mutations_interpreter_and_context
01053_ssd_dictionary # this test mistakenly requires access to /var/lib/clickhouse -- can't run this locally, disabled
01083_expressions_in_engine_arguments
01092_memory_profiler
01098_msgpack_format
01098_temporary_and_external_tables
01103_check_cpu_instructions_at_startup # avoid dependency on qemu -- inconvenient when running locally
01193_metadata_loading
01238_http_memory_tracking # max_memory_usage_for_user can interfere with other queries running concurrently
01251_dict_is_in_infinite_loop
01259_dictionary_custom_settings_ddl
01268_dictionary_direct_layout
01280_ssd_complex_key_dictionary
01281_group_by_limit_memory_tracking # max_memory_usage_for_user can interfere with other queries running concurrently
01318_encrypt # Depends on OpenSSL
01318_decrypt # Depends on OpenSSL
01663_aes_msan # Depends on OpenSSL
01667_aes_args_check # Depends on OpenSSL
01683_codec_encrypted # Depends on OpenSSL
01776_decrypt_aead_size_check # Depends on OpenSSL
01811_filter_by_null # Depends on OpenSSL
02012_sha512_fixedstring # Depends on OpenSSL
01281_unsucceeded_insert_select_queries_counter
01292_create_user
01294_lazy_database_concurrent
01305_replica_create_drop_zookeeper
01354_order_by_tuple_collate_const
01355_ilike
01411_bayesian_ab_testing
01798_uniq_theta_sketch
01799_long_uniq_theta_sketch
01890_stem # depends on libstemmer_c
02003_compress_bz2 # depends on bzip2
01059_storage_file_compression # depends on brotli and bzip2
collate
collation
_orc_
arrow
avro
base64
brotli
capnproto
client
ddl_dictionaries
h3
hashing
hdfs
java_hash
json
limit_memory
live_view
memory_leak
memory_limit
mysql
odbc
parallel_alter
parquet
protobuf
secure
sha256
xz
# Not sure why these two fail even in sequential mode. Disabled for now
# to make some progress.
00646_url_engine
00974_query_profiler
# In fasttest, ENABLE_LIBRARIES=0, so rocksdb engine is not enabled by default
01504_rocksdb
01686_rocksdb
# Look at DistributedFilesToInsert, so cannot run in parallel.
01460_DistributedFilesToInsert
01541_max_memory_usage_for_user_long
# Require python libraries like scipy, pandas and numpy
01322_ttest_scipy
01561_mann_whitney_scipy
01545_system_errors
# Checks system.errors
01563_distributed_query_finish
# nc - command not found
01601_proxy_protocol
01622_defaults_for_url_engine
# JSON functions
01666_blns
# Requires postgresql-client
01802_test_postgresql_protocol_with_row_policy
# Depends on AWS
01801_s3_cluster
02012_settings_clause_for_s3
# needs psql
01889_postgresql_protocol_null_fields
# needs pv
01923_network_receive_time_metric_insert
01889_sqlite_read_write
# needs s2
01849_geoToS2
01851_s2_to_geo
01852_s2_get_neighbours
01853_s2_cells_intersect
01854_s2_cap_contains
01854_s2_cap_union
# needs s3
01944_insert_partition_by
# depends on Go
02013_zlib_read_after_eof
# Accesses CH via mysql table function (which is unavailable)
01747_system_session_log_long
)
time clickhouse-test --hung-check -j 8 --order=random --use-skip-list \
--no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" \
time clickhouse-test --hung-check -j 8 --order=random \
--fast-tests-only --no-long --testname --shard --zookeeper \
-- "$FASTTEST_FOCUS" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee "$FASTTEST_OUTPUT/test_log.txt"


@ -12,7 +12,7 @@ stage=${stage:-}
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "$script_dir"
repo_dir=ch
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-12_debug_none_bundled_unsplitted_disable_False_binary"}
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-13_debug_none_bundled_unsplitted_disable_False_binary"}
function clone
{


@ -2,7 +2,7 @@
set -euo pipefail
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-12_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-13_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}


@ -38,7 +38,7 @@ RUN set -x \
&& dpkg -i "${PKG_VERSION}.deb"
CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \
&& cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF -DCMAKE_C_COMPILER=clang-12 -DCMAKE_CXX_COMPILER=clang\+\+-12 \
&& cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang\+\+-13 \
&& ninja re2_st clickhouse_grpc_protos \
&& pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \
cp /repo_folder/pvs-studio.log /test_output; \


@ -108,7 +108,7 @@ function run_tests()
ADDITIONAL_OPTIONS+=('--replicated-database')
fi
clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --use-skip-list --print-time "${ADDITIONAL_OPTIONS[@]}" \
clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
"$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
}


@ -97,7 +97,7 @@ function run_tests()
fi
clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
--use-skip-list --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
--test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt
}


@ -13,8 +13,4 @@ dpkg -i package_folder/clickhouse-test_*.deb
service clickhouse-server start && sleep 5
if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then
SKIP_LIST_OPT="--use-skip-list"
fi
clickhouse-test --testname --shard --zookeeper "$SKIP_LIST_OPT" "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
clickhouse-test --testname --shard --zookeeper "$ADDITIONAL_OPTIONS" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt


@ -10,14 +10,6 @@ import logging
import time
def get_skip_list_cmd(path):
with open(path, 'r') as f:
for line in f:
if '--use-skip-list' in line:
return '--use-skip-list'
return ''
def get_options(i):
options = []
client_options = []
@ -56,8 +48,6 @@ def get_options(i):
def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_time_limit):
skip_list_opt = get_skip_list_cmd(cmd)
global_time_limit_option = ''
if global_time_limit:
global_time_limit_option = "--global_time_limit={}".format(global_time_limit)
@ -66,7 +56,7 @@ def run_func_test(cmd, output_prefix, num_processes, skip_tests_option, global_t
pipes = []
for i in range(0, len(output_paths)):
f = open(output_paths[i], 'w')
full_command = "{} {} {} {} {}".format(cmd, skip_list_opt, get_options(i), global_time_limit_option, skip_tests_option)
full_command = "{} {} {} {}".format(cmd, get_options(i), global_time_limit_option, skip_tests_option)
logging.info("Run func tests '%s'", full_command)
p = Popen(full_command, shell=True, stdout=f, stderr=f)
pipes.append(p)


@ -76,7 +76,7 @@ cd ClickHouse
rm -rf build
mkdir build
cd build
cmake -DCMAKE_C_COMPILER=$(brew --prefix gcc)/bin/gcc-10 -DCMAKE_CXX_COMPILER=$(brew --prefix gcc)/bin/g++-10 -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
cmake -DCMAKE_C_COMPILER=$(brew --prefix gcc)/bin/gcc-11 -DCMAKE_CXX_COMPILER=$(brew --prefix gcc)/bin/g++-11 -DCMAKE_BUILD_TYPE=RelWithDebInfo ..
cmake --build . --config RelWithDebInfo
cd ..
```


@ -23,7 +23,7 @@ $ sudo apt-get install git cmake python ninja-build
Or cmake3 instead of cmake on older systems.
### Install clang-12 (recommended) {#install-clang-12}
### Install clang-13 (recommended) {#install-clang-13}
On Ubuntu/Debian you can use the automatic installation script (check [official webpage](https://apt.llvm.org/))
@ -33,11 +33,11 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
For other Linux distributions, check the availability of the [prebuilt packages](https://releases.llvm.org/download.html) or build clang [from sources](https://clang.llvm.org/get_started.html).
#### Use clang-12 for Builds
#### Use clang-13 for Builds
``` bash
$ export CC=clang-12
$ export CXX=clang++-12
$ export CC=clang-13
$ export CXX=clang++-13
```
Gcc can also be used though it is discouraged.


@ -210,4 +210,4 @@ ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-
## See also
- [S3 table function](../../../sql-reference/table-functions/s3.md)
- [s3 table function](../../../sql-reference/table-functions/s3.md)


@ -288,5 +288,7 @@ If the data in ZooKeeper was lost or damaged, you can save data by moving it to
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
- [background_fetches_pool_size](../../../operations/settings/settings.md#background_fetches_pool_size)
- [execute_merges_on_single_replica_time_threshold](../../../operations/settings/settings.md#execute-merges-on-single-replica-time-threshold)
- [max_replicated_fetches_network_bandwidth](../../../operations/settings/merge-tree-settings.md#max_replicated_fetches_network_bandwidth)
- [max_replicated_sends_network_bandwidth](../../../operations/settings/merge-tree-settings.md#max_replicated_sends_network_bandwidth)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/replication/) <!--hide-->


@ -60,6 +60,7 @@ The supported formats are:
| [LineAsString](#lineasstring) | ✔ | ✗ |
| [Regexp](#data-format-regexp) | ✔ | ✗ |
| [RawBLOB](#rawblob) | ✔ | ✔ |
| [MsgPack](#msgpack) | ✔ | ✔ |
You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](../operations/settings/settings.md) section.
@ -1551,4 +1552,31 @@ Result:
f9725a22f9191e064120d718e26862a9 -
```
[Original article](https://clickhouse.tech/docs/en/interfaces/formats/) <!--hide-->
## MsgPack {#msgpack}
ClickHouse supports reading and writing [MessagePack](https://msgpack.org/) data files.
### Data Types Matching {#data-types-matching-msgpack}
| MsgPack data type | ClickHouse data type |
|---------------------------------|----------------------------------------------------------------------------------|
| `uint N`, `positive fixint` | [UIntN](../sql-reference/data-types/int-uint.md) |
| `int N` | [IntN](../sql-reference/data-types/int-uint.md) |
| `fixstr`, `str 8`, `str 16`, `str 32` | [String](../sql-reference/data-types/string.md), [FixedString](../sql-reference/data-types/fixedstring.md) |
| `float 32` | [Float32](../sql-reference/data-types/float.md) |
| `float 64` | [Float64](../sql-reference/data-types/float.md) |
| `uint 16` | [Date](../sql-reference/data-types/date.md) |
| `uint 32` | [DateTime](../sql-reference/data-types/datetime.md) |
| `uint 64` | [DateTime64](../sql-reference/data-types/datetime.md) |
| `fixarray`, `array 16`, `array 32`| [Array](../sql-reference/data-types/array.md) |
| `nil` | [Nothing](../sql-reference/data-types/special-data-types/nothing.md) |
Example:
Writing to a file ".msgpk":
``` bash
$ clickhouse-client --query="CREATE TABLE msgpack (array Array(UInt8)) ENGINE = Memory;"
$ clickhouse-client --query="INSERT INTO msgpack VALUES ([0, 1, 2, 3, 42, 253, 254, 255]), ([255, 254, 253, 42, 3, 2, 1, 0])";
$ clickhouse-client --query="SELECT * FROM msgpack FORMAT MsgPack" > tmp_msgpack.msgpk;
```
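Reading the data back (a sketch, not part of the commit; it assumes the file has been placed where the `file` table function can reach it, i.e. the server's `user_files` directory):

``` bash
$ clickhouse-client --query="SELECT * FROM file('tmp_msgpack.msgpk', 'MsgPack', 'array Array(UInt8)');"
```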


@ -84,7 +84,7 @@ Features:
- Table data preview.
- Full-text search.
By default, DBeaver does not connect using a session (the CLI for example does). If you require session support (for example to set settings for your session), edit the driver connection properties and set session_id to a random string (it uses the http connection under the hood). Then you can use any setting from the query window
By default, DBeaver does not connect using a session (the CLI for example does). If you require session support (for example to set settings for your session), edit the driver connection properties and set `session_id` to a random string (it uses the http connection under the hood). Then you can use any setting from the query window.
### clickhouse-cli {#clickhouse-cli}


@ -69,29 +69,85 @@ If no conditions met for a data part, ClickHouse uses the `lz4` compression.
</compression>
```
<!--
## encryption {#server-settings-encryption}
Configures a command to obtain a key to be used by [encryption codecs](../../sql-reference/statements/create/table.md#create-query-encryption-codecs). The command, or a shell script, is expected to write a Base64-encoded key of any length to the stdout.
Configures keys to be used by [encryption codecs](../../sql-reference/statements/create/table.md#create-query-encryption-codecs). The key (or keys) should be provided via environment variables or set in the configuration file.
Keys can be hex or string; their length must be equal to 16 bytes.
**Example**
For Linux with systemd:
Load from config:
```xml
<encryption>
<key_command>/usr/bin/systemd-ask-password --id="clickhouse-server" --timeout=0 "Enter the ClickHouse encryption passphrase:" | base64</key_command>
</encryption>
<encryption_codecs>
<aes_128_gcm_siv>
<key>12345567812345678</key>
</aes_128_gcm_siv>
</encryption_codecs>
```
For other systems:
!!! note "NOTE"
Storing keys in configuration file is not recommended. It isn't secure. You can move the keys into a separate config file on a secure disk and put a symlink to that config file to `config.d/` folder.
Load from config, when key is in hex:
```xml
<encryption>
<key_command><![CDATA[IFS=; echo -n >/dev/tty "Enter the ClickHouse encryption passphrase: "; stty=`stty -F /dev/tty -g`; stty -F /dev/tty -echo; read k </dev/tty; stty -F /dev/tty "$stty"; echo -n $k | base64]]></key_command>
</encryption>
<encryption_codecs>
<aes_128_gcm_siv>
<key_hex>00112233445566778899aabbccddeeff</key_hex>
</aes_128_gcm_siv>
</encryption_codecs>
```
-->
Load key from environment variable:
```xml
<encryption_codecs>
<aes_128_gcm_siv>
<key_hex from_env="KEY"></key_hex>
</aes_128_gcm_siv>
</encryption_codecs>
```
Where `current_key_id` sets the current key for encryption, and all specified keys can be used for decryption.
All these methods can be applied to multiple keys:
```xml
<encryption_codecs>
<aes_128_gcm_siv>
<key_hex id="0">00112233445566778899aabbccddeeff</key_hex>
<key_hex id="1" from_env=".."></key_hex>
<current_key_id>1</current_key_id>
</aes_128_gcm_siv>
</encryption_codecs>
```
Where `current_key_id` shows the current key for encryption.
The user can also add a nonce, which must be 12 bytes long (by default, encryption and decryption use a nonce consisting of zero bytes):
```xml
<encryption_codecs>
<aes_128_gcm_siv>
<nonce>012345678910</nonce>
</aes_128_gcm_siv>
</encryption_codecs>
```
Or it can be set in hex:
```xml
<encryption_codecs>
<aes_128_gcm_siv>
<nonce_hex>abcdefabcdef</nonce_hex>
</aes_128_gcm_siv>
</encryption_codecs>
```
Everything above can be applied to `aes_256_gcm_siv` (but the key must be 32 bytes long).
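A sketch of a column using the configured codec (table and column names are illustrative; see the encryption codecs section linked above for the full syntax):

``` sql
CREATE TABLE secrets
(
    id UInt64,
    payload String CODEC(AES_128_GCM_SIV)
)
ENGINE = MergeTree
ORDER BY id;
```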
## custom_settings_prefixes {#custom_settings_prefixes}
List of prefixes for [custom settings](../../operations/settings/index.md#custom_settings). The prefixes must be separated with commas.


@ -181,6 +181,44 @@ Possible values:
Default value: 0.
## max_replicated_fetches_network_bandwidth {#max_replicated_fetches_network_bandwidth}
Limits the maximum speed of data exchange over the network in bytes per second for [replicated](../../engines/table-engines/mergetree-family/replication.md) fetches. This setting is applied to a particular table, unlike the [max_replicated_fetches_network_bandwidth_for_server](settings.md#max_replicated_fetches_network_bandwidth_for_server) setting, which is applied to the server.
You can limit the bandwidth both for the whole server and for a particular table, but for that the value of the table-level setting must be less than the server-level one; otherwise the server considers only the `max_replicated_fetches_network_bandwidth_for_server` setting.
The setting is not enforced perfectly accurately.
Possible values:
- Positive integer.
- 0 — Unlimited.
Default value: `0`.
**Usage**
Can be used to throttle the network speed when replicating data to add or replace new nodes.
## max_replicated_sends_network_bandwidth {#max_replicated_sends_network_bandwidth}
Limits the maximum speed of data exchange over the network in bytes per second for [replicated](../../engines/table-engines/mergetree-family/replication.md) sends. This setting is applied to a particular table, unlike the [max_replicated_sends_network_bandwidth_for_server](settings.md#max_replicated_sends_network_bandwidth_for_server) setting, which is applied to the server.
You can limit the bandwidth both for the whole server and for a particular table, but for that the value of the table-level setting must be less than the server-level one; otherwise the server considers only the `max_replicated_sends_network_bandwidth_for_server` setting.
The setting is not enforced perfectly accurately.
Possible values:
- Positive integer.
- 0 — Unlimited.
Default value: `0`.
**Usage**
Can be used to throttle the network speed when replicating data to add or replace new nodes.
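For example, a per-table cap could be applied like this (the table name and value are illustrative):

``` sql
ALTER TABLE replicated_table
    MODIFY SETTING max_replicated_sends_network_bandwidth = 52428800; -- 50 MiB/s
```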
## old_parts_lifetime {#old-parts-lifetime}
The time (in seconds) of storing inactive parts to protect against data loss during spontaneous server reboots.


@ -1140,6 +1140,40 @@ Possible values:
Default value: `5`.
## max_replicated_fetches_network_bandwidth_for_server {#max_replicated_fetches_network_bandwidth_for_server}
Limits the maximum speed of data exchange over the network in bytes per second for [replicated](../../engines/table-engines/mergetree-family/replication.md) fetches for the server. Only has meaning at server startup. You can also limit the speed for a particular table with [max_replicated_fetches_network_bandwidth](../../operations/settings/merge-tree-settings.md#max_replicated_fetches_network_bandwidth) setting.
The setting is not enforced perfectly accurately.
Possible values:
- Positive integer.
- 0 — Unlimited.
Default value: `0`.
**Usage**
Can be used to throttle the network speed when replicating data to add or replace new nodes.
## max_replicated_sends_network_bandwidth_for_server {#max_replicated_sends_network_bandwidth_for_server}
Limits the maximum speed of data exchange over the network in bytes per second for [replicated](../../engines/table-engines/mergetree-family/replication.md) sends for the server. Only has meaning at server startup. You can also limit the speed for a particular table with [max_replicated_sends_network_bandwidth](../../operations/settings/merge-tree-settings.md#max_replicated_sends_network_bandwidth) setting.
The setting is not enforced perfectly accurately.
Possible values:
- Positive integer.
- 0 — Unlimited.
Default value: `0`.
**Usage**
Can be used to throttle the network speed when replicating data to add or replace new nodes.
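A sketch of setting both server-level limits in the default settings profile (the placement in the profile is an assumption; the values are illustrative):

```xml
<profiles>
    <default>
        <max_replicated_fetches_network_bandwidth_for_server>104857600</max_replicated_fetches_network_bandwidth_for_server>
        <max_replicated_sends_network_bandwidth_for_server>104857600</max_replicated_sends_network_bandwidth_for_server>
    </default>
</profiles>
```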
## connect_timeout_with_failover_ms {#connect-timeout-with-failover-ms}
The timeout in milliseconds for connecting to a remote server for a Distributed table engine, if the shard and replica sections are used in the cluster definition.


@ -87,7 +87,23 @@ The function is using uppercase letters `A-F` and not using any prefixes (like `
For integer arguments, it prints hex digits (“nibbles”) from the most significant to least significant (big-endian or “human-readable” order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints both digits of every byte even if the leading digit is zero.
**Example**
Values of type [Date](../../sql-reference/data-types/date.md) and [DateTime](../../sql-reference/data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for Date and the value of Unix Timestamp for DateTime).
For [String](../../sql-reference/data-types/string.md) and [FixedString](../../sql-reference/data-types/fixedstring.md), all bytes are simply encoded as two hexadecimal numbers. Zero bytes are not omitted.
Values of [Float](../../sql-reference/data-types/float.md) and [Decimal](../../sql-reference/data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted.
**Arguments**
- `arg` — A value to convert to hexadecimal. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
**Returned value**
- A string with the hexadecimal representation of the argument.
Type: [String](../../sql-reference/data-types/string.md).
**Examples**
Query:
@ -101,28 +117,10 @@ Result:
01
```
Values of type `Date` and `DateTime` are formatted as corresponding integers (the number of days since Epoch for Date and the value of Unix Timestamp for DateTime).
For `String` and `FixedString`, all bytes are simply encoded as two hexadecimal numbers. Zero bytes are not omitted.
Values of floating point and Decimal types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted.
**Arguments**
- `arg` — A value to convert to hexadecimal. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
**Returned value**
- A string with the hexadecimal representation of the argument.
Type: `String`.
**Example**
Query:
``` sql
SELECT hex(toFloat32(number)) as hex_presentation FROM numbers(15, 2);
SELECT hex(toFloat32(number)) AS hex_presentation FROM numbers(15, 2);
```
Result:
@ -137,7 +135,7 @@ Result:
Query:
``` sql
SELECT hex(toFloat64(number)) as hex_presentation FROM numbers(15, 2);
SELECT hex(toFloat64(number)) AS hex_presentation FROM numbers(15, 2);
```
Result:
@ -210,52 +208,52 @@ Result:
Returns a string containing the argument's binary representation.
Alias: `BIN`.
**Syntax**
``` sql
bin(arg)
```
Alias: `BIN`.
For integer arguments, it prints bin digits from the most significant to least significant (big-endian or “human-readable” order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints eight digits of every byte even if the leading digit is zero.
**Example**
Values of type [Date](../../sql-reference/data-types/date.md) and [DateTime](../../sql-reference/data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for `Date` and the value of Unix Timestamp for `DateTime`).
Query:
For [String](../../sql-reference/data-types/string.md) and [FixedString](../../sql-reference/data-types/fixedstring.md), all bytes are simply encoded as eight binary numbers. Zero bytes are not omitted.
``` sql
SELECT bin(1);
```
Result:
``` text
00000001
```
Values of type `Date` and `DateTime` are formatted as corresponding integers (the number of days since Epoch for Date and the value of Unix Timestamp for DateTime).
For `String` and `FixedString`, all bytes are simply encoded as eight binary numbers. Zero bytes are not omitted.
Values of floating-point and Decimal types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted.
Values of [Float](../../sql-reference/data-types/float.md) and [Decimal](../../sql-reference/data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted.
**Arguments**
- `arg` — A value to convert to binary. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
- `arg` — A value to convert to binary. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md), or [DateTime](../../sql-reference/data-types/datetime.md).
**Returned value**
- A string with the binary representation of the argument.
Type: `String`.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
**Examples**
Query:
``` sql
SELECT bin(toFloat32(number)) as bin_presentation FROM numbers(15, 2);
SELECT bin(14);
```
Result:
``` text
┌─bin(14)──┐
│ 00001110 │
└──────────┘
```
Query:
``` sql
SELECT bin(toFloat32(number)) AS bin_presentation FROM numbers(15, 2);
```
Result:
@ -270,7 +268,7 @@ Result:
Query:
``` sql
SELECT bin(toFloat64(number)) as bin_presentation FROM numbers(15, 2);
SELECT bin(toFloat64(number)) AS bin_presentation FROM numbers(15, 2);
```
Result:
@ -284,14 +282,7 @@ Result:
## unbin {#unbinstr}
Performs the opposite operation of [bin](#bin). It interprets each pair of binary digits (in the argument) as a number and converts it to the byte represented by the number. The return value is a binary string (BLOB).
If you want to convert the result to a number, you can use the [reverse](../../sql-reference/functions/string-functions.md#reverse) and [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#type-conversion-functions) functions.
!!! note "Note"
If `unbin` is invoked from within the `clickhouse-client`, binary strings display using UTF-8.
Alias: `UNBIN`.
Interprets each pair of binary digits (in the argument) as a number and converts it to the byte represented by the number. The function performs the opposite operation to [bin](#bin).
**Syntax**
@ -299,11 +290,18 @@ Alias: `UNBIN`.
unbin(arg)
```
Alias: `UNBIN`.
For a numeric argument `unbin()` does not return the inverse of `bin()`. If you want to convert the result to a number, you can use the [reverse](../../sql-reference/functions/string-functions.md#reverse) and [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#reinterpretasuint8163264) functions.
!!! note "Note"
If `unbin` is invoked from within the `clickhouse-client`, binary strings are displayed using UTF-8.
Supports binary digits `0` and `1`. The number of binary digits does not have to be a multiple of eight. If the argument string contains anything other than binary digits, some implementation-defined result is returned (an exception isn't thrown).
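For instance, a three-digit input is interpreted as a single byte; the `reinterpretAsUInt8` step below is just one way to make that byte visible (a sketch):

``` sql
SELECT reinterpretAsUInt8(unbin('101')) AS num; -- '101' is read as the byte 0x05, so num is 5
```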
**Arguments**
- `arg` — A string containing any number of binary digits. Type: [String](../../sql-reference/data-types/string.md).
Supports binary digits `0-1`. The number of binary digits does not have to be multiples of eight. If the argument string contains anything other than binary digits, some implementation-defined result is returned (an exception isnt thrown). For a numeric argument the inverse of bin(N) is not performed by unbin().
- `arg` — A string containing any number of binary digits. [String](../../sql-reference/data-types/string.md).
**Returned value**
@ -311,7 +309,7 @@ Supports binary digits `0-1`. The number of binary digits does not have to be mu
Type: [String](../../sql-reference/data-types/string.md).
**Example**
**Examples**
Query:
@ -330,14 +328,14 @@ Result:
Query:
``` sql
SELECT reinterpretAsUInt64(reverse(unbin('1010'))) AS num;
SELECT reinterpretAsUInt64(reverse(unbin('1110'))) AS num;
```
Result:
``` text
┌─num─┐
│ 10
│ 14
└─────┘
```
@ -396,7 +394,7 @@ Result:
Query:
``` sql
select bitPositionsToArray(toInt8(-1)) as bit_positions;
SELECT bitPositionsToArray(toInt8(-1)) AS bit_positions;
```
Result:

View File

@ -3,7 +3,7 @@ toc_priority: 45
toc_title: s3
---
# S3 Table Function {#s3-table-function}
# s3 Table Function {#s3-table-function}
Provides table-like interface to select/insert files in [Amazon S3](https://aws.amazon.com/s3/). This table function is similar to [hdfs](../../sql-reference/table-functions/hdfs.md), but provides S3-specific features.
@ -125,6 +125,30 @@ INSERT INTO FUNCTION s3('https://storage.yandexcloud.net/my-test-bucket-768/test
SELECT name, value FROM existing_table;
```
## Partitioned Write {#partitioned-write}
If you specify a `PARTITION BY` expression when inserting data into an `S3` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve the efficiency of read operations.
**Examples**
1. Using partition ID in a key creates separate files:
```sql
INSERT INTO TABLE FUNCTION
s3('http://bucket.amazonaws.com/my_bucket/file_{_partition_id}.csv', 'CSV', 'a String, b UInt32, c UInt32')
PARTITION BY a VALUES ('x', 2, 3), ('x', 4, 5), ('y', 11, 12), ('y', 13, 14), ('z', 21, 22), ('z', 23, 24);
```
As a result, the data is written into three files: `file_x.csv`, `file_y.csv`, and `file_z.csv`.
2. Using partition ID in a bucket name creates files in different buckets:
```sql
INSERT INTO TABLE FUNCTION
s3('http://bucket.amazonaws.com/my_bucket_{_partition_id}/file.csv', 'CSV', 'a UInt32, b UInt32, c UInt32')
PARTITION BY a VALUES (1, 2, 3), (1, 4, 5), (10, 11, 12), (10, 13, 14), (20, 21, 22), (20, 23, 24);
```
As a result, the data is written into three files in different buckets: `my_bucket_1/file.csv`, `my_bucket_10/file.csv`, and `my_bucket_20/file.csv`.
**See Also**
- [S3 engine](../../engines/table-engines/integrations/s3.md)

View File

@ -74,7 +74,7 @@ $ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/
$ rm -rf build
$ mkdir build
$ cd build
$ cmake -DCMAKE_C_COMPILER=$(brew --prefix gcc)/bin/gcc-10 -DCMAKE_CXX_COMPILER=$(brew --prefix gcc)/bin/g++-10 -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
$ cmake -DCMAKE_C_COMPILER=$(brew --prefix gcc)/bin/gcc-11 -DCMAKE_CXX_COMPILER=$(brew --prefix gcc)/bin/g++-11 -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF ..
$ cmake --build . --config RelWithDebInfo
$ cd ..
```

View File

@ -15,7 +15,7 @@ toc_title: PostgreSQL
``` sql
CREATE DATABASE test_database
ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `use_table_cache`]);
ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `schema`, `use_table_cache`]);
```
**Engine Parameters**
@ -24,6 +24,7 @@ ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `use_table_cac
- `database` — the name of the remote database.
- `user` — the PostgreSQL user.
- `password` — the user's password.
- `schema` — the PostgreSQL schema (see the example after this list).
- `use_table_cache` — enables caching of the database table structure. Optional. Default value: `0`.
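A minimal sketch of creating a database over a non-default schema (the host, credentials, and `nice_schema` are placeholder values):

``` sql
CREATE DATABASE test_database
ENGINE = PostgreSQL('postgres-host:5432', 'postgres_db', 'postgres_user', 'postgres_password', 'nice_schema', 1);
```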
## Supported Data Types {#data_types-support}

View File

@ -151,4 +151,4 @@ ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-
**See Also**
- [S3 table function](../../../sql-reference/table-functions/s3.md)
- [s3 table function](../../../sql-reference/table-functions/s3.md)

View File

@ -253,4 +253,5 @@ $ sudo -u clickhouse touch /var/lib/clickhouse/flags/force_restore_data
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
- [background_fetches_pool_size](../../../operations/settings/settings.md#background_fetches_pool_size)
- [execute_merges_on_single_replica_time_threshold](../../../operations/settings/settings.md#execute-merges-on-single-replica-time-threshold)
- [max_replicated_fetches_network_bandwidth](../../../operations/settings/merge-tree-settings.md#max_replicated_fetches_network_bandwidth)
- [max_replicated_sends_network_bandwidth](../../../operations/settings/merge-tree-settings.md#max_replicated_sends_network_bandwidth)

View File

@ -59,6 +59,7 @@ ClickHouse может принимать (`INSERT`) и отдавать (`SELECT
| [LineAsString](#lineasstring) | ✔ | ✗ |
| [Regexp](#data-format-regexp) | ✔ | ✗ |
| [RawBLOB](#rawblob) | ✔ | ✔ |
| [MsgPack](#msgpack) | ✔ | ✔ |
You can adjust some format processing parameters with ClickHouse settings. For more information read the [Settings](../operations/settings/settings.md) section.
@ -1464,3 +1465,32 @@ $ clickhouse-client --query "SELECT * FROM {some_table} FORMAT RawBLOB" | md5sum
``` text
f9725a22f9191e064120d718e26862a9 -
```
## MsgPack {#msgpack}
ClickHouse supports reading from and writing to files in the [MessagePack](https://msgpack.org/) format.
### Data Types Matching {#data-types-matching-msgpack}
| MsgPack data type               | ClickHouse data type                                                                 |
|---------------------------------|------------------------------------------------------------------------------------|
| `uint N`, `positive fixint` | [UIntN](../sql-reference/data-types/int-uint.md) |
| `int N` | [IntN](../sql-reference/data-types/int-uint.md) |
| `fixstr`, `str 8`, `str 16`, `str 32` | [String](../sql-reference/data-types/string.md), [FixedString](../sql-reference/data-types/fixedstring.md) |
| `float 32` | [Float32](../sql-reference/data-types/float.md) |
| `float 64` | [Float64](../sql-reference/data-types/float.md) |
| `uint 16` | [Date](../sql-reference/data-types/date.md) |
| `uint 32` | [DateTime](../sql-reference/data-types/datetime.md) |
| `uint 64` | [DateTime64](../sql-reference/data-types/datetime.md) |
| `fixarray`, `array 16`, `array 32`| [Array](../sql-reference/data-types/array.md) |
| `nil` | [Nothing](../sql-reference/data-types/special-data-types/nothing.md) |
Example:
Writing to a ".msgpk" file:
```sql
$ clickhouse-client --query="CREATE TABLE msgpack (array Array(UInt8)) ENGINE = Memory;"
$ clickhouse-client --query="INSERT INTO msgpack VALUES ([0, 1, 2, 3, 42, 253, 254, 255]), ([255, 254, 253, 42, 3, 2, 1, 0])";
$ clickhouse-client --query="SELECT * FROM msgpack FORMAT MsgPack" > tmp_msgpack.msgpk;
```
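One possible way to read the data back is the [file](../sql-reference/table-functions/file.md) table function; this is a sketch and assumes the file has been placed under `user_files_path`:

```sql
SELECT * FROM file('tmp_msgpack.msgpk', 'MsgPack', 'array Array(UInt8)');
```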

View File

@ -75,11 +75,13 @@ toc_title: "Визуальные интерфейсы от сторонних р
Main features:
- Query building with syntax highlighting.
- Table preview.
- Command autocompletion.
- Full-text search.
By default, DBeaver does not use sessions when connecting (unlike the CLI, for example). If you need session support (for example, to set session-level settings), edit the driver connection properties and set `session_id` to an arbitrary value (the driver uses an HTTP connection). After that you can use any setting in the query window, as the sketch below illustrates.
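For instance, once `session_id` is set, session-level settings persist between statements in the SQL editor (a hypothetical illustration):

``` sql
SET max_threads = 2;
SELECT value FROM system.settings WHERE name = 'max_threads'; -- returns 2 within the same session
```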
### clickhouse-cli {#clickhouse-cli}
[clickhouse-cli](https://github.com/hatarist/clickhouse-cli) is an alternative command-line client for ClickHouse, written in Python 3.

View File

@ -201,6 +201,44 @@ Eсли суммарное число активных кусков во все
Default value: `0`.
## max_replicated_fetches_network_bandwidth {#max_replicated_fetches_network_bandwidth}
Limits the maximum download speed over the network (in bytes per second) for data syncs between [replicas](../../engines/table-engines/mergetree-family/replication.md). The setting applies to a particular table, unlike [max_replicated_fetches_network_bandwidth_for_server](settings.md#max_replicated_fetches_network_bandwidth_for_server), which applies to the whole server.
You can limit the exchange speed both for the whole server and for a particular table, but the table-level value must be smaller than the server-level one; otherwise the server only respects the `max_replicated_fetches_network_bandwidth_for_server` setting.
The setting is not enforced precisely.
Possible values:
- Any positive integer.
- 0 — The speed is not limited.
Default value: `0`.
**Usage**
Can be used to limit the data transfer speed when replicating data to add or replace new nodes; a sketch of setting the table-level limit follows.
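A sketch of setting the table-level limit (`replicated_table` is a placeholder name, and 52428800 bytes is an arbitrary 50 MiB/s chosen for illustration):

``` sql
ALTER TABLE replicated_table
    MODIFY SETTING max_replicated_fetches_network_bandwidth = 52428800;
```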
## max_replicated_sends_network_bandwidth {#max_replicated_sends_network_bandwidth}
Limits the maximum upload speed over the network (in bytes per second) for data syncs between [replicas](../../engines/table-engines/mergetree-family/replication.md). The setting applies to a particular table, unlike [max_replicated_sends_network_bandwidth_for_server](settings.md#max_replicated_sends_network_bandwidth_for_server), which applies to the whole server.
You can limit the exchange speed both for the whole server and for a particular table, but the table-level value must be smaller than the server-level one; otherwise the server only respects the `max_replicated_sends_network_bandwidth_for_server` setting.
The setting is not enforced precisely.
Possible values:
- Any positive integer.
- 0 — The speed is not limited.
Default value: `0`.
**Usage**
Can be used to limit the network speed when replicating data to add or replace new nodes. The current values of both table-level settings can be inspected as shown below.
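The current table-level defaults of both bandwidth settings can be inspected via `system.merge_tree_settings` (a sketch):

``` sql
SELECT name, value
FROM system.merge_tree_settings
WHERE name LIKE 'max_replicated_%_network_bandwidth';
```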
## max_bytes_to_merge_at_max_space_in_pool {#max-bytes-to-merge-at-max-space-in-pool}
The maximum total size of parts (in bytes) that can be merged in one merge, if there are free resources in the background pool.

View File

@ -1098,6 +1098,40 @@ SELECT type, query FROM system.query_log WHERE log_comment = 'log_comment test'
Default value: `5`.
## max_replicated_fetches_network_bandwidth_for_server {#max_replicated_fetches_network_bandwidth_for_server}
Limits the maximum speed of network data exchange (in bytes per second) for syncs between [replicas](../../engines/table-engines/mergetree-family/replication.md). Applied only at server startup. You can also limit the speed for a particular table with the [max_replicated_fetches_network_bandwidth](../../operations/settings/merge-tree-settings.md#max_replicated_fetches_network_bandwidth) setting.
The setting is not enforced precisely.
Possible values:
- Any positive integer.
- 0 — The speed is not limited.
Default value: `0`.
**Usage**
Can be used to limit the network speed when replicating data to add or replace new nodes.
## max_replicated_sends_network_bandwidth_for_server {#max_replicated_sends_network_bandwidth_for_server}
Limits the maximum speed of network data exchange (in bytes per second) for [replicated](../../engines/table-engines/mergetree-family/replication.md) sends. Applied only at server startup. You can also limit the speed for a particular table with the [max_replicated_sends_network_bandwidth](../../operations/settings/merge-tree-settings.md#max_replicated_sends_network_bandwidth) setting.
The setting is not enforced precisely.
Possible values:
- Any positive integer.
- 0 — The speed is not limited.
Default value: `0`.
**Usage**
Can be used to limit the network speed when replicating data to add or replace new nodes.
## connect_timeout_with_failover_ms {#connect-timeout-with-failover-ms}
Connection timeout in milliseconds for connecting to a remote server of the Distributed table engine, when the shard and replica sections are used in the cluster definition.

View File

@ -78,10 +78,11 @@ active_replicas: 2
- `log_max_index` (`UInt64`) - the maximum entry number in the log of general activity.
- `log_pointer` (`UInt64`) - the maximum entry number from the log of general activity that the replica has copied to its execution queue, plus one. If log_pointer is much smaller than log_max_index, something is wrong.
- `last_queue_update` (`DateTime`) - the time when the queue was last updated.
- `absolute_delay` (`UInt64`) - the lag (in seconds) of the current replica.
- `total_replicas` (`UInt8`) - the total number of known replicas of this table.
- `active_replicas` (`UInt8`) - the number of replicas of this table that have a session in ZK; i.e., the number of working replicas.
- `replica_is_active` ([Map(String, UInt8)](../../sql-reference/data-types/map.md)) — a mapping between the replica name and whether the replica is active.
If you query all the columns, the table may work a bit slowly, since several reads from ZK are made for each row.
If you do not request the last 4 columns (log_max_index, log_pointer, total_replicas, active_replicas), the table works quickly. A query of this kind is sketched below.
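A quick replication-lag check that avoids the expensive columns could look like this (a sketch):

``` sql
SELECT database, table, absolute_delay, replica_is_active
FROM system.replicas;
```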

View File

@ -17,13 +17,13 @@ char(number_1, [number_2, ..., number_n]);
**Arguments**
- `number_1, number_2, ..., number_n` — numeric arguments interpreted as integers. Types: [Int](../../sql-reference/functions/encoding-functions.md), [Float](../../sql-reference/functions/encoding-functions.md).
- `number_1, number_2, ..., number_n` — numeric arguments interpreted as integers. Types: [Int](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md).
**Returned value**
- A string of the corresponding bytes.
Type: `String`.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
@ -73,61 +73,57 @@ SELECT char(0xE4, 0xBD, 0xA0, 0xE5, 0xA5, 0xBD) AS hello;
## hex {#hex}
Returns a string containing the argument's hexadecimal representation.
Alias: `HEX`.
**Syntax**
``` sql
hex(arg)
```
The function uses uppercase letters `A-F` and does not use any prefixes (like `0x`) or suffixes (like `h`).
For integer arguments, it prints hex digits (“nibbles”) from the most significant to the least significant (big-endian, or “human-readable”, order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints both digits of every byte even if the leading digit is zero.
Values of type [Date](../../sql-reference/data-types/date.md) and [DateTime](../../sql-reference/data-types/datetime.md) are formatted as the corresponding integers (the number of days since Epoch for `Date` and the value of the Unix timestamp for `DateTime`).
For [String](../../sql-reference/data-types/string.md) and [FixedString](../../sql-reference/data-types/fixedstring.md), all bytes are simply encoded as two hexadecimal numbers. Zero bytes are not omitted.
Values of [Float](../../sql-reference/data-types/float.md) and [Decimal](../../sql-reference/data-types/decimal.md) types are encoded as their representation in memory. Since ClickHouse supports the `little-endian` architecture, they are encoded from the least significant to the most significant byte. Leading/trailing zero bytes are not omitted.
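Again, a small `Date` illustration may help; this is a sketch, and `1970-01-11` is day 10, so the expected output is `0A`:

``` sql
SELECT hex(toDate('1970-01-11')) AS hex_date;
```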
**Arguments**
- `arg` — a value to convert to hexadecimal. [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
**Returned value**
- A string with the hexadecimal representation of the argument.
Type: [String](../../sql-reference/data-types/string.md).
**Examples**
Query:
``` sql
SELECT hex(1);
```
Result:
``` text
01
```
Query:
``` sql
SELECT hex(toFloat32(number)) AS hex_presentation FROM numbers(15, 2);
```
Result:
``` text
┌─hex_presentation─┐
@ -136,13 +132,13 @@ Result:
└──────────────────┘
```
Query:
``` sql
SELECT hex(toFloat64(number)) AS hex_presentation FROM numbers(15, 2);
```
Result:
``` text
┌─hex_presentation─┐
@ -208,6 +204,141 @@ SELECT reinterpretAsUInt64(reverse(unhex('FFF'))) AS num;
└──────┘
```
## bin {#bin}
Returns a string containing the argument's binary representation.
**Syntax**
``` sql
bin(arg)
```
Alias: `BIN`.
For integer arguments, it prints binary digits from the most significant to the least significant (big-endian, or “human-readable”, order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints all eight digits of every byte even if the leading digit is zero.
Values of type [Date](../../sql-reference/data-types/date.md) and [DateTime](../../sql-reference/data-types/datetime.md) are formatted as the corresponding integers (the number of days since Epoch for `Date` and the value of the Unix timestamp for `DateTime`).
For [String](../../sql-reference/data-types/string.md) and [FixedString](../../sql-reference/data-types/fixedstring.md), all bytes are simply encoded as eight binary numbers. Zero bytes are not omitted.
Values of [Float](../../sql-reference/data-types/float.md) and [Decimal](../../sql-reference/data-types/decimal.md) types are encoded as their representation in memory. Since ClickHouse supports the `little-endian` architecture, they are encoded from the least significant to the most significant byte. Leading/trailing zero bytes are not omitted.
**Arguments**
- `arg` — a value to convert to binary. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
**Returned value**
- A binary string (BLOB) with the binary representation of the argument.
Type: [String](../../sql-reference/data-types/string.md).
**Examples**
Query:
``` sql
SELECT bin(14);
```
Result:
``` text
┌─bin(14)──┐
│ 00001110 │
└──────────┘
```
Query:
``` sql
SELECT bin(toFloat32(number)) AS bin_presentation FROM numbers(15, 2);
```
Result:
``` text
┌─bin_presentation─────────────────┐
│ 00000000000000000111000001000001 │
│ 00000000000000001000000001000001 │
└──────────────────────────────────┘
```
Query:
``` sql
SELECT bin(toFloat64(number)) AS bin_presentation FROM numbers(15, 2);
```
Result:
``` text
┌─bin_presentation─────────────────────────────────────────────────┐
│ 0000000000000000000000000000000000000000000000000010111001000000 │
│ 0000000000000000000000000000000000000000000000000011000001000000 │
└──────────────────────────────────────────────────────────────────┘
```
## unbin {#unbinstr}
Interprets each pair of binary digits in the argument as a number and converts it to the byte represented by that number. The function performs the opposite operation to [bin](#bin).
**Syntax**
``` sql
unbin(arg)
```
Alias: `UNBIN`.
For a numeric argument, `unbin()` does not return the inverse of `bin()`. If you want to convert the result to a number, you can use the [reverse](../../sql-reference/functions/string-functions.md#reverse) and [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#reinterpretasuint8163264) functions.
!!! note "Note"
    If `unbin` is invoked from within the `clickhouse-client`, binary strings are displayed using UTF-8.
Supports binary digits `0` and `1`. The number of binary digits does not have to be a multiple of eight. If the argument string contains anything other than binary digits, some implementation-defined result is returned (an exception isn't thrown).
**Arguments**
- `arg` — a string containing any number of binary digits. [String](../../sql-reference/data-types/string.md).
**Returned value**
- A binary string (BLOB).
Type: [String](../../sql-reference/data-types/string.md).
**Examples**
Query:
``` sql
SELECT UNBIN('001100000011000100110010'), UNBIN('0100110101111001010100110101000101001100');
```
Result:
``` text
┌─unbin('001100000011000100110010')─┬─unbin('0100110101111001010100110101000101001100')─┐
│ 012 │ MySQL │
└───────────────────────────────────┴───────────────────────────────────────────────────┘
```
Query:
``` sql
SELECT reinterpretAsUInt64(reverse(unbin('1110'))) AS num;
```
Result:
``` text
┌─num─┐
│ 14 │
└─────┘
```
## UUIDStringToNum(str) {#uuidstringtonumstr}
Accepts a string containing 36 characters in the format `123e4567-e89b-12d3-a456-426655440000` and returns it as a set of bytes in FixedString(16).
@ -263,7 +394,7 @@ SELECT bitPositionsToArray(toInt8(1)) AS bit_positions;
Query:
``` sql
select bitPositionsToArray(toInt8(-1)) as bit_positions;
SELECT bitPositionsToArray(toInt8(-1)) AS bit_positions;
```
Result:

View File

@ -133,6 +133,30 @@ INSERT INTO FUNCTION s3('https://storage.yandexcloud.net/my-test-bucket-768/test
SELECT name, value FROM existing_table;
```
## Partitioned Write {#partitioned-write}
If you specify a `PARTITION BY` expression when inserting data into an `S3` table, a separate file is created for each partition key value. Splitting the data into separate files improves the efficiency of read operations.
**Examples**
1. Using the partition ID in a key name creates separate files:
```sql
INSERT INTO TABLE FUNCTION
s3('http://bucket.amazonaws.com/my_bucket/file_{_partition_id}.csv', 'CSV', 'a String, b UInt32, c UInt32')
PARTITION BY a VALUES ('x', 2, 3), ('x', 4, 5), ('y', 11, 12), ('y', 13, 14), ('z', 21, 22), ('z', 23, 24);
```
As a result, the data is written into three files: `file_x.csv`, `file_y.csv`, and `file_z.csv`.
2. Using the partition ID in a bucket name creates files in different buckets:
```sql
INSERT INTO TABLE FUNCTION
s3('http://bucket.amazonaws.com/my_bucket_{_partition_id}/file.csv', 'CSV', 'a UInt32, b UInt32, c UInt32')
PARTITION BY a VALUES (1, 2, 3), (1, 4, 5), (10, 11, 12), (10, 13, 14), (20, 21, 22), (20, 23, 24);
```
As a result, three files are created in different buckets: `my_bucket_1/file.csv`, `my_bucket_10/file.csv`, and `my_bucket_20/file.csv`.
**See Also**
- [S3 table engine](../../engines/table-engines/integrations/s3.md)

View File

@ -4,6 +4,7 @@ set (CLICKHOUSE_CLIENT_SOURCES
QueryFuzzer.cpp
Suggest.cpp
TestHint.cpp
TestTags.cpp
)
set (CLICKHOUSE_CLIENT_LINK

View File

@ -6,6 +6,7 @@
#include "QueryFuzzer.h"
#include "Suggest.h"
#include "TestHint.h"
#include "TestTags.h"
#if USE_REPLXX
# include <common/ReplxxLineReader.h>
@ -1031,19 +1032,30 @@ private:
if (server_exception)
{
bool print_stack_trace = config().getBool("stacktrace", false);
std::cerr << "Received exception from server (version " << server_version << "):" << std::endl
<< getExceptionMessage(*server_exception, print_stack_trace, true) << std::endl;
fmt::print(stderr, "Received exception from server (version {}):\n{}\n",
server_version,
getExceptionMessage(*server_exception, print_stack_trace, true));
if (is_interactive)
std::cerr << std::endl;
{
fmt::print(stderr, "\n");
}
else
{
fmt::print(stderr, "(query: {})\n", full_query);
}
}
if (client_exception)
{
fmt::print(stderr, "Error on processing query '{}':\n{}\n", full_query, client_exception->message());
fmt::print(stderr, "Error on processing query: {}\n", client_exception->message());
if (is_interactive)
{
fmt::print(stderr, "\n");
}
else
{
fmt::print(stderr, "(query: {})\n", full_query);
}
}
// A debug check -- at least some exception must be set, if the error
@ -1067,12 +1079,17 @@ private:
bool echo_query = echo_queries;
/// Test tags are started with "--" so they are interpreted as comments anyway.
/// But if the echo is enabled we have to remove the test tags from `all_queries_text`
/// because we don't want test tags to be echoed.
size_t test_tags_length = test_mode ? getTestTagsLength(all_queries_text) : 0;
/// Several queries separated by ';'.
/// INSERT data is ended by the end of line, not ';'.
/// An exception is VALUES format where we also support semicolon in
/// addition to end of line.
const char * this_query_begin = all_queries_text.data();
const char * this_query_begin = all_queries_text.data() + test_tags_length;
const char * all_queries_end = all_queries_text.data() + all_queries_text.size();
while (this_query_begin < all_queries_end)
@ -1244,13 +1261,17 @@ private:
if (!server_exception)
{
error_matches_hint = false;
fmt::print(stderr, "Expected server error code '{}' but got no server error.\n", test_hint.serverError());
fmt::print(stderr, "Expected server error code '{}' but got no server error (query: {}).\n",
test_hint.serverError(),
full_query);
}
else if (server_exception->code() != test_hint.serverError())
{
error_matches_hint = false;
std::cerr << "Expected server error code: " << test_hint.serverError() << " but got: " << server_exception->code()
<< "." << std::endl;
fmt::print(stderr, "Expected server error code: {} but got: {} (query: {}).\n",
test_hint.serverError(),
server_exception->code(),
full_query);
}
}
@ -1259,13 +1280,17 @@ private:
if (!client_exception)
{
error_matches_hint = false;
fmt::print(stderr, "Expected client error code '{}' but got no client error.\n", test_hint.clientError());
fmt::print(stderr, "Expected client error code '{}' but got no client error (query: {}).\n",
test_hint.clientError(),
full_query);
}
else if (client_exception->code() != test_hint.clientError())
{
error_matches_hint = false;
fmt::print(
stderr, "Expected client error code '{}' but got '{}'.\n", test_hint.clientError(), client_exception->code());
fmt::print(stderr, "Expected client error code '{}' but got '{}' (query: {}).\n",
test_hint.clientError(),
client_exception->code(),
full_query);
}
}
@ -1281,13 +1306,17 @@ private:
{
if (test_hint.clientError())
{
fmt::print(stderr, "The query succeeded but the client error '{}' was expected.\n", test_hint.clientError());
fmt::print(stderr, "The query succeeded but the client error '{}' was expected (query: {}).\n",
test_hint.clientError(),
full_query);
error_matches_hint = false;
}
if (test_hint.serverError())
{
fmt::print(stderr, "The query succeeded but the server error '{}' was expected.\n", test_hint.serverError());
fmt::print(stderr, "The query succeeded but the server error '{}' was expected (query: {}).\n",
test_hint.serverError(),
full_query);
error_matches_hint = false;
}
}
@ -2010,8 +2039,21 @@ private:
PullingAsyncPipelineExecutor executor(pipeline);
Block block;
while (executor.pull(block))
while (true)
{
try
{
if (!executor.pull(block))
{
break;
}
}
catch (Exception & e)
{
e.addMessage(fmt::format("(in query: {})", full_query));
throw;
}
/// Check if server send Log packet
receiveLogs();

View File

@ -0,0 +1,51 @@
#include "TestTags.h"
#include <cstring>
namespace DB
{
size_t getTestTagsLength(const String & multiline_query)
{
const String & text = multiline_query;
size_t pos = 0;
bool first_line = true;
while (true)
{
size_t line_start = pos;
/// Skip spaces.
while ((pos != text.length()) && (text[pos] == ' ' || text[pos] == '\t'))
++pos;
/// Skip comment "--".
static constexpr const char comment[] = "--";
if (text.compare(pos, strlen(comment), comment) != 0)
return line_start;
pos += strlen(comment);
/// Skip the prefix "Tags:" if it's the first line.
if (first_line)
{
while ((pos != text.length()) && (text[pos] == ' ' || text[pos] == '\t'))
++pos;
static constexpr const char tags_prefix[] = "Tags:";
if (text.compare(pos, strlen(tags_prefix), tags_prefix) != 0)
return 0;
pos += strlen(tags_prefix);
first_line = false;
}
/// Skip end-of-line.
size_t eol_pos = text.find_first_of("\r\n", pos);
if (eol_pos == String::npos)
return text.length();
bool two_chars_eol = (eol_pos + 1 < text.length()) && ((text[eol_pos + 1] == '\r') || (text[eol_pos + 1] == '\n')) && (text[eol_pos + 1] != text[eol_pos]);
size_t eol_length = two_chars_eol ? 2 : 1;
pos = eol_pos + eol_length;
}
}
}

View File

@ -0,0 +1,18 @@
#pragma once
#include <Core/Types.h>
namespace DB
{
/// Returns the length of a text looking like
/// -- Tags: x, y, z
/// -- Tag x: explanation of tag x
/// -- Tag y: explanation of tag y
/// -- Tag z: explanation of tag z
///
/// at the beginning of a multiline query.
/// If there are no test tags in the multiline query the function returns 0.
size_t getTestTagsLength(const String & multiline_query);
}
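For reference, a hypothetical test file header of the shape this function measures (the tag names are made up); `getTestTagsLength` would return the length of the two comment lines, including their line breaks:

```sql
-- Tags: no-fasttest, no-parallel
-- Tag no-fasttest: depends on external resources
SELECT 1;
```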

View File

@ -79,6 +79,7 @@
#include <Server/ProtocolServerAdapter.h>
#include <Server/HTTP/HTTPServer.h>
#include <filesystem>
#include <Compression/CompressionCodecEncrypted.h>
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
@ -251,7 +252,6 @@ namespace ErrorCodes
extern const int SUPPORT_IS_DISABLED;
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
extern const int INCORRECT_DATA;
extern const int INVALID_CONFIG_PARAMETER;
extern const int SYSTEM_ERROR;
extern const int FAILED_TO_GETPWUID;
@ -456,40 +456,6 @@ void checkForUsersNotInMainConfig(
}
}
static void loadEncryptionKey(const std::string & key_command [[maybe_unused]], Poco::Logger * log)
{
#if USE_BASE64 && USE_SSL && USE_INTERNAL_SSL_LIBRARY
auto process = ShellCommand::execute(key_command);
std::string b64_key;
readStringUntilEOF(b64_key, process->out);
process->wait();
// turbob64 doesn't like whitespace characters in input. Strip
// them before decoding.
std::erase_if(b64_key, [](char c)
{
return c == ' ' || c == '\t' || c == '\r' || c == '\n';
});
std::vector<char> buf(b64_key.size());
const size_t key_size = tb64dec(reinterpret_cast<const unsigned char *>(b64_key.data()), b64_key.size(),
reinterpret_cast<unsigned char *>(buf.data()));
if (!key_size)
throw Exception("Failed to decode encryption key", ErrorCodes::INCORRECT_DATA);
else if (key_size < 16)
LOG_WARNING(log, "The encryption key should be at least 16 octets long.");
const std::string_view key = std::string_view(buf.data(), key_size);
CompressionCodecEncrypted::setMasterKey(key);
#else
LOG_WARNING(log, "Server was built without Base64 or SSL support. Encryption is disabled.");
#endif
}
[[noreturn]] void forceShutdown()
{
#if defined(THREAD_SANITIZER) && defined(OS_LINUX)
@ -904,6 +870,8 @@ if (ThreadFuzzer::instance().isEffective())
global_context->updateStorageConfiguration(*config);
global_context->updateInterserverCredentials(*config);
CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "encryption_codecs");
},
/* already_loaded = */ false); /// Reload it right now (initial loading)
@ -976,9 +944,9 @@ if (ThreadFuzzer::instance().isEffective())
global_context->getMergeTreeSettings().sanityCheck(settings);
global_context->getReplicatedMergeTreeSettings().sanityCheck(settings);
/// Set up encryption.
if (config().has("encryption.key_command"))
loadEncryptionKey(config().getString("encryption.key_command"), log);
/// Try to set up encryption. If there are errors in the config, they will be printed and the server will not start.
CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs");
Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0);

View File

@ -1026,10 +1026,28 @@
defined, or encryption codecs will be disabled otherwise. The
command is executed through /bin/sh and is expected to write
a Base64-encoded key to the stdout. -->
<encryption>
<!-- <key_command>/usr/bin/systemd-ask-password &#45;&#45;id="clickhouse-server" &#45;&#45;timeout=0 "Enter the ClickHouse encryption passphrase:" | base64</key_command> -->
<!-- <key_command><![CDATA[IFS=; echo -n >/dev/tty "Enter the ClickHouse encryption passphrase: "; stty=`stty -F /dev/tty -g`; stty -F /dev/tty -echo; read k </dev/tty; stty -F /dev/tty "$stty"; echo -n $k | base64]]></key_command> -->
</encryption>
<encryption_codecs>
<!-- aes_128_gcm_siv -->
<!-- Example of getting hex key from env -->
<!-- the code should use this key and throw an exception if its length is not 16 bytes -->
<!--key_hex from_env="..."></key_hex -->
<!-- Example of multiple hex keys. They can be imported from env or be written down in config-->
<!-- the code should use these keys and throw an exception if their length is not 16 bytes -->
<!-- key_hex id="0">...</key_hex -->
<!-- key_hex id="1" from_env=".."></key_hex -->
<!-- key_hex id="2">...</key_hex -->
<!-- current_key_id>2</current_key_id -->
<!-- Example of getting hex key from config -->
<!-- the code should use this key and throw an exception if its length is not 16 bytes -->
<!-- key>...</key -->
<!-- example of adding nonce -->
<!-- nonce>...</nonce -->
<!-- /aes_128_gcm_siv -->
</encryption_codecs>
<!-- Allow to execute distributed DDL queries (CREATE, DROP, ALTER, RENAME) on cluster.
Works only if ZooKeeper is enabled. Comment it if such functionality isn't required. -->

View File

@ -21,6 +21,8 @@ class AggregateFunctionCombinatorArray final : public IAggregateFunctionCombinat
public:
String getName() const override { return "Array"; }
bool supportsNesting() const override { return true; }
DataTypes transformArguments(const DataTypes & arguments) const override
{
if (arguments.empty())

View File

@ -21,11 +21,9 @@
namespace DB
{
struct Settings;
template <typename T>
using DecimalOrVectorCol = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
template <typename T> constexpr bool DecimalOrExtendedInt =
IsDecimalNumber<T>
is_decimal<T>
|| std::is_same_v<T, Int128>
|| std::is_same_v<T, Int256>
|| std::is_same_v<T, UInt128>
@ -44,7 +42,7 @@ struct AvgFraction
/// Invoked only if either Numerator or Denominator is Decimal.
Float64 NO_SANITIZE_UNDEFINED divideIfAnyDecimal(UInt32 num_scale, UInt32 denom_scale [[maybe_unused]]) const
{
if constexpr (IsDecimalNumber<Numerator> && IsDecimalNumber<Denominator>)
if constexpr (is_decimal<Numerator> && is_decimal<Denominator>)
{
// According to the docs, num(S1) / denom(S2) would have scale S1
@ -60,7 +58,7 @@ struct AvgFraction
/// Numerator is always casted to Float64 to divide correctly if the denominator is not Float64.
Float64 num_converted;
if constexpr (IsDecimalNumber<Numerator>)
if constexpr (is_decimal<Numerator>)
num_converted = DecimalUtils::convertTo<Float64>(numerator, num_scale);
else
num_converted = static_cast<Float64>(numerator); /// all other types, including extended integral.
@ -68,7 +66,7 @@ struct AvgFraction
std::conditional_t<DecimalOrExtendedInt<Denominator>,
Float64, Denominator> denom_converted;
if constexpr (IsDecimalNumber<Denominator>)
if constexpr (is_decimal<Denominator>)
denom_converted = DecimalUtils::convertTo<Float64>(denominator, denom_scale);
else if constexpr (DecimalOrExtendedInt<Denominator>)
/// no way to divide Float64 and extended integral type without an explicit cast.
@ -139,7 +137,7 @@ public:
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
if constexpr (IsDecimalNumber<Numerator> || IsDecimalNumber<Denominator>)
if constexpr (is_decimal<Numerator> || is_decimal<Denominator>)
assert_cast<ColumnVector<Float64> &>(to).getData().push_back(
this->data(place).divideIfAnyDecimal(num_scale, denom_scale));
else
@ -222,7 +220,7 @@ private:
};
template <typename T>
using AvgFieldType = std::conditional_t<IsDecimalNumber<T>,
using AvgFieldType = std::conditional_t<is_decimal<T>,
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
NearestFieldType<T>>;
@ -239,7 +237,7 @@ public:
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const final
{
this->data(place).numerator += static_cast<const DecimalOrVectorCol<T> &>(*columns[0]).getData()[row_num];
this->data(place).numerator += static_cast<const ColumnVectorOrDecimal<T> &>(*columns[0]).getData()[row_num];
++this->data(place).denominator;
}

View File

@ -8,7 +8,7 @@ namespace DB
struct Settings;
template <typename T>
using AvgWeightedFieldType = std::conditional_t<IsDecimalNumber<T>,
using AvgWeightedFieldType = std::conditional_t<is_decimal<T>,
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
std::conditional_t<DecimalOrExtendedInt<T>,
Float64, // no way to do UInt128 * UInt128, better cast to Float64
@ -34,10 +34,10 @@ public:
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
const auto& weights = static_cast<const DecimalOrVectorCol<Weight> &>(*columns[1]);
const auto& weights = static_cast<const ColumnVectorOrDecimal<Weight> &>(*columns[1]);
this->data(place).numerator += static_cast<Numerator>(
static_cast<const DecimalOrVectorCol<Value> &>(*columns[0]).getData()[row_num]) *
static_cast<const ColumnVectorOrDecimal<Value> &>(*columns[0]).getData()[row_num]) *
static_cast<Numerator>(weights.getData()[row_num]);
this->data(place).denominator += static_cast<Denominator>(weights.getData()[row_num]);

View File

@ -29,6 +29,7 @@ namespace ErrorCodes
{
extern const int UNKNOWN_AGGREGATE_FUNCTION;
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_AGGREGATION;
}
const String & getAggregateFunctionCanonicalNameIfAny(const String & name)
@ -159,13 +160,32 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
if (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name))
{
const std::string & combinator_name = combinator->getName();
if (combinator->isForInternalUsageOnly())
throw Exception("Aggregate function combinator '" + combinator->getName() + "' is only for internal usage", ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION);
throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION,
"Aggregate function combinator '{}' is only for internal usage",
combinator_name);
if (query_context && query_context->getSettingsRef().log_queries)
query_context->addQueryFactoriesInfo(Context::QueryLogFactories::AggregateFunctionCombinator, combinator->getName());
query_context->addQueryFactoriesInfo(Context::QueryLogFactories::AggregateFunctionCombinator, combinator_name);
String nested_name = name.substr(0, name.size() - combinator_name.size());
/// Nested identical combinators (e.g. uniqCombinedIfIf) are not
/// supported (they don't even work -- they fail silently).
///
/// But non-identical ones are supported and work, for example
/// uniqCombinedIfMergeIf; this is useful when the underlying
/// storage stores AggregateFunction(uniqCombinedIf) and in SELECT you
/// need to filter the aggregation result based on another column, for
/// example.
if (!combinator->supportsNesting() && nested_name.ends_with(combinator_name))
{
throw Exception(ErrorCodes::ILLEGAL_AGGREGATION,
"Nested identical combinator '{}' is not supported",
combinator_name);
}
String nested_name = name.substr(0, name.size() - combinator->getName().size());
DataTypes nested_types = combinator->transformArguments(argument_types);
Array nested_parameters = combinator->transformParameters(parameters);

View File

@ -25,14 +25,14 @@ namespace
template <typename T, typename LimitNumberOfElements>
struct MovingSum
{
using Data = MovingSumData<std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>>;
using Data = MovingSumData<std::conditional_t<is_decimal<T>, Decimal128, NearestFieldType<T>>>;
using Function = MovingImpl<T, LimitNumberOfElements, Data>;
};
template <typename T, typename LimitNumberOfElements>
struct MovingAvg
{
using Data = MovingAvgData<std::conditional_t<IsDecimalNumber<T>, Decimal128, Float64>>;
using Data = MovingAvgData<std::conditional_t<is_decimal<T>, Decimal128, Float64>>;
using Function = MovingImpl<T, LimitNumberOfElements, Data>;
};

View File

@ -87,18 +87,10 @@ class MovingImpl final
public:
using ResultT = typename Data::Accumulator;
using ColumnSource = std::conditional_t<IsDecimalNumber<T>,
ColumnDecimal<T>,
ColumnVector<T>>;
using ColumnSource = ColumnVectorOrDecimal<T>;
/// Probably for overflow function in the future.
using ColumnResult = std::conditional_t<IsDecimalNumber<ResultT>,
ColumnDecimal<ResultT>,
ColumnVector<ResultT>>;
using DataTypeResult = std::conditional_t<IsDecimalNumber<ResultT>,
DataTypeDecimal<ResultT>,
DataTypeNumber<ResultT>>;
using ColumnResult = ColumnVectorOrDecimal<ResultT>;
explicit MovingImpl(const DataTypePtr & data_type_, UInt64 window_size_ = std::numeric_limits<UInt64>::max())
: IAggregateFunctionDataHelper<Data, MovingImpl<T, Tlimit_num_elems, Data>>({data_type_}, {})
@ -106,14 +98,7 @@ public:
String getName() const override { return Data::name; }
DataTypePtr getReturnType() const override
{
if constexpr (IsDecimalNumber<ResultT>)
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeResult>(
DataTypeResult::maxPrecision(), getDecimalScale(*this->argument_types.at(0))));
else
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeResult>());
}
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(getReturnTypeElement()); }
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
@ -196,6 +181,18 @@ public:
{
return true;
}
private:
auto getReturnTypeElement() const
{
if constexpr (!is_decimal<ResultT>)
return std::make_shared<DataTypeNumber<ResultT>>();
else
{
using Res = DataTypeDecimal<ResultT>;
return std::make_shared<Res>(Res::maxPrecision(), getDecimalScale(*this->argument_types.at(0)));
}
}
};
#undef AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE

View File

@ -10,7 +10,6 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_AGGREGATION;
}
class AggregateFunctionCombinatorIf final : public IAggregateFunctionCombinator
@ -37,10 +36,6 @@ public:
const DataTypes & arguments,
const Array & params) const override
{
if (nested_function->getName().find(getName()) != String::npos)
{
throw Exception(ErrorCodes::ILLEGAL_AGGREGATION, "nested function for {0}-combinator must not have {0}-combinator", getName());
}
return std::make_shared<AggregateFunctionIf>(nested_function, arguments, params);
}
};

View File

@ -44,7 +44,7 @@ struct SingleValueDataFixed
{
private:
using Self = SingleValueDataFixed;
using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
using ColVecType = ColumnVectorOrDecimal<T>;
bool has_value = false; /// We need to remember if at least one value has been passed. This is necessary for AggregateFunctionIf.
T value;

View File

@ -23,6 +23,9 @@ private:
public:
explicit AggregateFunctionCombinatorOrFill(Kind kind_) : kind(kind_) {}
/// Due to aggregate_functions_null_for_empty
bool supportsNesting() const override { return true; }
String getName() const override
{
return kind == Kind::OrNull ? "OrNull" : "OrDefault";

View File

@ -67,10 +67,10 @@ class AggregateFunctionQuantile final : public IAggregateFunctionDataHelper<Data
AggregateFunctionQuantile<Value, Data, Name, has_second_arg, FloatReturnType, returns_many>>
{
private:
using ColVecType = std::conditional_t<IsDecimalNumber<Value>, ColumnDecimal<Value>, ColumnVector<Value>>;
using ColVecType = ColumnVectorOrDecimal<Value>;
static constexpr bool returns_float = !(std::is_same_v<FloatReturnType, void>);
static_assert(!IsDecimalNumber<Value> || !returns_float);
static_assert(!is_decimal<Value> || !returns_float);
QuantileLevels<Float64> levels;

View File

@ -0,0 +1,72 @@
#include <AggregateFunctions/AggregateFunctionSparkbar.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
{
template <template <typename, typename> class AggregateFunctionTemplate, typename Data, typename ... TArgs>
static IAggregateFunction * createWithUIntegerOrTimeType(const std::string & name, const IDataType & argument_type, TArgs && ... args)
{
WhichDataType which(argument_type);
if (which.idx == TypeIndex::Date || which.idx == TypeIndex::UInt16) return new AggregateFunctionTemplate<UInt16, Data>(std::forward<TArgs>(args)...);
if (which.idx == TypeIndex::DateTime || which.idx == TypeIndex::UInt32) return new AggregateFunctionTemplate<UInt32, Data>(std::forward<TArgs>(args)...);
if (which.idx == TypeIndex::UInt8) return new AggregateFunctionTemplate<UInt8, Data>(std::forward<TArgs>(args)...);
if (which.idx == TypeIndex::UInt64) return new AggregateFunctionTemplate<UInt64, Data>(std::forward<TArgs>(args)...);
if (which.idx == TypeIndex::UInt128) return new AggregateFunctionTemplate<UInt128, Data>(std::forward<TArgs>(args)...);
if (which.idx == TypeIndex::UInt256) return new AggregateFunctionTemplate<UInt256, Data>(std::forward<TArgs>(args)...);
throw Exception("The first argument type must be UInt or Date or DateTime for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
template <typename ... TArgs>
AggregateFunctionPtr createAggregateFunctionSparkbarImpl(const std::string & name, const IDataType & x_argument_type, const IDataType & y_argument_type, TArgs ... args)
{
WhichDataType which(y_argument_type);
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) return AggregateFunctionPtr(createWithUIntegerOrTimeType<AggregateFunctionSparkbar, TYPE>(name, x_argument_type, std::forward<TArgs>(args)...));
FOR_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
throw Exception("The second argument type must be numeric for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
AggregateFunctionPtr createAggregateFunctionSparkbar(const std::string & name, const DataTypes & arguments, const Array & params, const Settings *)
{
assertBinary(name, arguments);
if (params.size() != 1 && params.size() != 3)
throw Exception("The number of params does not match for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (params.size() == 3)
{
if (params.at(1).getType() != arguments[0]->getDefault().getType() || params.at(2).getType() != arguments[0]->getDefault().getType())
{
throw Exception("The second and third parameters are not the same type as the first arguments for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
}
return createAggregateFunctionSparkbarImpl(name, *arguments[0], *arguments[1], arguments, params);
}
}
void registerAggregateFunctionSparkbar(AggregateFunctionFactory & factory)
{
factory.registerFunction("sparkbar", createAggregateFunctionSparkbar);
}
}
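A sketch of how the new function could be invoked from SQL (the data is made up; `sparkbar(width)(x, y)` groups `y` by `x` into `width` buckets and renders them as bar glyphs):

```sql
SELECT sparkbar(9)(x, y)
FROM
(
    SELECT toUInt32(number) AS x, toUInt32(number * number) AS y
    FROM numbers(9)
);
```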

View File

@ -0,0 +1,309 @@
#pragma once
#include <DataTypes/DataTypeString.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <common/range.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Columns/ColumnString.h>
#include <common/logger_useful.h>
#include <IO/ReadBufferFromString.h>
#include <Common/HashTable/HashMap.h>
namespace DB
{
template<typename X, typename Y>
struct AggregateFunctionSparkbarData
{
using Points = HashMap<X, Y>;
Points points;
X min_x = std::numeric_limits<X>::max();
X max_x = std::numeric_limits<X>::lowest();
Y min_y = std::numeric_limits<Y>::max();
Y max_y = std::numeric_limits<Y>::lowest();
void insert(const X & x, const Y & y)
{
auto result = points.insert({x, y});
if (!result.second)
result.first->getMapped() += y;
}
void add(X x, Y y)
{
insert(x, y);
min_x = std::min(x, min_x);
max_x = std::max(x, max_x);
min_y = std::min(y, min_y);
max_y = std::max(y, max_y);
}
void merge(const AggregateFunctionSparkbarData & other)
{
if (other.points.empty())
return;
for (auto & point : other.points)
insert(point.getKey(), point.getMapped());
min_x = std::min(other.min_x, min_x);
max_x = std::max(other.max_x, max_x);
min_y = std::min(other.min_y, min_y);
max_y = std::max(other.max_y, max_y);
}
void serialize(WriteBuffer & buf) const
{
writeBinary(min_x, buf);
writeBinary(max_x, buf);
writeBinary(min_y, buf);
writeBinary(max_y, buf);
writeVarUInt(points.size(), buf);
for (const auto & elem : points)
{
writeBinary(elem.getKey(), buf);
writeBinary(elem.getMapped(), buf);
}
}
void deserialize(ReadBuffer & buf)
{
readBinary(min_x, buf);
readBinary(max_x, buf);
readBinary(min_y, buf);
readBinary(max_y, buf);
size_t size;
readVarUInt(size, buf);
/// TODO Protection against huge size
X x;
Y y;
for (size_t i = 0; i < size; ++i)
{
readBinary(x, buf);
readBinary(y, buf);
insert(x, y);
}
}
};
template<typename X, typename Y>
class AggregateFunctionSparkbar final
: public IAggregateFunctionDataHelper<AggregateFunctionSparkbarData<X, Y>, AggregateFunctionSparkbar<X, Y>>
{
private:
size_t width;
X min_x;
X max_x;
String getBar(const UInt8 value) const
{
// ▁▂▃▄▅▆▇█
switch (value)
{
case 1: return "";
case 2: return "";
case 3: return "";
case 4: return "";
case 5: return "";
case 6: return "";
case 7: return "";
case 8: return "";
}
return " ";
}
/**
* The minimum value of y is rendered as the lowest bar "▁",
* the maximum value of y is rendered as the highest bar "█", and intermediate values are rendered proportionally.
* If a bucket has no y value, it is rendered as " ".
* If the actual number of buckets is greater than the specified width, the buckets are compressed to fit it.
* For example, if there are actually 11 buckets and 10 are specified, the 11 buckets are divided as follows (11/10):
* 0.0-1.1, 1.1-2.2, 2.2-3.3, 3.3-4.4, 4.4-5.5, 5.5-6.6, 6.6-7.7, 7.7-8.8, 8.8-9.9, 9.9-11.
* The y value of the first bucket is calculated as follows:
* the actual y value at the first position + 0.1 of the actual y at the second position; the remaining y*0.9 is carried over to the next bucket.
* The next bucket uses that carried-over y*0.9 + 0.2 of the actual y at the third position, carrying y*0.8 over. And so on.
*/
String render(const AggregateFunctionSparkbarData<X, Y> & data) const
{
String value;
if (data.points.empty() || !width)
return value;
X local_min_x = data.min_x;
X local_max_x = data.max_x;
size_t diff_x = local_max_x - local_min_x;
if ((diff_x + 1) <= width)
{
Y min_y = data.min_y;
Y max_y = data.max_y;
Float64 diff_y = max_y - min_y;
if (diff_y)
{
for (size_t i = 0; i <= diff_x; ++i)
{
auto it = data.points.find(local_min_x + i);
bool found = it != data.points.end();
value += getBar(found ? static_cast<UInt8>(std::round(((it->getMapped() - min_y) / diff_y) * 7) + 1) : 0);
}
}
else
{
for (size_t i = 0; i <= diff_x; ++i)
value += getBar(data.points.has(local_min_x + i) ? 1 : 0);
}
}
else
{
// begin reshapes to width buckets
Float64 multiple_d = (diff_x + 1) / static_cast<Float64>(width);
std::optional<Float64> min_y;
std::optional<Float64> max_y;
std::optional<Float64> new_y;
std::vector<std::optional<Float64>> newPoints;
newPoints.reserve(width);
std::pair<size_t, Float64> bound{0, 0.0};
size_t cur_bucket_num = 0;
// upper bound for bucket
auto upperBound = [&](size_t bucket_num)
{
bound.second = (bucket_num + 1) * multiple_d;
bound.first = std::floor(bound.second);
};
upperBound(cur_bucket_num);
for (size_t i = 0; i <= (diff_x + 1); ++i)
{
if (i == bound.first) // is bound
{
Float64 proportion = bound.second - bound.first;
auto it = data.points.find(local_min_x + i);
bool found = (it != data.points.end());
if (found)
new_y = new_y.value_or(0) + it->getMapped() * proportion;
if (new_y)
{
Float64 avg_y = new_y.value() / multiple_d;
newPoints.emplace_back(avg_y);
// If min_y has no value, or if the avg_y of the current bucket is less than min_y, update it.
if (!min_y || avg_y < min_y)
min_y = avg_y;
if (!max_y || avg_y > max_y)
max_y = avg_y;
}
else
{
newPoints.emplace_back();
}
// next bucket
new_y = found ? ((1 - proportion) * it->getMapped()) : std::optional<Float64>();
upperBound(++cur_bucket_num);
}
else
{
auto it = data.points.find(local_min_x + i);
if (it != data.points.end())
new_y = new_y.value_or(0) + it->getMapped();
}
}
if (!min_y || !max_y) // No value is set
return {};
Float64 diff_y = max_y.value() - min_y.value();
auto getBars = [&] (const std::optional<Float64> & point_y)
{
value += getBar(point_y ? static_cast<UInt8>(std::round(((point_y.value() - min_y.value()) / diff_y) * 7) + 1) : 0);
};
auto getBarsForConstant = [&] (const std::optional<Float64> & point_y)
{
value += getBar(point_y ? 1 : 0);
};
if (diff_y)
std::for_each(newPoints.begin(), newPoints.end(), getBars);
else
std::for_each(newPoints.begin(), newPoints.end(), getBarsForConstant);
}
return value;
}
public:
AggregateFunctionSparkbar(const DataTypes & arguments, const Array & params)
: IAggregateFunctionDataHelper<AggregateFunctionSparkbarData<X, Y>, AggregateFunctionSparkbar>(
arguments, params)
{
width = params.at(0).safeGet<UInt64>();
if (params.size() == 3)
{
min_x = params.at(1).safeGet<X>();
max_x = params.at(2).safeGet<X>();
}
else
{
min_x = std::numeric_limits<X>::min();
max_x = std::numeric_limits<X>::max();
}
}
String getName() const override
{
return "sparkbar";
}
DataTypePtr getReturnType() const override
{
return std::make_shared<DataTypeString>();
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * /*arena*/) const override
{
X x = assert_cast<const ColumnVector<X> *>(columns[0])->getData()[row_num];
if (min_x <= x && x <= max_x)
{
Y y = assert_cast<const ColumnVector<Y> *>(columns[1])->getData()[row_num];
this->data(place).add(x, y);
}
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * /*arena*/) const override
{
this->data(place).merge(this->data(rhs));
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
{
this->data(place).serialize(buf);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena *) const override
{
this->data(place).deserialize(buf);
}
bool allocatesMemoryInArena() const override { return false; }
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * /*arena*/) const override
{
auto & to_column = assert_cast<ColumnString &>(to);
const auto & data = this->data(place);
const String & value = render(data);
to_column.insertData(value.data(), value.size());
}
};
}
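Taken together, the constructor and render() define a function invoked as sparkbar(width[, min_x, max_x])(x, y). To experiment with the rebucketing logic outside the aggregate-function machinery, here is a minimal standalone sketch: compressToWidth is a hypothetical name, std::map stands in for ClickHouse's hash table, and it assumes num_inputs > width (i.e. the compressing branch above).

#include <cmath>
#include <cstddef>
#include <map>
#include <optional>
#include <vector>

/// Compress num_inputs consecutive x positions (keys 0..num_inputs-1 in `points`)
/// into `width` buckets using the same fractional-boundary carry as render() above.
std::vector<std::optional<double>> compressToWidth(
    const std::map<std::size_t, double> & points, std::size_t num_inputs, std::size_t width)
{
    std::vector<std::optional<double>> result;
    result.reserve(width);
    const double per_bucket = num_inputs / static_cast<double>(width);
    std::optional<double> acc;       /// running y sum for the current bucket
    std::size_t bucket = 0;
    double upper = per_bucket;       /// fractional upper bound of the current bucket
    for (std::size_t i = 0; i <= num_inputs; ++i)
    {
        auto it = points.find(i);
        if (i == static_cast<std::size_t>(std::floor(upper)))  /// crossed a bucket boundary
        {
            const double proportion = upper - std::floor(upper);
            if (it != points.end())
                acc = acc.value_or(0.0) + it->second * proportion;
            if (acc)
                result.emplace_back(*acc / per_bucket);
            else
                result.emplace_back();
            /// carry the remaining fraction of this x into the next bucket
            acc = (it != points.end()) ? std::optional<double>((1 - proportion) * it->second)
                                       : std::nullopt;
            upper = (++bucket + 1) * per_bucket;
        }
        else if (it != points.end())
            acc = acc.value_or(0.0) + it->second;
    }
    return result;
}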

View File

@ -49,7 +49,7 @@ struct StatFuncOneArg
using Type1 = T;
using Type2 = T;
using ResultType = std::conditional_t<std::is_same_v<T, Float32>, Float32, Float64>;
using Data = std::conditional_t<IsDecimalNumber<T>, VarMomentsDecimal<Decimal128, _level>, VarMoments<ResultType, _level>>;
using Data = std::conditional_t<is_decimal<T>, VarMomentsDecimal<Decimal128, _level>, VarMoments<ResultType, _level>>;
static constexpr StatisticsFunctionKind kind = _kind;
static constexpr UInt32 num_args = 1;
@ -75,8 +75,8 @@ class AggregateFunctionVarianceSimple final
public:
using T1 = typename StatFunc::Type1;
using T2 = typename StatFunc::Type2;
using ColVecT1 = std::conditional_t<IsDecimalNumber<T1>, ColumnDecimal<T1>, ColumnVector<T1>>;
using ColVecT2 = std::conditional_t<IsDecimalNumber<T2>, ColumnDecimal<T2>, ColumnVector<T2>>;
using ColVecT1 = ColumnVectorOrDecimal<T1>;
using ColVecT2 = ColumnVectorOrDecimal<T2>;
using ResultType = typename StatFunc::ResultType;
using ColVecResult = ColumnVector<ResultType>;
@ -132,7 +132,7 @@ public:
static_cast<ResultType>(static_cast<const ColVecT2 &>(*columns[1]).getData()[row_num]));
else
{
if constexpr (IsDecimalNumber<T1>)
if constexpr (is_decimal<T1>)
{
this->data(place).add(static_cast<ResultType>(
static_cast<const ColVecT1 &>(*columns[0]).getData()[row_num].value));
@ -163,7 +163,7 @@ public:
const auto & data = this->data(place);
auto & dst = static_cast<ColVecResult &>(to).getData();
if constexpr (IsDecimalNumber<T1>)
if constexpr (is_decimal<T1>)
{
if constexpr (StatFunc::kind == StatisticsFunctionKind::varPop)
dst.push_back(data.getPopulation(src_scale * 2));

View File

@ -20,10 +20,9 @@ template <typename T>
struct SumSimple
{
/// @note It uses slow Decimal128 (because we need such a variant). sumWithOverflow is faster for Decimal32/64
using ResultType = std::conditional_t<IsDecimalNumber<T>,
using ResultType = std::conditional_t<is_decimal<T>,
std::conditional_t<std::is_same_v<T, Decimal256>, Decimal256, Decimal128>,
NearestFieldType<T>>;
// using ResultType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
using AggregateDataType = AggregateFunctionSumData<ResultType>;
using Function = AggregateFunctionSum<T, ResultType, AggregateDataType, AggregateFunctionTypeSum>;
};
@ -47,7 +46,7 @@ struct SumKahan
template <typename T> using AggregateFunctionSumSimple = typename SumSimple<T>::Function;
template <typename T> using AggregateFunctionSumWithOverflow = typename SumSameType<T>::Function;
template <typename T> using AggregateFunctionSumKahan =
std::conditional_t<IsDecimalNumber<T>, typename SumSimple<T>::Function, typename SumKahan<T>::Function>;
std::conditional_t<is_decimal<T>, typename SumSimple<T>::Function, typename SumKahan<T>::Function>;
template <template <typename> class Function>
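The recurring substitution in these hunks — the IsDecimalNumber<T> trait becoming the is_decimal concept — boils down to using a C++20 concept as a compile-time boolean. A toy sketch (the Decimal type and the concept below are simplified stand-ins, not the real definitions):

#include <type_traits>

template <class T> struct Decimal { using NativeType = T; T value; };

/// Simplified stand-in: the real concept enumerates the concrete Decimal types.
template <class T>
concept is_decimal = requires { typename T::NativeType; };

/// A concept-id is usable anywhere a constexpr bool is, e.g. inside
/// std::conditional_t exactly as SumSimple and AggregateFunctionSumKahan do above:
template <class T>
using SumResult = std::conditional_t<is_decimal<T>, Decimal<long long>, double>;

static_assert(std::is_same_v<SumResult<int>, double>);
static_assert(std::is_same_v<SumResult<Decimal<int>>, Decimal<long long>>);

The same concept-as-bool usage appears below in the if constexpr (is_decimal<T>) branches.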

View File

@ -1,5 +1,6 @@
#pragma once
#include <cstring>
#include <memory>
#include <experimental/type_traits>
#include <type_traits>
@ -104,8 +105,8 @@ struct AggregateFunctionSumData
const auto * end = ptr + count;
if constexpr (
(is_integer_v<T> && !is_big_int_v<T>)
|| (IsDecimalNumber<T> && !std::is_same_v<T, Decimal256> && !std::is_same_v<T, Decimal128>))
(is_integer<T> && !is_big_int_v<T>)
|| (is_decimal<T> && !std::is_same_v<T, Decimal256> && !std::is_same_v<T, Decimal128>))
{
/// For integers we can vectorize the operation if we replace the null check using a multiplication (by 0 for null, 1 for not null)
/// https://quick-bench.com/q/MLTnfTvwC2qZFVeWHfOBR3U7a8I
@ -123,6 +124,10 @@ struct AggregateFunctionSumData
if constexpr (std::is_floating_point_v<T>)
{
/// For floating point we use a similar trick as above, except that now we reinterpret the floating point number as an unsigned
/// integer of the same size and use a mask instead (0 to discard, 0xFF..FF to keep)
static_assert(sizeof(Value) == 4 || sizeof(Value) == 8);
typedef typename std::conditional<sizeof(Value) == 4, UInt32, UInt64>::type equivalent_integer;
constexpr size_t unroll_count = 128 / sizeof(T);
T partial_sums[unroll_count]{};
@ -132,10 +137,12 @@ struct AggregateFunctionSumData
{
for (size_t i = 0; i < unroll_count; ++i)
{
if (!condition_map[i] == add_if_zero)
{
Impl::add(partial_sums[i], ptr[i]);
}
equivalent_integer value;
std::memcpy(&value, &ptr[i], sizeof(Value));
value &= (!condition_map[i] != add_if_zero) - 1;
Value d;
std::memcpy(&d, &value, sizeof(Value));
Impl::add(partial_sums[i], d);
}
ptr += unroll_count;
condition_map += unroll_count;
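The new masked loop replaces a data-dependent branch with pure integer arithmetic: the float is type-punned to a same-size integer, ANDed with an all-ones-or-zero mask derived from the condition byte, and punned back, so discarded rows contribute exactly +0.0. A minimal self-contained sketch of the same trick in isolation (maskedSum is a hypothetical name):

#include <cstddef>
#include <cstdint>
#include <cstring>

/// Branchless conditional sum over doubles.
double maskedSum(const double * values, const std::uint8_t * keep, std::size_t count)
{
    double sum = 0;
    for (std::size_t i = 0; i < count; ++i)
    {
        std::uint64_t bits;
        std::memcpy(&bits, &values[i], sizeof(bits));
        bits &= static_cast<std::uint64_t>(keep[i] == 0) - 1;  /// 0xFF..FF when keep[i] != 0, else 0
        double d;
        std::memcpy(&d, &bits, sizeof(d));
        sum += d;  /// adds +0.0 for discarded rows; no branch, so the loop vectorizes cleanly
    }
    return sum;
}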
@ -334,9 +341,7 @@ class AggregateFunctionSum final : public IAggregateFunctionDataHelper<Data, Agg
public:
static constexpr bool DateTime64Supported = false;
using ResultDataType = std::conditional_t<IsDecimalNumber<T>, DataTypeDecimal<TResult>, DataTypeNumber<TResult>>;
using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
using ColVecResult = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<TResult>, ColumnVector<TResult>>;
using ColVecType = ColumnVectorOrDecimal<T>;
String getName() const override
{
@ -361,10 +366,13 @@ public:
DataTypePtr getReturnType() const override
{
if constexpr (IsDecimalNumber<T>)
return std::make_shared<ResultDataType>(ResultDataType::maxPrecision(), scale);
if constexpr (!is_decimal<T>)
return std::make_shared<DataTypeNumber<TResult>>();
else
return std::make_shared<ResultDataType>();
{
using DataType = DataTypeDecimal<TResult>;
return std::make_shared<DataType>(DataType::maxPrecision(), scale);
}
}
bool allocatesMemoryInArena() const override { return false; }
@ -431,8 +439,7 @@ public:
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto & column = assert_cast<ColVecResult &>(to);
column.getData().push_back(this->data(place).get());
castColumnToResult(to).getData().push_back(this->data(place).get());
}
#if USE_EMBEDDED_COMPILER
@ -511,6 +518,14 @@ public:
private:
UInt32 scale;
static constexpr auto & castColumnToResult(IColumn & to)
{
if constexpr (is_decimal<T>)
return assert_cast<ColumnDecimal<TResult> &>(to);
else
return assert_cast<ColumnVector<TResult> &>(to);
}
};
}
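castColumnToResult works because a function with a deduced return type (static constexpr auto &) may return different types from different if constexpr branches — one per instantiation. A toy sketch with stand-in types:

#include <string>
#include <vector>

/// Returns a std::string & or a std::vector<int> & depending on the template
/// argument; with a runtime `if`, both branches would have to agree on one type.
template <bool UseString>
constexpr auto & pickBuffer(std::vector<int> & ints, std::string & chars)
{
    if constexpr (UseString)
        return chars;
    else
        return ints;
}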

View File

@ -8,8 +8,6 @@
namespace DB
{
template <typename T>
using DecimalOrNumberDataType = std::conditional_t<IsDecimalNumber<T>, DataTypeDecimal<AvgFieldType<T>>, DataTypeNumber<AvgFieldType<T>>>;
template <typename T>
class AggregateFunctionSumCount final : public AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionSumCount<T>>
{
public:
@ -20,20 +18,13 @@ public:
DataTypePtr getReturnType() const override
{
DataTypes types;
if constexpr (IsDecimalNumber<T>)
types.emplace_back(std::make_shared<DecimalOrNumberDataType<T>>(DecimalOrNumberDataType<T>::maxPrecision(), scale));
else
types.emplace_back(std::make_shared<DecimalOrNumberDataType<T>>());
types.emplace_back(std::make_shared<DataTypeUInt64>());
return std::make_shared<DataTypeTuple>(types);
auto second_elem = std::make_shared<DataTypeUInt64>();
return std::make_shared<DataTypeTuple>(DataTypes{getReturnTypeFirstElement(), std::move(second_elem)});
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const final
{
assert_cast<DecimalOrVectorCol<AvgFieldType<T>> &>((assert_cast<ColumnTuple &>(to)).getColumn(0)).getData().push_back(
assert_cast<ColumnVectorOrDecimal<AvgFieldType<T>> &>((assert_cast<ColumnTuple &>(to)).getColumn(0)).getData().push_back(
this->data(place).numerator);
assert_cast<ColumnUInt64 &>((assert_cast<ColumnTuple &>(to)).getColumn(1)).getData().push_back(
@ -42,7 +33,7 @@ public:
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const final
{
this->data(place).numerator += static_cast<const DecimalOrVectorCol<T> &>(*columns[0]).getData()[row_num];
this->data(place).numerator += static_cast<const ColumnVectorOrDecimal<T> &>(*columns[0]).getData()[row_num];
++this->data(place).denominator;
}
@ -59,6 +50,19 @@ public:
private:
UInt32 scale;
auto getReturnTypeFirstElement() const
{
using FieldType = AvgFieldType<T>;
if constexpr (!is_decimal<T>)
return std::make_shared<DataTypeNumber<FieldType>>();
else
{
using DataType = DataTypeDecimal<FieldType>;
return std::make_shared<DataType>(DataType::maxPrecision(), scale);
}
}
};
}

View File

@ -190,7 +190,7 @@ public:
continue;
decltype(merged_maps.begin()) it;
if constexpr (IsDecimalNumber<T>)
if constexpr (is_decimal<T>)
{
// FIXME why is storing NearestFieldType not enough, and we
// have to check for decimals again here?
@ -217,7 +217,7 @@ public:
new_values.resize(size);
new_values[col] = value;
if constexpr (IsDecimalNumber<T>)
if constexpr (is_decimal<T>)
{
UInt32 scale = static_cast<const ColumnDecimal<T> &>(key_column).getData().getScale();
merged_maps.emplace(DecimalField<T>(key, scale), std::move(new_values));
@ -280,7 +280,7 @@ public:
for (size_t col = 0; col < values_types.size(); ++col)
values_serializations[col]->deserializeBinary(values[col], buf);
if constexpr (IsDecimalNumber<T>)
if constexpr (is_decimal<T>)
merged_maps[key.get<DecimalField<T>>()] = values;
else
merged_maps[key.get<T>()] = values;
@ -396,7 +396,7 @@ private:
using Base = AggregateFunctionMapBase<T, Self, FieldVisitorSum, overflow, tuple_argument, true>;
/// ARCADIA_BUILD disallows unordered_set for big ints for some reason
static constexpr const bool allow_hash = !OverBigInt<T>;
static constexpr const bool allow_hash = !is_over_big_int<T>;
using ContainerT = std::conditional_t<allow_hash, std::unordered_set<T>, std::set<T>>;
ContainerT keys_to_keep;

View File

@ -35,6 +35,10 @@ public:
virtual bool isForInternalUsageOnly() const { return false; }
/** Whether the combinator supports nesting (of itself, i.e. ArrayArray or IfIf)
*/
virtual bool supportsNesting() const { return false; }
/** From the arguments for combined function (ex: UInt64, UInt8 for sumIf),
* get the arguments for nested function (ex: UInt64 for sum).
* If arguments are not suitable for combined function, throw an exception.

View File

@ -30,7 +30,7 @@ struct QuantileExactWeighted
};
using Weight = UInt64;
using UnderlyingType = typename NativeType<Value>::Type;
using UnderlyingType = NativeType<Value>;
using Hasher = std::conditional_t<std::is_same_v<Value, Decimal128>, Int128Hash, HashCRC32<UnderlyingType>>;
/// When creating, the hash table must be small.

View File

@ -121,7 +121,7 @@ public:
{
if (samples.empty())
{
if (DB::IsDecimalNumber<T>)
if (DB::is_decimal<T>)
return 0;
return onEmpty<double>();
}
@ -134,7 +134,7 @@ public:
size_t right_index = left_index + 1;
if (right_index == samples.size())
{
if constexpr (DB::IsDecimalNumber<T>)
if constexpr (DB::is_decimal<T>)
return static_cast<double>(samples[left_index].value);
else
return static_cast<double>(samples[left_index]);
@ -143,7 +143,7 @@ public:
double left_coef = right_index - index;
double right_coef = index - left_index;
if constexpr (DB::IsDecimalNumber<T>)
if constexpr (DB::is_decimal<T>)
return static_cast<double>(samples[left_index].value) * left_coef + static_cast<double>(samples[right_index].value) * right_coef;
else
return static_cast<double>(samples[left_index]) * left_coef + static_cast<double>(samples[right_index]) * right_coef;

View File

@ -50,6 +50,7 @@ void registerAggregateFunctionWelchTTest(AggregateFunctionFactory &);
void registerAggregateFunctionStudentTTest(AggregateFunctionFactory &);
void registerAggregateFunctionSingleValueOrNull(AggregateFunctionFactory &);
void registerAggregateFunctionSequenceNextNode(AggregateFunctionFactory &);
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -119,6 +120,7 @@ void registerAggregateFunctions()
registerWindowFunctions(factory);
registerAggregateFunctionIntervalLengthSum(factory);
registerAggregateFunctionSparkbar(factory);
}
{

View File

@ -83,9 +83,8 @@ namespace
/// Replaces elements of types TEMPORARY_TABLE or ALL_TEMPORARY_TABLES with elements of type TABLE or DATABASE.
void replaceTemporaryTablesWithTemporaryDatabase(Elements & elements)
{
for (size_t i = 0; i != elements.size(); ++i)
for (auto & element : elements)
{
auto & element = elements[i];
switch (element.type)
{
case ElementType::TEMPORARY_TABLE:

View File

@ -27,8 +27,8 @@ HedgedConnectionsFactory::HedgedConnectionsFactory(
: pool(pool_), settings(settings_), timeouts(timeouts_), table_to_check(table_to_check_), log(&Poco::Logger::get("HedgedConnectionsFactory"))
{
shuffled_pools = pool->getShuffledPools(settings);
for (size_t i = 0; i != shuffled_pools.size(); ++i)
replicas.emplace_back(ConnectionEstablisherAsync(shuffled_pools[i].pool, &timeouts, settings, log, table_to_check.get()));
for (auto shuffled_pool : shuffled_pools)
replicas.emplace_back(ConnectionEstablisherAsync(shuffled_pool.pool, &timeouts, settings, log, table_to_check.get()));
max_tries
= (settings ? size_t{settings->connections_with_failover_max_tries} : size_t{DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES});

View File

@ -506,8 +506,9 @@ static void pushBackAndCreateState(ColumnAggregateFunction::Container & data, Ar
void ColumnAggregateFunction::insert(const Field & x)
{
if (x.getType() != Field::Types::AggregateFunctionState)
throw Exception(String("Inserting field of type ") + x.getTypeName() + " into ColumnAggregateFunction. "
"Expected " + Field::Types::toString(Field::Types::AggregateFunctionState), ErrorCodes::LOGICAL_ERROR);
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Inserting field of type {} into ColumnAggregateFunction. Expected {}",
x.getTypeName(), Field::Types::AggregateFunctionState);
const auto & field_name = x.get<const AggregateFunctionStateData &>().name;
if (type_string != field_name)

View File

@ -57,9 +57,13 @@ public:
*/
static ColumnPtr wrap(ColumnPtr column)
{
/// The order of evaluation of function arguments is unspecified,
/// and could otherwise lead to accessing the object in a moved-from state
const auto size = column->size();
const auto bytes = column->allocatedBytes();
return ColumnCompressed::create(
column->size(),
column->allocatedBytes(),
size,
bytes,
[column = std::move(column)]{ return column; });
}
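The hazard being fixed: the lambda capture [column = std::move(column)] is itself a function argument, and argument evaluation order is unspecified, so the moving initialization may happen before column->size() is read. A minimal reproduction and the fix, with a hypothetical use() callee:

#include <cstddef>
#include <memory>
#include <utility>
#include <vector>

void use(std::size_t n, std::shared_ptr<std::vector<int>> v) { (void)n; (void)v; }

void wrapLike(std::shared_ptr<std::vector<int>> col)
{
    /// Hazardous: the by-value parameter may be initialized from std::move(col)
    /// before col->size() is evaluated, leaving col null when size() dereferences it.
    /// use(col->size(), std::move(col));

    /// Fix (as in the diff): read the value into a named local first, which
    /// sequences the read before any of the call's argument evaluations.
    const auto n = col->size();
    use(n, std::move(col));
}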
@ -124,4 +128,3 @@ private:
};
}

View File

@ -38,7 +38,7 @@ template class DecimalPaddedPODArray<Decimal128>;
template class DecimalPaddedPODArray<Decimal256>;
template class DecimalPaddedPODArray<DateTime64>;
template <typename T>
template <is_decimal T>
int ColumnDecimal<T>::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const
{
auto & other = static_cast<const Self &>(rhs_);
@ -50,7 +50,7 @@ int ColumnDecimal<T>::compareAt(size_t n, size_t m, const IColumn & rhs_, int) c
return decimalLess<T>(b, a, other.scale, scale) ? 1 : (decimalLess<T>(a, b, scale, other.scale) ? -1 : 0);
}
template <typename T>
template <is_decimal T>
void ColumnDecimal<T>::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
@ -59,13 +59,13 @@ void ColumnDecimal<T>::compareColumn(const IColumn & rhs, size_t rhs_row_num,
compare_results, direction, nan_direction_hint);
}
template <typename T>
template <is_decimal T>
bool ColumnDecimal<T>::hasEqualValues() const
{
return this->template hasEqualValuesImpl<ColumnDecimal<T>>();
}
template <typename T>
template <is_decimal T>
StringRef ColumnDecimal<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
auto * pos = arena.allocContinue(sizeof(T), begin);
@ -73,20 +73,20 @@ StringRef ColumnDecimal<T>::serializeValueIntoArena(size_t n, Arena & arena, cha
return StringRef(pos, sizeof(T));
}
template <typename T>
template <is_decimal T>
const char * ColumnDecimal<T>::deserializeAndInsertFromArena(const char * pos)
{
data.push_back(unalignedLoad<T>(pos));
return pos + sizeof(T);
}
template <typename T>
template <is_decimal T>
const char * ColumnDecimal<T>::skipSerializedInArena(const char * pos) const
{
return pos + sizeof(T);
}
template <typename T>
template <is_decimal T>
UInt64 ColumnDecimal<T>::get64([[maybe_unused]] size_t n) const
{
if constexpr (sizeof(T) > sizeof(UInt64))
@ -95,13 +95,13 @@ UInt64 ColumnDecimal<T>::get64([[maybe_unused]] size_t n) const
return static_cast<NativeT>(data[n]);
}
template <typename T>
template <is_decimal T>
void ColumnDecimal<T>::updateHashWithValue(size_t n, SipHash & hash) const
{
hash.update(data[n].value);
}
template <typename T>
template <is_decimal T>
void ColumnDecimal<T>::updateWeakHash32(WeakHash32 & hash) const
{
auto s = data.size();
@ -122,13 +122,13 @@ void ColumnDecimal<T>::updateWeakHash32(WeakHash32 & hash) const
}
}
template <typename T>
template <is_decimal T>
void ColumnDecimal<T>::updateHashFast(SipHash & hash) const
{
hash.update(reinterpret_cast<const char *>(data.data()), size() * sizeof(data[0]));
}
template <typename T>
template <is_decimal T>
void ColumnDecimal<T>::getPermutation(bool reverse, size_t limit, int , IColumn::Permutation & res) const
{
#if 1 /// TODO: perf test
@ -147,7 +147,7 @@ void ColumnDecimal<T>::getPermutation(bool reverse, size_t limit, int , IColumn:
permutation(reverse, limit, res);
}
template <typename T>
template <is_decimal T>
void ColumnDecimal<T>::updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_ranges) const
{
if (equal_ranges.empty())
@ -228,7 +228,7 @@ void ColumnDecimal<T>::updatePermutation(bool reverse, size_t limit, int, IColum
}
}
template <typename T>
template <is_decimal T>
ColumnPtr ColumnDecimal<T>::permute(const IColumn::Permutation & perm, size_t limit) const
{
size_t size = limit ? std::min(data.size(), limit) : data.size();
@ -244,7 +244,7 @@ ColumnPtr ColumnDecimal<T>::permute(const IColumn::Permutation & perm, size_t li
return res;
}
template <typename T>
template <is_decimal T>
MutableColumnPtr ColumnDecimal<T>::cloneResized(size_t size) const
{
auto res = this->create(0, scale);
@ -268,7 +268,7 @@ MutableColumnPtr ColumnDecimal<T>::cloneResized(size_t size) const
return res;
}
template <typename T>
template <is_decimal T>
void ColumnDecimal<T>::insertData(const char * src, size_t /*length*/)
{
T tmp;
@ -276,7 +276,7 @@ void ColumnDecimal<T>::insertData(const char * src, size_t /*length*/)
data.emplace_back(tmp);
}
template <typename T>
template <is_decimal T>
void ColumnDecimal<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
const ColumnDecimal & src_vec = assert_cast<const ColumnDecimal &>(src);
@ -292,7 +292,7 @@ void ColumnDecimal<T>::insertRangeFrom(const IColumn & src, size_t start, size_t
memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0]));
}
template <typename T>
template <is_decimal T>
ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const
{
size_t size = data.size();
@ -321,19 +321,19 @@ ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter & filt, ssize_t result_
return res;
}
template <typename T>
template <is_decimal T>
void ColumnDecimal<T>::expand(const IColumn::Filter & mask, bool inverted)
{
expandDataByMask<T>(data, mask, inverted);
}
template <typename T>
template <is_decimal T>
ColumnPtr ColumnDecimal<T>::index(const IColumn & indexes, size_t limit) const
{
return selectIndexImpl(*this, indexes, limit);
}
template <typename T>
template <is_decimal T>
ColumnPtr ColumnDecimal<T>::replicate(const IColumn::Offsets & offsets) const
{
size_t size = data.size();
@ -360,13 +360,13 @@ ColumnPtr ColumnDecimal<T>::replicate(const IColumn::Offsets & offsets) const
return res;
}
template <typename T>
template <is_decimal T>
void ColumnDecimal<T>::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
template <typename T>
template <is_decimal T>
ColumnPtr ColumnDecimal<T>::compress() const
{
size_t source_size = data.size() * sizeof(T);
@ -390,7 +390,7 @@ ColumnPtr ColumnDecimal<T>::compress() const
});
}
template <typename T>
template <is_decimal T>
void ColumnDecimal<T>::getExtremes(Field & min, Field & max) const
{
if (data.empty())

View File

@ -7,6 +7,8 @@
#include <Core/DecimalFunctions.h>
#include <Common/typeid_cast.h>
#include <common/sort.h>
#include <Core/TypeId.h>
#include <Core/TypeName.h>
#include <cmath>
@ -59,11 +61,9 @@ extern template class DecimalPaddedPODArray<Decimal256>;
extern template class DecimalPaddedPODArray<DateTime64>;
/// A ColumnVector for Decimals
template <typename T>
template <is_decimal T>
class ColumnDecimal final : public COWHelper<ColumnVectorHelper, ColumnDecimal<T>>
{
static_assert(IsDecimalNumber<T>);
private:
using Self = ColumnDecimal;
friend class COWHelper<ColumnVectorHelper, Self>;
@ -210,7 +210,12 @@ protected:
}
};
template <typename T>
template <class> class ColumnVector;
template <class T> struct ColumnVectorOrDecimalT { using Col = ColumnVector<T>; };
template <is_decimal T> struct ColumnVectorOrDecimalT<T> { using Col = ColumnDecimal<T>; };
template <class T> using ColumnVectorOrDecimal = typename ColumnVectorOrDecimalT<T>::Col;
template <is_decimal T>
template <typename Type>
ColumnPtr ColumnDecimal<T>::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
{

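Constraining the class template (template <is_decimal T>) instead of asserting inside it also makes the constraint visible to partial specialization, which is exactly what the ColumnVectorOrDecimalT dispatch above relies on: the unconstrained primary template picks the vector column, and the more-constrained specialization wins for decimal types. A self-contained toy sketch, again using a simplified is_decimal:

#include <type_traits>

template <class T> struct Decimal { using NativeType = T; };
template <class T> concept is_decimal = requires { typename T::NativeType; };  /// simplified

template <class T> struct ToyColumnVector {};
template <is_decimal T> struct ToyColumnDecimal {};  /// only decimal types may instantiate

/// Primary template for everything, constrained partial specialization for decimals;
/// the constrained specialization is more specialized and wins for decimal types.
template <class T> struct VectorOrDecimalT { using Col = ToyColumnVector<T>; };
template <is_decimal T> struct VectorOrDecimalT<T> { using Col = ToyColumnDecimal<T>; };
template <class T> using VectorOrDecimal = typename VectorOrDecimalT<T>::Col;

static_assert(std::is_same_v<VectorOrDecimal<int>, ToyColumnVector<int>>);
static_assert(std::is_same_v<VectorOrDecimal<Decimal<int>>, ToyColumnDecimal<Decimal<int>>>);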
View File

@ -276,7 +276,10 @@ bool ColumnMap::structureEquals(const IColumn & rhs) const
ColumnPtr ColumnMap::compress() const
{
auto compressed = nested->compress();
return ColumnCompressed::create(size(), compressed->byteSize(), [compressed = std::move(compressed)]
const auto byte_size = compressed->byteSize();
/// The order of evaluation of function arguments is unspecified,
/// and could otherwise lead to accessing the object in a moved-from state
return ColumnCompressed::create(size(), byte_size, [compressed = std::move(compressed)]
{
return ColumnMap::create(compressed->decompress());
});

Some files were not shown because too many files have changed in this diff.