Merge branch 'master' into kssenii-rabbitmq-improvements

Commit 4ce975c512 by alesapin, 2020-09-07 11:36:08 +03:00
255 changed files with 6679 additions and 1174 deletions


@ -17,5 +17,4 @@ ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time.
## Upcoming Events
* [ClickHouse at ByteDance (in Chinese)](https://mp.weixin.qq.com/s/Em-HjPylO8D7WPui4RREAQ) on August 28, 2020.
* [ClickHouse Data Integration Virtual Meetup](https://www.eventbrite.com/e/clickhouse-september-virtual-meetup-data-integration-tickets-117421895049) on September 10, 2020.


@ -38,18 +38,18 @@ namespace common
}
template <>
inline bool addOverflow(bInt256 x, bInt256 y, bInt256 & res)
inline bool addOverflow(wInt256 x, wInt256 y, wInt256 & res)
{
res = x + y;
return (y > 0 && x > std::numeric_limits<bInt256>::max() - y) ||
(y < 0 && x < std::numeric_limits<bInt256>::min() - y);
return (y > 0 && x > std::numeric_limits<wInt256>::max() - y) ||
(y < 0 && x < std::numeric_limits<wInt256>::min() - y);
}
template <>
inline bool addOverflow(bUInt256 x, bUInt256 y, bUInt256 & res)
inline bool addOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
{
res = x + y;
return x > std::numeric_limits<bUInt256>::max() - y;
return x > std::numeric_limits<wUInt256>::max() - y;
}
template <typename T>
@ -86,15 +86,15 @@ namespace common
}
template <>
inline bool subOverflow(bInt256 x, bInt256 y, bInt256 & res)
inline bool subOverflow(wInt256 x, wInt256 y, wInt256 & res)
{
res = x - y;
return (y < 0 && x > std::numeric_limits<bInt256>::max() + y) ||
(y > 0 && x < std::numeric_limits<bInt256>::min() + y);
return (y < 0 && x > std::numeric_limits<wInt256>::max() + y) ||
(y > 0 && x < std::numeric_limits<wInt256>::min() + y);
}
template <>
inline bool subOverflow(bUInt256 x, bUInt256 y, bUInt256 & res)
inline bool subOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
{
res = x - y;
return x < y;
@ -137,19 +137,19 @@ namespace common
}
template <>
inline bool mulOverflow(bInt256 x, bInt256 y, bInt256 & res)
inline bool mulOverflow(wInt256 x, wInt256 y, wInt256 & res)
{
res = x * y;
if (!x || !y)
return false;
bInt256 a = (x > 0) ? x : -x;
bInt256 b = (y > 0) ? y : -y;
wInt256 a = (x > 0) ? x : -x;
wInt256 b = (y > 0) ? y : -y;
return (a * b) / b != a;
}
template <>
inline bool mulOverflow(bUInt256 x, bUInt256 y, bUInt256 & res)
inline bool mulOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
{
res = x * y;
if (!x || !y)

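The checks above follow the classic limits-based overflow pattern: compute the wrapped result first, then detect overflow from the operands alone. A minimal standalone sketch of the same signed-addition test, using `int64_t` in place of `wInt256` so it compiles without the wide-integer header (the unsigned wrap is my addition to keep the demo free of signed-overflow UB; `wInt256` defines its own wrapping arithmetic):

```cpp
#include <cassert>
#include <cstdint>
#include <limits>

// Same shape as the addOverflow specialization above, instantiated for int64_t.
inline bool addOverflow(int64_t x, int64_t y, int64_t & res)
{
    // Wrapping add via unsigned arithmetic, then an operand-only overflow test.
    res = static_cast<int64_t>(static_cast<uint64_t>(x) + static_cast<uint64_t>(y));
    return (y > 0 && x > std::numeric_limits<int64_t>::max() - y)
        || (y < 0 && x < std::numeric_limits<int64_t>::min() - y);
}

int main()
{
    int64_t res;
    assert(!addOverflow(1, 2, res) && res == 3);
    assert(addOverflow(std::numeric_limits<int64_t>::max(), 1, res));
}
```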

@ -6,7 +6,7 @@
#include <string>
#include <type_traits>
#include <boost/multiprecision/cpp_int.hpp>
#include <common/wide_integer.h>
using Int8 = int8_t;
using Int16 = int16_t;
@ -25,12 +25,11 @@ using UInt64 = uint64_t;
using Int128 = __int128;
/// We have to use 127- and 255-bit integers to save a bit for sign serialization
//using bInt256 = boost::multiprecision::int256_t;
using bInt256 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<
255, 255, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void> >;
using bUInt256 = boost::multiprecision::uint256_t;
using wInt256 = std::wide_integer<256, signed>;
using wUInt256 = std::wide_integer<256, unsigned>;
static_assert(sizeof(wInt256) == 32);
static_assert(sizeof(wUInt256) == 32);
using String = std::string;
@ -44,7 +43,7 @@ struct is_signed
};
template <> struct is_signed<Int128> { static constexpr bool value = true; };
template <> struct is_signed<bInt256> { static constexpr bool value = true; };
template <> struct is_signed<wInt256> { static constexpr bool value = true; };
template <typename T>
inline constexpr bool is_signed_v = is_signed<T>::value;
@ -55,7 +54,7 @@ struct is_unsigned
static constexpr bool value = std::is_unsigned_v<T>;
};
template <> struct is_unsigned<bUInt256> { static constexpr bool value = true; };
template <> struct is_unsigned<wUInt256> { static constexpr bool value = true; };
template <typename T>
inline constexpr bool is_unsigned_v = is_unsigned<T>::value;
@ -69,8 +68,8 @@ struct is_integer
};
template <> struct is_integer<Int128> { static constexpr bool value = true; };
template <> struct is_integer<bInt256> { static constexpr bool value = true; };
template <> struct is_integer<bUInt256> { static constexpr bool value = true; };
template <> struct is_integer<wInt256> { static constexpr bool value = true; };
template <> struct is_integer<wUInt256> { static constexpr bool value = true; };
template <typename T>
inline constexpr bool is_integer_v = is_integer<T>::value;
@ -93,9 +92,9 @@ struct make_unsigned
typedef std::make_unsigned_t<T> type;
};
template <> struct make_unsigned<__int128> { using type = unsigned __int128; };
template <> struct make_unsigned<bInt256> { using type = bUInt256; };
template <> struct make_unsigned<bUInt256> { using type = bUInt256; };
template <> struct make_unsigned<Int128> { using type = unsigned __int128; };
template <> struct make_unsigned<wInt256> { using type = wUInt256; };
template <> struct make_unsigned<wUInt256> { using type = wUInt256; };
template <typename T> using make_unsigned_t = typename make_unsigned<T>::type;
@ -105,8 +104,8 @@ struct make_signed
typedef std::make_signed_t<T> type;
};
template <> struct make_signed<bInt256> { typedef bInt256 type; };
template <> struct make_signed<bUInt256> { typedef bInt256 type; };
template <> struct make_signed<wInt256> { using type = wInt256; };
template <> struct make_signed<wUInt256> { using type = wInt256; };
template <typename T> using make_signed_t = typename make_signed<T>::type;
@ -116,8 +115,8 @@ struct is_big_int
static constexpr bool value = false;
};
template <> struct is_big_int<bUInt256> { static constexpr bool value = true; };
template <> struct is_big_int<bInt256> { static constexpr bool value = true; };
template <> struct is_big_int<wInt256> { static constexpr bool value = true; };
template <> struct is_big_int<wUInt256> { static constexpr bool value = true; };
template <typename T>
inline constexpr bool is_big_int_v = is_big_int<T>::value;
@ -125,14 +124,11 @@ inline constexpr bool is_big_int_v = is_big_int<T>::value;
template <typename T>
inline std::string bigintToString(const T & x)
{
return x.str();
return to_string(x);
}
template <typename To, typename From>
inline To bigint_cast(const From & x [[maybe_unused]])
{
if constexpr ((is_big_int_v<From> && std::is_same_v<To, UInt8>) || (is_big_int_v<To> && std::is_same_v<From, UInt8>))
return static_cast<uint8_t>(x);
else
return static_cast<To>(x);
return static_cast<To>(x);
}

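Taken together, these trait specializations are what let generic code treat `wInt256`/`wUInt256` like built-in integers. A small sketch of the intended use, assuming `common/types.h` is on the include path; the concrete values are illustrative only:

```cpp
#include <common/types.h>

static_assert(is_signed_v<wInt256> && is_unsigned_v<wUInt256>);
static_assert(is_big_int_v<wInt256> && is_big_int_v<wUInt256>);
static_assert(std::is_same_v<make_unsigned_t<wInt256>, wUInt256>);
static_assert(std::is_same_v<make_signed_t<wUInt256>, wInt256>);

int main()
{
    wInt256 x = -42;
    // bigint_cast is now a plain static_cast; the old UInt8 special case is gone.
    auto squared = bigint_cast<UInt64>(x * x);   // 1764
    // bigintToString now routes to to_string(wide_integer).
    return squared == 1764 && bigintToString(x) == "-42" ? 0 : 1;
}
```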
base/common/wide_integer.h (new file, 249 lines)

@ -0,0 +1,249 @@
#pragma once
///////////////////////////////////////////////////////////////
// Distributed under the Boost Software License, Version 1.0.
// (See at http://www.boost.org/LICENSE_1_0.txt)
///////////////////////////////////////////////////////////////
/* Divide and multiply
*
*
* Copyright (c) 2008
* Evan Teran
*
* Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose and without fee is hereby granted, provided
* that the above copyright notice appears in all copies and that both the
* copyright notice and this permission notice appear in supporting
* documentation, and that the same name not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. We make no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*/
#include <climits> // CHAR_BIT
#include <cmath>
#include <cstdint>
#include <limits>
#include <type_traits>
namespace std
{
template <size_t Bits, typename Signed>
class wide_integer;
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
struct common_type<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>>;
template <size_t Bits, typename Signed, typename Arithmetic>
struct common_type<wide_integer<Bits, Signed>, Arithmetic>;
template <typename Arithmetic, size_t Bits, typename Signed>
struct common_type<Arithmetic, wide_integer<Bits, Signed>>;
template <size_t Bits, typename Signed>
class wide_integer
{
public:
using base_type = uint8_t;
using signed_base_type = int8_t;
// ctors
wide_integer() = default;
template <typename T>
constexpr wide_integer(T rhs) noexcept;
template <typename T>
constexpr wide_integer(std::initializer_list<T> il) noexcept;
// assignment
template <size_t Bits2, typename Signed2>
constexpr wide_integer<Bits, Signed> & operator=(const wide_integer<Bits2, Signed2> & rhs) noexcept;
template <typename Arithmetic>
constexpr wide_integer<Bits, Signed> & operator=(Arithmetic rhs) noexcept;
template <typename Arithmetic>
constexpr wide_integer<Bits, Signed> & operator*=(const Arithmetic & rhs);
template <typename Arithmetic>
constexpr wide_integer<Bits, Signed> & operator/=(const Arithmetic & rhs);
template <typename Arithmetic>
constexpr wide_integer<Bits, Signed> & operator+=(const Arithmetic & rhs) noexcept(is_same<Signed, unsigned>::value);
template <typename Arithmetic>
constexpr wide_integer<Bits, Signed> & operator-=(const Arithmetic & rhs) noexcept(is_same<Signed, unsigned>::value);
template <typename Integral>
constexpr wide_integer<Bits, Signed> & operator%=(const Integral & rhs);
template <typename Integral>
constexpr wide_integer<Bits, Signed> & operator&=(const Integral & rhs) noexcept;
template <typename Integral>
constexpr wide_integer<Bits, Signed> & operator|=(const Integral & rhs) noexcept;
template <typename Integral>
constexpr wide_integer<Bits, Signed> & operator^=(const Integral & rhs) noexcept;
constexpr wide_integer<Bits, Signed> & operator<<=(int n);
constexpr wide_integer<Bits, Signed> & operator>>=(int n) noexcept;
constexpr wide_integer<Bits, Signed> & operator++() noexcept(is_same<Signed, unsigned>::value);
constexpr wide_integer<Bits, Signed> operator++(int) noexcept(is_same<Signed, unsigned>::value);
constexpr wide_integer<Bits, Signed> & operator--() noexcept(is_same<Signed, unsigned>::value);
constexpr wide_integer<Bits, Signed> operator--(int) noexcept(is_same<Signed, unsigned>::value);
// observers
constexpr explicit operator bool() const noexcept;
template <class T>
using __integral_not_wide_integer_class = typename std::enable_if<std::is_arithmetic<T>::value, T>::type;
template <class T, class = __integral_not_wide_integer_class<T>>
constexpr operator T() const noexcept;
constexpr operator long double() const noexcept;
constexpr operator double() const noexcept;
constexpr operator float() const noexcept;
struct _impl;
private:
template <size_t Bits2, typename Signed2>
friend class wide_integer;
friend class numeric_limits<wide_integer<Bits, signed>>;
friend class numeric_limits<wide_integer<Bits, unsigned>>;
base_type m_arr[_impl::arr_size];
};
template <typename T>
static constexpr bool ArithmeticConcept() noexcept;
template <class T1, class T2>
using __only_arithmetic = typename std::enable_if<ArithmeticConcept<T1>() && ArithmeticConcept<T2>()>::type;
template <typename T>
static constexpr bool IntegralConcept() noexcept;
template <class T, class T2>
using __only_integer = typename std::enable_if<IntegralConcept<T>() && IntegralConcept<T2>()>::type;
// Unary operators
template <size_t Bits, typename Signed>
constexpr wide_integer<Bits, Signed> operator~(const wide_integer<Bits, Signed> & lhs) noexcept;
template <size_t Bits, typename Signed>
constexpr wide_integer<Bits, Signed> operator-(const wide_integer<Bits, Signed> & lhs) noexcept(is_same<Signed, unsigned>::value);
template <size_t Bits, typename Signed>
constexpr wide_integer<Bits, Signed> operator+(const wide_integer<Bits, Signed> & lhs) noexcept(is_same<Signed, unsigned>::value);
// Binary operators
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
std::common_type_t<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>> constexpr
operator*(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
std::common_type_t<Arithmetic, Arithmetic2> constexpr operator*(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
std::common_type_t<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>> constexpr
operator/(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
std::common_type_t<Arithmetic, Arithmetic2> constexpr operator/(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
std::common_type_t<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>> constexpr
operator+(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
std::common_type_t<Arithmetic, Arithmetic2> constexpr operator+(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
std::common_type_t<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>> constexpr
operator-(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
std::common_type_t<Arithmetic, Arithmetic2> constexpr operator-(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
std::common_type_t<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>> constexpr
operator%(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Integral, typename Integral2, class = __only_integer<Integral, Integral2>>
std::common_type_t<Integral, Integral2> constexpr operator%(const Integral & rhs, const Integral2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
std::common_type_t<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>> constexpr
operator&(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Integral, typename Integral2, class = __only_integer<Integral, Integral2>>
std::common_type_t<Integral, Integral2> constexpr operator&(const Integral & rhs, const Integral2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
std::common_type_t<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>> constexpr
operator|(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Integral, typename Integral2, class = __only_integer<Integral, Integral2>>
std::common_type_t<Integral, Integral2> constexpr operator|(const Integral & rhs, const Integral2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
std::common_type_t<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>> constexpr
operator^(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Integral, typename Integral2, class = __only_integer<Integral, Integral2>>
std::common_type_t<Integral, Integral2> constexpr operator^(const Integral & rhs, const Integral2 & lhs);
// TODO: Integral
template <size_t Bits, typename Signed>
constexpr wide_integer<Bits, Signed> operator<<(const wide_integer<Bits, Signed> & lhs, int n) noexcept;
template <size_t Bits, typename Signed>
constexpr wide_integer<Bits, Signed> operator>>(const wide_integer<Bits, Signed> & lhs, int n) noexcept;
template <size_t Bits, typename Signed, typename Int, typename = std::enable_if_t<!std::is_same_v<Int, int>>>
constexpr wide_integer<Bits, Signed> operator<<(const wide_integer<Bits, Signed> & lhs, Int n) noexcept
{
return lhs << int(n);
}
template <size_t Bits, typename Signed, typename Int, typename = std::enable_if_t<!std::is_same_v<Int, int>>>
constexpr wide_integer<Bits, Signed> operator>>(const wide_integer<Bits, Signed> & lhs, Int n) noexcept
{
return lhs >> int(n);
}
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
constexpr bool operator<(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
constexpr bool operator<(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
constexpr bool operator>(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
constexpr bool operator>(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
constexpr bool operator<=(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
constexpr bool operator<=(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
constexpr bool operator>=(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
constexpr bool operator>=(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
constexpr bool operator==(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
constexpr bool operator==(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
constexpr bool operator!=(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
constexpr bool operator!=(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed>
std::string to_string(const wide_integer<Bits, Signed> & n);
template <size_t Bits, typename Signed>
struct hash<wide_integer<Bits, Signed>>;
}
#include "wide_integer_impl.h"

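Because the class lives in namespace `std` and mirrors the built-in integer interface, it drops into existing generic code with no adaptation. A hedged usage sketch of the declarations above (arithmetic is assumed to be two's-complement on `Bits` bits):

```cpp
#include <common/wide_integer.h>
#include <iostream>

int main()
{
    using Int256 = std::wide_integer<256, signed>;

    Int256 a = 1;
    a <<= 200;                    // 2^200, far beyond __int128
    Int256 b = a / 3;             // mixed wide/builtin operands are supported

    std::cout << std::to_string(a) << '\n';
    std::cout << std::to_string(b + (-b)) << '\n';   // prints 0
}
```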
(File diff suppressed because it is too large.)


@ -74,10 +74,9 @@ target_link_libraries(capnpc PUBLIC capnp)
# The library has substandard code
if (COMPILER_GCC)
set (SUPPRESS_WARNINGS -Wno-non-virtual-dtor -Wno-sign-compare -Wno-strict-aliasing -Wno-maybe-uninitialized
-Wno-deprecated-declarations -Wno-class-memaccess)
set (SUPPRESS_WARNINGS -w)
elseif (COMPILER_CLANG)
set (SUPPRESS_WARNINGS -Wno-non-virtual-dtor -Wno-sign-compare -Wno-strict-aliasing -Wno-deprecated-declarations)
set (SUPPRESS_WARNINGS -w)
set (CAPNP_PRIVATE_CXX_FLAGS -fno-char8_t)
endif ()


@ -67,13 +67,6 @@ if uname -mpi | grep -q 'x86_64'; then
fi
SUPPORTED_COMMANDS="{start|stop|status|restart|forcestop|forcerestart|reload|condstart|condstop|condrestart|condreload|initdb}"
is_supported_command()
{
echo "$SUPPORTED_COMMANDS" | grep -E "(\{|\|)$1(\||})" &> /dev/null
}
is_running()
{
pgrep --pidfile "$CLICKHOUSE_PIDFILE" $(echo "${PROGRAM}" | cut -c1-15) 1> /dev/null 2> /dev/null
@ -283,13 +276,12 @@ use_cron()
fi
return 0
}
# returns false if cron disabled (with systemd)
enable_cron()
{
use_cron && sed -i 's/^#*//' "$CLICKHOUSE_CRONFILE"
}
# returns false if cron disabled (with systemd)
disable_cron()
{
use_cron && sed -i 's/^#*/#/' "$CLICKHOUSE_CRONFILE"
@ -312,15 +304,14 @@ main()
EXIT_STATUS=0
case "$1" in
start)
start && enable_cron
service_or_func start && enable_cron
;;
stop)
# disable_cron returns false if cron disabled (with systemd) - not checking return status
disable_cron
stop
service_or_func stop
;;
restart)
restart && enable_cron
service_or_func restart && enable_cron
;;
forcestop)
disable_cron
@ -330,7 +321,7 @@ main()
forcerestart && enable_cron
;;
reload)
restart
service_or_func restart
;;
condstart)
is_running || service_or_func start
@ -354,7 +345,7 @@ main()
disable_cron
;;
*)
echo "Usage: $0 $SUPPORTED_COMMANDS"
echo "Usage: $0 {start|stop|status|restart|forcestop|forcerestart|reload|condstart|condstop|condrestart|condreload|initdb}"
exit 2
;;
esac


@ -7,3 +7,4 @@ services:
MYSQL_ROOT_PASSWORD: clickhouse
ports:
- 3308:3306
command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency


@ -1,10 +0,0 @@
version: '2.3'
services:
mysql5_7:
image: mysql:5.7
restart: always
environment:
MYSQL_ROOT_PASSWORD: clickhouse
ports:
- 33307:3306
command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency


@ -565,40 +565,54 @@ create table unstable_queries_report engine File(TSV, 'report/unstable-queries.t
toDecimal64(stat_threshold, 3), unstable_fail, test, query_index, query_display_name
from queries where unstable_show order by stat_threshold desc;
create table test_time_changes engine File(TSV, 'report/test-time-changes.tsv') as
select test, queries, average_time_change from (
select test, count(*) queries,
sum(left) as left, sum(right) as right,
(right - left) / right average_time_change
from queries
group by test
order by abs(average_time_change) desc
)
;
create table unstable_tests engine File(TSV, 'report/unstable-tests.tsv') as
select test, sum(unstable_show) total_unstable, sum(changed_show) total_changed
create view test_speedup as
select
test,
exp2(avg(log2(left / right))) times_speedup,
count(*) queries,
unstable + changed bad,
sum(changed_show) changed,
sum(unstable_show) unstable
from queries
group by test
order by total_unstable + total_changed desc
order by times_speedup desc
;
create view total_speedup as
select
'Total' test,
exp2(avg(log2(times_speedup))) times_speedup,
sum(queries) queries,
unstable + changed bad,
sum(changed) changed,
sum(unstable) unstable
from test_speedup
;
create table test_perf_changes_report engine File(TSV, 'report/test-perf-changes.tsv') as
select test,
queries,
coalesce(total_unstable, 0) total_unstable,
coalesce(total_changed, 0) total_changed,
total_unstable + total_changed total_bad,
coalesce(toString(toDecimal64(average_time_change, 3)), '??') average_time_change_str
from test_time_changes
full join unstable_tests
using test
where (abs(average_time_change) > 0.05 and queries > 5)
or (total_bad > 0)
order by total_bad desc, average_time_change desc
settings join_use_nulls = 1
with
(times_speedup >= 1
? '-' || toString(toDecimal64(times_speedup, 3)) || 'x'
: '+' || toString(toDecimal64(1 / times_speedup, 3)) || 'x')
as times_speedup_str
select test, times_speedup_str, queries, bad, changed, unstable
-- Not sure what's the precedence of UNION ALL vs WHERE & ORDER BY, hence all
-- the braces.
from (
(
select * from total_speedup
) union all (
select * from test_speedup
where
(times_speedup >= 1 ? times_speedup : (1 / times_speedup)) >= 1.005
or bad
)
)
order by test = 'Total' desc, times_speedup desc
;
create view total_client_time_per_query as select *
from file('analyze/client-times.tsv', TSV,
'test text, query_index int, client float, server float');

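For the record, `exp2(avg(log2(left / right)))` is the geometric mean of the per-query time ratios, which is the standard way to average speedups (an arithmetic mean of ratios lets a single outlier dominate). Assuming `left`/`right` are the old/new server timings, over queries i = 1..n:

```latex
\text{times\_speedup}
    = 2^{\frac{1}{n}\sum_{i=1}^{n}\log_2\!\left(\text{left}_i/\text{right}_i\right)}
    = \left(\prod_{i=1}^{n}\frac{\text{left}_i}{\text{right}_i}\right)^{1/n}
```

A value of at least 1 means the new build is faster on (geometric) average; those values get the '-' (speedup) prefix in `times_speedup_str`, the rest get '+'.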

@ -262,6 +262,13 @@ for query_index, q in enumerate(test_queries):
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
server_seconds += c.last_query.elapsed
if c.last_query.elapsed > 10:
# Stop processing pathologically slow queries, to avoid timing out
# the entire test task. This shouldn't really happen, so we don't
# need much handling for this case and can just exit.
print(f'The query no. {query_index} is taking too long to run ({c.last_query.elapsed} s)', file=sys.stderr)
exit(2)
client_seconds = time.perf_counter() - start_seconds
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')


@ -370,7 +370,7 @@ if args.report == 'main':
columns = [
'Old,&nbsp;s', # 0
'New,&nbsp;s', # 1
'Times speedup / slowdown', # 2
'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', # 2
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', # 3
'p&nbsp;<&nbsp;0.001 threshold', # 4
# Failed # 5
@ -447,7 +447,7 @@ if args.report == 'main':
addSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)
addSimpleTable('Test performance changes',
['Test', 'Queries', 'Unstable', 'Changed perf', 'Total not OK', 'Avg relative time diff'],
['Test', 'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', 'Queries', 'Total not OK', 'Changed perf', 'Unstable'],
tsvRows('report/test-perf-changes.tsv'))
def add_test_times():
@ -647,7 +647,7 @@ elif args.report == 'all-queries':
# Unstable #1
'Old,&nbsp;s', #2
'New,&nbsp;s', #3
'Times speedup / slowdown', #4
'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', #4
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', #5
'p&nbsp;&lt;&nbsp;0.001 threshold', #6
'Test', #7


@ -29,17 +29,26 @@ if [[ -n "$USE_DATABASE_ATOMIC" ]] && [[ "$USE_DATABASE_ATOMIC" -eq 1 ]]; then
ln -s /usr/share/clickhouse-test/config/database_atomic_usersd.xml /etc/clickhouse-server/users.d/
fi
echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7'" >> /etc/environment
echo "TSAN_SYMBOLIZER_PATH=/usr/lib/llvm-10/bin/llvm-symbolizer" >> /etc/environment
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment
echo "ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-10/bin/llvm-symbolizer" >> /etc/environment
echo "UBSAN_SYMBOLIZER_PATH=/usr/lib/llvm-10/bin/llvm-symbolizer" >> /etc/environment
echo "LLVM_SYMBOLIZER_PATH=/usr/lib/llvm-10/bin/llvm-symbolizer" >> /etc/environment
function start()
{
counter=0
until clickhouse-client --query "SELECT 1"
do
if [ "$counter" -gt 120 ]
then
echo "Cannot start clickhouse-server"
cat /var/log/clickhouse-server/stdout.log
tail -n1000 /var/log/clickhouse-server/stderr.log
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
break
fi
timeout 120 service clickhouse-server start
sleep 0.5
counter=$(($counter + 1))
done
}
service zookeeper start
sleep 5
service clickhouse-server start
sleep 5
start
/s3downloader --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "SHOW DATABASES"


@ -71,14 +71,26 @@ ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config
ln -s --backup=simple --suffix=_original.xml \
/usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
function start()
{
counter=0
until clickhouse-client --query "SELECT 1"
do
if [ "$counter" -gt 120 ]
then
echo "Cannot start clickhouse-server"
cat /var/log/clickhouse-server/stdout.log
tail -n1000 /var/log/clickhouse-server/stderr.log
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
break
fi
timeout 120 service clickhouse-server start
sleep 0.5
counter=$(($counter + 1))
done
}
service zookeeper start
sleep 5
start_clickhouse
sleep 5
start
if ! /s3downloader --dataset-names $DATASETS; then
echo "Cannot download datatsets"


@ -1290,6 +1290,47 @@ Possible values:
Default value: 0.
## distributed\_group\_by\_no\_merge {#distributed-group-by-no-merge}
Do not merge aggregation states from different servers for distributed query processing. Use this when you are certain that different shards hold different keys.
Possible values:
- 0 — Disabled (final query processing is done on the initiator node).
- 1 — Do not merge aggregation states from different servers for distributed query processing (the query is processed completely on the shard; the initiator only proxies the data).
- 2 — Same as 1, but apply `ORDER BY` and `LIMIT` on the initiator (for queries with `ORDER BY` and/or `LIMIT`).
**Example**
```sql
SELECT *
FROM remote('127.0.0.{2,3}', system.one)
GROUP BY dummy
LIMIT 1
SETTINGS distributed_group_by_no_merge = 1
FORMAT PrettyCompactMonoBlock
┌─dummy─┐
│ 0 │
│ 0 │
└───────┘
```
```sql
SELECT *
FROM remote('127.0.0.{2,3}', system.one)
GROUP BY dummy
LIMIT 1
SETTINGS distributed_group_by_no_merge = 2
FORMAT PrettyCompactMonoBlock
┌─dummy─┐
│ 0 │
└───────┘
```
Default value: 0
## optimize\_skip\_unused\_shards {#optimize-skip-unused-shards}
Enables or disables skipping of unused shards for [SELECT](../../sql-reference/statements/select/index.md) queries that have a sharding key condition in `WHERE/PREWHERE` (assuming that the data is distributed by the sharding key; otherwise the setting does nothing).
@ -1337,6 +1378,40 @@ Possible values:
Default value: 0
## optimize\_distributed\_group\_by\_sharding\_key {#optimize-distributed-group-by-sharding-key}
Optimize `GROUP BY sharding_key` queries by avoiding costly aggregation on the initiator server (this reduces memory usage for the query on the initiator server).
The following types of queries are supported (and all combinations of them):
- `SELECT DISTINCT [..., ]sharding_key[, ...] FROM dist`
- `SELECT ... FROM dist GROUP BY sharding_key[, ...]`
- `SELECT ... FROM dist GROUP BY sharding_key[, ...] ORDER BY x`
- `SELECT ... FROM dist GROUP BY sharding_key[, ...] LIMIT 1`
- `SELECT ... FROM dist GROUP BY sharding_key[, ...] LIMIT 1 BY x`
The following types of queries are not supported (support for some of them may be added later):
- `SELECT ... GROUP BY sharding_key[, ...] WITH TOTALS`
- `SELECT ... GROUP BY sharding_key[, ...] WITH ROLLUP`
- `SELECT ... GROUP BY sharding_key[, ...] WITH CUBE`
- `SELECT ... GROUP BY sharding_key[, ...] SETTINGS extremes=1`
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 0
See also:
- [distributed\_group\_by\_no\_merge](#distributed-group-by-no-merge)
- [optimize\_skip\_unused\_shards](#optimize-skip-unused-shards)
!!! note "Note"
    Right now it requires `optimize_skip_unused_shards` (the reason is that this setting may one day be enabled by default, and it works correctly only if the data was inserted via a Distributed table, i.e. the data is distributed according to the sharding_key).
## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop}
Enables or disables throwing an exception if an [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) query didn't perform a merge.
@ -1894,9 +1969,9 @@ Locking timeout is used to protect from deadlocks while executing read/write ope
Possible values:
- Positive integer.
- Positive integer (in seconds).
- 0 — No locking timeout.
Default value: `120`.
Default value: `120` seconds.
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->


@ -1756,4 +1756,17 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;
- [CREATE TABLE query clauses and settings](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-query-clauses) (the `merge_with_ttl_timeout` setting)
- [Table TTL](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl)
## lock_acquire_timeout {#lock_acquire_timeout}
Sets the number of seconds the server waits to acquire a table lock.
The lock timeout protects against deadlocks while executing read/write operations on tables. If the timeout expires and the lock could not be acquired, the server returns an exception with the code `DEADLOCK_AVOIDED` and the message "Locking attempt timed out! Possible deadlock avoided. Client should retry."
Possible values:
- A positive integer (in seconds).
- 0 — No lock timeout.
Default value: `120` seconds.
[Original article](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->


@ -180,12 +180,13 @@ def build(args):
if not args.skip_website:
website.build_website(args)
test.test_templates(args.website_dir)
if not args.skip_test_templates:
test.test_templates(args.website_dir)
build_docs(args)
from github import build_releases
build_releases(args, build_docs)
if not args.skip_docs:
build_docs(args)
from github import build_releases
build_releases(args, build_docs)
if not args.skip_blog:
blog.build_blog(args)
@ -220,6 +221,8 @@ if __name__ == '__main__':
arg_parser.add_argument('--skip-website', action='store_true')
arg_parser.add_argument('--skip-blog', action='store_true')
arg_parser.add_argument('--skip-git-log', action='store_true')
arg_parser.add_argument('--skip-docs', action='store_true')
arg_parser.add_argument('--skip-test-templates', action='store_true')
arg_parser.add_argument('--test-only', action='store_true')
arg_parser.add_argument('--minify', action='store_true')
arg_parser.add_argument('--htmlproofer', action='store_true')


@ -104,6 +104,8 @@ public:
query_processing_stage = QueryProcessingStage::FetchColumns;
else if (stage == "with_mergeable_state")
query_processing_stage = QueryProcessingStage::WithMergeableState;
else if (stage == "with_mergeable_state_after_aggregation")
query_processing_stage = QueryProcessingStage::WithMergeableStateAfterAggregation;
else
throw Exception("Unknown query processing stage: " + stage, ErrorCodes::BAD_ARGUMENTS);
@ -564,8 +566,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
desc.add_options()
("help", "produce help message")
("concurrency,c", value<unsigned>()->default_value(1), "number of parallel queries")
("delay,d", value<double>()->default_value(1), "delay between intermediate reports in seconds (set 0 to disable reports)")
("stage", value<std::string>()->default_value("complete"), "request query processing up to specified stage: complete,fetch_columns,with_mergeable_state")
("delay,d", value<double>()->default_value(1), "delay between intermediate reports in seconds (set 0 to disable reports)")
("stage", value<std::string>()->default_value("complete"), "request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation")
("iterations,i", value<size_t>()->default_value(0), "amount of queries to be executed")
("timelimit,t", value<double>()->default_value(0.), "stop launch of queries after specified time limit")
("randomize,r", value<bool>()->default_value(false), "randomize order of execution")


@ -13,6 +13,7 @@
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeUUID.h>
#include <Interpreters/Context.h>
#include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/LimitBlockInputStream.h>
@ -363,6 +364,17 @@ static void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UI
}
}
static void transformUUID(const UInt128 & src, UInt128 & dst, UInt64 seed)
{
SipHash hash;
hash.update(seed);
hash.update(reinterpret_cast<const char *>(&src), sizeof(UInt128));
/// Preserve the version and variant bits of the original UUID
hash.get128(reinterpret_cast<char *>(&dst));
dst.high = (dst.high & 0x1fffffffffffffffull) | (src.high & 0xe000000000000000ull);
dst.low = (dst.low & 0xffffffffffff0fffull) | (src.low & 0x000000000000f000ull);
}
class FixedStringModel : public IModel
{
@ -400,6 +412,38 @@ public:
}
};
class UUIDModel : public IModel
{
private:
UInt64 seed;
public:
explicit UUIDModel(UInt64 seed_) : seed(seed_) {}
void train(const IColumn &) override {}
void finalize() override {}
ColumnPtr generate(const IColumn & column) override
{
const ColumnUInt128 & src_column = assert_cast<const ColumnUInt128 &>(column);
const auto & src_data = src_column.getData();
auto res_column = ColumnUInt128::create();
auto & res_data = res_column->getData();
res_data.resize(src_data.size());
for (size_t i = 0; i < src_column.size(); ++i)
transformUUID(src_data[i], res_data[i], seed);
return res_column;
}
void updateSeed() override
{
seed = hash(seed);
}
};
/// Leave date part as is and apply pseudorandom permutation to time difference with previous value within the same log2 class.
class DateTimeModel : public IModel
@ -935,6 +979,9 @@ public:
if (typeid_cast<const DataTypeFixedString *>(&data_type))
return std::make_unique<FixedStringModel>(seed);
if (typeid_cast<const DataTypeUUID *>(&data_type))
return std::make_unique<UUIDModel>(seed);
if (const auto * type = typeid_cast<const DataTypeArray *>(&data_type))
return std::make_unique<ArrayModel>(get(*type->getNestedType(), seed, markov_model_params));

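`transformUUID` above hashes the full 128-bit value and then splices the original version and variant bits back in, so obfuscated UUIDs remain structurally valid. The underlying mask-preserve idiom as a small sketch; the masks are copied from the diff, and which half holds version vs. variant depends on ClickHouse's `UInt128` UUID layout:

```cpp
#include <cassert>
#include <cstdint>

// Keep the bits selected by `mask` from `src`; take everything else from `hashed`.
inline uint64_t splice(uint64_t hashed, uint64_t src, uint64_t mask)
{
    return (hashed & ~mask) | (src & mask);
}

int main()
{
    const uint64_t high_mask = 0xe000000000000000ull;  // top 3 bits preserved
    const uint64_t low_mask  = 0x000000000000f000ull;  // bits 12..15 preserved

    // The preserved bits survive no matter what the hash produced.
    assert(splice(/*hashed*/ 0, /*src*/ 0x4000, low_mask) == 0x4000);
    assert(splice(~0ull, 0, high_mask) == (~0ull & ~high_mask));
}
```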

@ -85,12 +85,12 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const
load_factor = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[1]);
if (load_factor < 1)
throw Exception("Too small parameter for aggregate function " + name + ". Minimum: 1",
throw Exception("Too small parameter 'load_factor' for aggregate function " + name + ". Minimum: 1",
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
}
if (k > TOP_K_MAX_SIZE)
throw Exception("Too large parameter for aggregate function " + name + ". Maximum: " + toString(TOP_K_MAX_SIZE),
if (k > TOP_K_MAX_SIZE || load_factor > TOP_K_MAX_SIZE || k * load_factor > TOP_K_MAX_SIZE)
throw Exception("Too large parameter(s) for aggregate function " + name + ". Maximum: " + toString(TOP_K_MAX_SIZE),
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
if (k == 0)


@ -126,7 +126,7 @@ public:
bool isNumeric() const override { return false; }
bool canBeInsideNullable() const override { return true; }
bool isFixedAndContiguous() const override { return is_POD; }
bool isFixedAndContiguous() const override { return true; }
size_t sizeOfValueIfFixed() const override { return sizeof(T); }
size_t size() const override { return data.size(); }


@ -12,11 +12,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
/** Stuff for comparing numbers.
* Integer values are compared as usual.
* Floating-point numbers are compared this way that NaNs always end up at the end
@ -298,23 +293,17 @@ public:
void gather(ColumnGathererStream & gatherer_stream) override;
bool canBeInsideNullable() const override { return true; }
bool isFixedAndContiguous() const override { return is_POD; }
bool isFixedAndContiguous() const override { return true; }
size_t sizeOfValueIfFixed() const override { return sizeof(T); }
StringRef getRawData() const override
{
if constexpr (is_POD)
return StringRef(reinterpret_cast<const char*>(data.data()), byteSize());
else
throw Exception("getRawData() is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED);
return StringRef(reinterpret_cast<const char*>(data.data()), byteSize());
}
StringRef getDataAt(size_t n) const override
{
if constexpr (is_POD)
return StringRef(reinterpret_cast<const char *>(&data[n]), sizeof(data[n]));
else
throw Exception("getDataAt() is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED);
return StringRef(reinterpret_cast<const char *>(&data[n]), sizeof(data[n]));
}
bool structureEquals(const IColumn & rhs) const override

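Both columns can now report `isFixedAndContiguous()` unconditionally because the wide-integer elements are plain 32-byte POD values, which is what makes `getRawData()`/`getDataAt()` sound for them. A compile-time sanity check of that assumption:

```cpp
#include <type_traits>

#include <common/types.h>   // wInt256, wUInt256

// Raw-memory access to a column's backing array is only sound if the element
// type is trivially copyable with a known fixed size.
static_assert(std::is_trivially_copyable_v<wInt256>);
static_assert(std::is_trivially_copyable_v<wUInt256>);
static_assert(sizeof(wInt256) == 32 && sizeof(wUInt256) == 32);

int main() {}
```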

@ -1,6 +1,7 @@
#pragma once
#include <Core/Types.h>
#include <Core/BigInt.h>
#include <Common/UInt128.h>
#include <common/unaligned.h>
@ -89,8 +90,7 @@ template <typename T>
inline typename std::enable_if<is_big_int_v<T>, DB::UInt64>::type
intHashCRC32(const T & x, DB::UInt64 updated_value)
{
std::vector<UInt64> parts;
export_bits(x, std::back_inserter(parts), sizeof(UInt64), false);
std::vector<UInt64> parts = DB::BigInt<T>::toIntArray(x);
for (const auto & part : parts)
updated_value = intHashCRC32(part, updated_value);
@ -199,7 +199,7 @@ inline size_t DefaultHash64(std::enable_if_t<(sizeof(T) > sizeof(UInt64)), T> ke
{
return intHash64(key.low ^ key.high);
}
else if constexpr (std::is_same_v<T, bInt256> || std::is_same_v<T, bUInt256>)
else if constexpr (is_big_int_v<T> && sizeof(T) == 32)
{
return intHash64(static_cast<UInt64>(key) ^
static_cast<UInt64>(key >> 64) ^
@ -256,7 +256,7 @@ inline size_t hashCRC32(std::enable_if_t<(sizeof(T) > sizeof(UInt64)), T> key)
{
return intHashCRC32(key.low ^ key.high);
}
else if constexpr (std::is_same_v<T, bInt256> || std::is_same_v<T, bUInt256>)
else if constexpr (is_big_int_v<T> && sizeof(T) == 32)
{
return intHashCRC32(static_cast<UInt64>(key) ^
static_cast<UInt64>(key >> 64) ^
@ -358,7 +358,7 @@ struct IntHash32
{
return intHash32<salt>(key.low ^ key.high);
}
else if constexpr (std::is_same_v<T, bInt256> || std::is_same_v<T, bUInt256>)
else if constexpr (is_big_int_v<T> && sizeof(T) == 32)
{
return intHash32<salt>(static_cast<UInt64>(key) ^
static_cast<UInt64>(key >> 64) ^

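All three branches use the same fold: XOR the four 64-bit limbs of the 256-bit key down to a single `UInt64`, then apply the ordinary 64-bit hash. A self-contained sketch with the key given as raw limbs (the `intHash64` stand-in is an assumption, a generic 64-bit finalizer rather than ClickHouse's exact function):

```cpp
#include <cstdint>

// Stand-in 64-bit mix (MurmurHash3 finalizer); the real code calls DB::intHash64.
inline uint64_t intHash64(uint64_t x)
{
    x ^= x >> 33; x *= 0xff51afd7ed558ccdULL;
    x ^= x >> 33; x *= 0xc4ceb9fe1a85ec53ULL;
    x ^= x >> 33;
    return x;
}

// Same shape as the `is_big_int_v<T> && sizeof(T) == 32` branches above:
// fold 4 x 64-bit limbs with XOR, then hash once.
inline uint64_t hash256(const uint64_t limbs[4])
{
    return intHash64(limbs[0] ^ limbs[1] ^ limbs[2] ^ limbs[3]);
}

int main()
{
    const uint64_t limbs[4] = {1, 2, 3, 4};
    return hash256(limbs) == hash256(limbs) ? 0 : 1;   // deterministic
}
```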

@ -148,7 +148,7 @@ public:
}
template <typename T>
std::enable_if_t<is_big_int_v<T>, void> update(const T & x)
std::enable_if_t<is_big_int_v<T> && !std::has_unique_object_representations_v<T>, void> update(const T & x)
{
update(DB::BigInt<T>::serialize(x));
}
@ -213,7 +213,7 @@ std::enable_if_t<std::has_unique_object_representations_v<T>, UInt64> sipHash64(
}
template <typename T>
std::enable_if_t<(std::is_floating_point_v<T> || is_big_int_v<T>), UInt64> sipHash64(const T & x)
std::enable_if_t<(std::is_floating_point_v<T> || (is_big_int_v<T> && !std::has_unique_object_representations_v<T>)), UInt64> sipHash64(const T & x)
{
SipHash hash;
hash.update(x);


@ -147,16 +147,17 @@ public:
{
// Increase weight of a key that already exists
auto hash = counter_map.hash(key);
auto counter = findCounter(key, hash);
if (counter)
if (auto counter = findCounter(key, hash); counter)
{
counter->count += increment;
counter->error += error;
percolate(counter);
return;
}
// Key doesn't exist, but can fit in the top K
else if (unlikely(size() < capacity()))
if (unlikely(size() < capacity()))
{
auto c = new Counter(arena.emplace(key), increment, error, hash);
push(c);


@ -254,7 +254,7 @@ public:
const auto offset = __builtin_ctz(mask);
haystack += offset;
if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
if (haystack + n <= haystack_end && pageSafe(haystack))
{
const auto v_haystack_offset = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
const auto v_against_l_offset = _mm_cmpeq_epi8(v_haystack_offset, cachel);
@ -463,7 +463,7 @@ public:
const auto offset = __builtin_ctz(mask);
haystack += offset;
if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
if (haystack + n <= haystack_end && pageSafe(haystack))
{
const auto v_haystack_offset = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
const auto v_against_l_offset = _mm_cmpeq_epi8(v_haystack_offset, cachel);
@ -652,7 +652,7 @@ public:
const auto offset = __builtin_ctz(mask);
haystack += offset;
if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
if (haystack + n <= haystack_end && pageSafe(haystack))
{
/// check for first 16 octets
const auto v_haystack_offset = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));


@ -67,6 +67,11 @@ struct UInt128
bool inline operator <= (const Int128 rhs) const { return *this <= UInt128(rhs, rhs >> 64) && rhs >= 0; }
bool inline operator < (const Int128 rhs) const { return *this < UInt128(rhs, rhs >> 64) && rhs >= 0; }
bool inline operator > (const Int256 rhs) const { return (rhs < 0) || ((Int256(high) << 64) + low) > rhs; }
bool inline operator > (const UInt256 rhs) const { return ((UInt256(high) << 64) + low) > rhs; }
bool inline operator < (const Int256 rhs) const { return (rhs >= 0) && ((Int256(high) << 64) + low) < rhs; }
bool inline operator < (const UInt256 rhs) const { return ((UInt256(high) << 64) + low) < rhs; }
template <typename T> bool inline operator== (const T rhs) const { return *this == UInt128(rhs); }
template <typename T> bool inline operator!= (const T rhs) const { return *this != UInt128(rhs); }
template <typename T> bool inline operator>= (const T rhs) const { return *this >= UInt128(rhs); }


@ -424,7 +424,7 @@ void ZooKeeperRequest::write(WriteBuffer & out) const
struct ZooKeeperResponse : virtual Response
{
virtual ~ZooKeeperResponse() = default;
virtual ~ZooKeeperResponse() override = default;
virtual void readImpl(ReadBuffer &) = 0;
};


@ -260,7 +260,7 @@ struct ZooKeeperRequest : virtual Request
ZooKeeperRequest() = default;
ZooKeeperRequest(const ZooKeeperRequest &) = default;
virtual ~ZooKeeperRequest() = default;
virtual ~ZooKeeperRequest() override = default;
virtual ZooKeeper::OpNum getOpNum() const = 0;


@ -138,9 +138,9 @@ constexpr inline Int128 exp10_i128(int x)
}
inline bInt256 exp10_i256(int x)
inline wInt256 exp10_i256(int x)
{
using Int256 = bInt256;
using Int256 = wInt256;
static constexpr Int256 i10e18{1000000000000000000ll};
static const Int256 values[] = {
static_cast<Int256>(1ll),


@ -7,46 +7,15 @@ namespace DB
{
template <typename T>
struct BigIntPayload
struct BigInt
{
static_assert(!is_big_int_v<T>);
static constexpr size_t size = 0;
};
template <> struct BigIntPayload<bUInt256> { static constexpr size_t size = 32; };
template <> struct BigIntPayload<bInt256>
{
using UnsingedType = bUInt256;
static_assert(sizeof(T) == 32);
static constexpr size_t size = 32;
};
template <typename T>
struct BigInt : BigIntPayload<T>
{
using BigIntPayload<T>::size;
static constexpr size_t lastBit()
{
return size * 8 - 1;
}
static StringRef serialize(const T & x, char * pos)
{
if constexpr (is_signed_v<T>)
{
using UnsignedT = typename BigIntPayload<T>::UnsingedType;
if (x < 0)
{
UnsignedT unsigned_x = UnsignedT{0} - static_cast<UnsignedT>(-x);
export_bits(unsigned_x, pos, 8, false);
}
else
export_bits(x, pos, 8, false);
}
else
export_bits(x, pos, 8, false);
//unalignedStore<T>(pos, x);
memcpy(pos, &x, size);
return StringRef(pos, size);
}
@ -59,24 +28,20 @@ struct BigInt : BigIntPayload<T>
static T deserialize(const char * pos)
{
if constexpr (is_signed_v<T>)
{
using UnsignedT = typename BigIntPayload<T>::UnsingedType;
//return unalignedLoad<T>(pos);
T res;
memcpy(&res, pos, size);
return res;
}
UnsignedT unsigned_x;
import_bits(unsigned_x, pos, pos + size, false);
bool is_negative = bit_test(unsigned_x, lastBit());
if (is_negative)
unsigned_x = UnsignedT{0} - unsigned_x;
return static_cast<T>(unsigned_x);
}
else
{
T x;
import_bits(x, pos, pos + size, false);
return x;
}
static std::vector<UInt64> toIntArray(const T & x)
{
std::vector<UInt64> parts(4, 0);
parts[0] = UInt64(x);
parts[1] = UInt64(x >> 64);
parts[2] = UInt64(x >> 128);
parts[3] = UInt64(x >> 192);
return parts;
}
};

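With `wide_integer` the payload is plain bytes, so the boost `export_bits`/`import_bits` round-trip collapses to `memcpy`, as the simplified `serialize`/`deserialize` above show. A round-trip sketch under the same assumptions (`sizeof(T) == 32`, enforced by the `static_assert` in common/types.h):

```cpp
#include <cassert>
#include <cstring>

#include <common/types.h>   // wInt256

int main()
{
    wInt256 x = 123;
    x <<= 100;               // a value that does not fit in 64 bits
    x = -x;                  // exercise the sign bytes too

    char buf[32];
    std::memcpy(buf, &x, sizeof(x));   // serialize: raw object bytes

    wInt256 y;
    std::memcpy(&y, buf, sizeof(y));   // deserialize
    assert(y == x);
}
```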

@ -226,25 +226,25 @@ private:
static NO_INLINE UInt8 apply(A a, B b, CompareInt scale [[maybe_unused]])
{
CompareInt x;
if constexpr (is_big_int_v<CompareInt> && IsDecimalNumber<A>)
if constexpr (IsDecimalNumber<A>)
x = a.value;
else
x = bigint_cast<CompareInt>(a);
x = a;
CompareInt y;
if constexpr (is_big_int_v<CompareInt> && IsDecimalNumber<B>)
if constexpr (IsDecimalNumber<B>)
y = b.value;
else
y = bigint_cast<CompareInt>(b);
y = b;
if constexpr (_check_overflow)
{
bool overflow = false;
if constexpr (sizeof(A) > sizeof(CompareInt))
overflow |= (A(x) != a);
overflow |= (bigint_cast<A>(x) != a);
if constexpr (sizeof(B) > sizeof(CompareInt))
overflow |= (B(y) != b);
overflow |= (bigint_cast<B>(y) != b);
if constexpr (is_unsigned_v<A>)
overflow |= (x < 0);
if constexpr (is_unsigned_v<B>)


@ -742,7 +742,9 @@ namespace MySQLReplication
void GTIDEvent::dump(std::ostream & out) const
{
auto gtid_next = gtid.uuid.toUnderType().toHexString() + ":" + std::to_string(gtid.seq_no);
WriteBufferFromOwnString ws;
writeUUIDText(gtid.uuid, ws);
auto gtid_next = ws.str() + ":" + std::to_string(gtid.seq_no);
header.dump(out);
out << "GTID Next: " << gtid_next << std::endl;


@ -499,7 +499,7 @@ namespace MySQLReplication
virtual BinlogEventPtr readOneEvent() = 0;
virtual void setReplicateDatabase(String db) = 0;
virtual void setGTIDSets(GTIDSets sets) = 0;
virtual ~IFlavor() = default;
virtual ~IFlavor() override = default;
};
class MySQLFlavor : public IFlavor


@ -10,17 +10,36 @@ namespace DB
namespace QueryProcessingStage
{
/// Numbers matter - the later stage has a larger number.
///
/// It is a part of the protocol ABI; add new values only at the end.
/// Also keep in mind that the code may depend on the order of fields, so be doubly careful when adding new values.
enum Enum
{
FetchColumns = 0, /// Only read/have been read the columns specified in the query.
WithMergeableState = 1, /// Until the stage where the results of processing on different servers can be combined.
Complete = 2, /// Completely.
/// Only read/have been read the columns specified in the query.
FetchColumns = 0,
/// Until the stage where the results of processing on different servers can be combined.
WithMergeableState = 1,
/// Completely.
Complete = 2,
/// Until the stage where the aggregate functions were calculated and finalized.
///
/// It is used for auto distributed_group_by_no_merge optimization for distributed engine.
/// (See comments in StorageDistributed).
WithMergeableStateAfterAggregation = 3,
MAX = 4,
};
inline const char * toString(UInt64 stage)
{
static const char * data[] = { "FetchColumns", "WithMergeableState", "Complete" };
return stage < 3
static const char * data[] =
{
"FetchColumns",
"WithMergeableState",
"Complete",
"WithMergeableStateAfterAggregation",
};
return stage < MAX
? data[stage]
: "Unknown stage";
}


@ -107,8 +107,8 @@ class IColumn;
\
M(Bool, skip_unavailable_shards, false, "If 1, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \
\
M(Bool, distributed_group_by_no_merge, false, "Do not merge aggregation states from different servers for distributed query processing - in case it is for certain that there are different keys on different shards.", 0) \
M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard, if 1 SELECT is executed on each shard, if 2 SELECT and INSERT is executed on each shard", 0) \
M(UInt64, distributed_group_by_no_merge, 0, "If 1, do not merge aggregation states from different servers for distributed query processing - use when it is certain that there are different keys on different shards. If 2, same as 1 but also apply ORDER BY and LIMIT stages", 0) \
M(Bool, optimize_distributed_group_by_sharding_key, false, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \
M(Bool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \
M(UInt64, force_optimize_skip_unused_shards, 0, "Throw an exception if unused shards cannot be skipped (1 - throw only if the table has the sharding key, 2 - always throw.", 0) \
@ -380,6 +380,7 @@ class IColumn;
M(Bool, cast_keep_nullable, false, "CAST operator keep Nullable for result data type", 0) \
M(Bool, alter_partition_verbose_result, false, "Output information about affected parts. Currently works only for FREEZE and ATTACH commands.", 0) \
M(Bool, allow_experimental_database_materialize_mysql, false, "Allow to create database with Engine=MaterializeMySQL(...).", 0) \
M(Bool, system_events_show_zero_values, false, "Include all metrics, even with zero values", 0) \
\
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
\


@ -58,14 +58,14 @@ using UInt8 = ::UInt8;
using UInt16 = ::UInt16;
using UInt32 = ::UInt32;
using UInt64 = ::UInt64;
using UInt256 = ::bUInt256;
using UInt256 = ::wUInt256;
using Int8 = ::Int8;
using Int16 = ::Int16;
using Int32 = ::Int32;
using Int64 = ::Int64;
using Int128 = ::Int128;
using Int256 = ::bInt256;
using Int256 = ::wInt256;
using Float32 = float;
using Float64 = double;


@ -28,21 +28,13 @@ constexpr size_t min(size_t x, size_t y)
return x < y ? x : y;
}
/// @note There is no automatic widening to a larger big integer, only for built-in integral types.
/// This is because of (U)Int64 backward compatibility and the very large performance penalties it would incur.
constexpr size_t nextSize(size_t size)
{
return min(size * 2, 8);
}
template <bool is_signed>
constexpr size_t nextSize2(size_t size)
{
// old way for built-in integers
if (size <= 8) return nextSize(size);
if constexpr (is_signed)
return size <= 32 ? 32 : 48;
else
return size <= 32 ? 16 : 48;
if (size < 8)
return size * 2;
return size;
}
template <bool is_signed, bool is_floating, size_t size>
@ -55,9 +47,8 @@ template <> struct Construct<false, false, 1> { using Type = UInt8; };
template <> struct Construct<false, false, 2> { using Type = UInt16; };
template <> struct Construct<false, false, 4> { using Type = UInt32; };
template <> struct Construct<false, false, 8> { using Type = UInt64; };
template <> struct Construct<false, false, 16> { using Type = UInt256; };
template <> struct Construct<false, false, 16> { using Type = UInt256; }; /// TODO: we cannot use our UInt128 here
template <> struct Construct<false, false, 32> { using Type = UInt256; };
template <> struct Construct<false, false, 48> { using Type = UInt256; };
template <> struct Construct<false, true, 1> { using Type = Float32; };
template <> struct Construct<false, true, 2> { using Type = Float32; };
template <> struct Construct<false, true, 4> { using Type = Float32; };
@ -67,8 +58,7 @@ template <> struct Construct<true, false, 2> { using Type = Int16; };
template <> struct Construct<true, false, 4> { using Type = Int32; };
template <> struct Construct<true, false, 8> { using Type = Int64; };
template <> struct Construct<true, false, 16> { using Type = Int128; };
template <> struct Construct<true, false, 32> { using Type = Int128; };
template <> struct Construct<true, false, 48> { using Type = Int256; };
template <> struct Construct<true, false, 32> { using Type = Int256; };
template <> struct Construct<true, true, 1> { using Type = Float32; };
template <> struct Construct<true, true, 2> { using Type = Float32; };
template <> struct Construct<true, true, 4> { using Type = Float32; };
@ -86,7 +76,7 @@ template <typename A, typename B> struct ResultOfAdditionMultiplication
using Type = typename Construct<
is_signed_v<A> || is_signed_v<B>,
std::is_floating_point_v<A> || std::is_floating_point_v<B>,
nextSize2< is_signed_v<A> || is_signed_v<B> >(max(sizeof(A), sizeof(B)))>::Type;
nextSize(max(sizeof(A), sizeof(B)))>::Type;
};
template <typename A, typename B> struct ResultOfSubtraction
@ -94,7 +84,7 @@ template <typename A, typename B> struct ResultOfSubtraction
using Type = typename Construct<
true,
std::is_floating_point_v<A> || std::is_floating_point_v<B>,
nextSize2< is_signed_v<A> || is_signed_v<B> >(max(sizeof(A), sizeof(B)))>::Type;
nextSize(max(sizeof(A), sizeof(B)))>::Type;
};
/** When dividing, you always get a floating-point number.
@ -127,7 +117,7 @@ template <typename A> struct ResultOfNegate
using Type = typename Construct<
true,
std::is_floating_point_v<A>,
is_signed_v<A> ? sizeof(A) : nextSize2<true>(sizeof(A))>::Type;
is_signed_v<A> ? sizeof(A) : nextSize(sizeof(A))>::Type;
};
template <typename A> struct ResultOfAbs

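The net effect: built-in sizes still double up to 8 bytes, and the 16/32-byte cases now map straight onto the 128/256-bit types with no intermediate 48-byte step. A few spot checks of `nextSize` as redefined above:

```cpp
#include <cstddef>

// Same definition as in the diff above.
constexpr std::size_t nextSize(std::size_t size)
{
    if (size < 8)
        return size * 2;
    return size;
}

static_assert(nextSize(1) == 2);    // Int8  + Int8  -> Int16
static_assert(nextSize(4) == 8);    // Int32 + Int32 -> Int64
static_assert(nextSize(8) == 8);    // Int64 stays Int64 (backward compatibility)
static_assert(nextSize(16) == 16);  // 128-bit operands stay at 16 bytes
static_assert(nextSize(32) == 32);  // wInt256 stays 256-bit

int main() {}
```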

@ -81,8 +81,10 @@ struct DivideIntegralImpl
/// NOTE: overflow is still possible when dividing a large signed number by a large unsigned number, or vice versa. But it's less harmful.
if constexpr (is_integer_v<A> && is_integer_v<B> && (is_signed_v<A> || is_signed_v<B>))
{
return checkedDivision(make_signed_t<CastA>(a),
sizeof(A) > sizeof(B) ? make_signed_t<A>(CastB(b)) : make_signed_t<CastB>(b));
using SignedCastA = make_signed_t<CastA>;
using SignedCastB = std::conditional_t<sizeof(A) <= sizeof(B), make_signed_t<CastB>, SignedCastA>;
return bigint_cast<Result>(checkedDivision(bigint_cast<SignedCastA>(a), bigint_cast<SignedCastB>(b)));
}
else
return bigint_cast<Result>(checkedDivision(CastA(a), CastB(b)));
@ -108,7 +110,7 @@ struct ModuloImpl
if constexpr (std::is_floating_point_v<ResultType>)
{
/// This computation is similar to `fmod` but the latter is not inlined and has 40 times worse performance.
return ResultType(a) - trunc(ResultType(a) / ResultType(b)) * ResultType(b);
return bigint_cast<ResultType>(a) - trunc(bigint_cast<ResultType>(a) / bigint_cast<ResultType>(b)) * bigint_cast<ResultType>(b);
}
else
{
@ -125,7 +127,7 @@ struct ModuloImpl
if constexpr (is_big_int_v<IntegerBType> && sizeof(IntegerAType) <= sizeof(IntegerBType))
return bigint_cast<Result>(bigint_cast<CastB>(int_a) % int_b);
else
return bigint_cast<Result>(int_a % int_b);
return bigint_cast<Result>(int_a % bigint_cast<CastA>(int_b));
}
else
return IntegerAType(a) % IntegerBType(b);


@ -361,12 +361,8 @@ private:
return apply(a.value, b);
else if constexpr (IsDecimalNumber<U>)
return apply(a, b.value);
else if constexpr (std::is_same_v<T, UInt8>)
return apply(UInt16(a), b);
else if constexpr (std::is_same_v<U, UInt8>)
return apply(a, UInt16(b));
else
return applyNative(static_cast<NativeResultType>(a), static_cast<NativeResultType>(b));
return applyNative(bigint_cast<NativeResultType>(a), bigint_cast<NativeResultType>(b));
}
else
return applyNative(a, b);
@ -381,12 +377,8 @@ private:
return applyScaled<scale_left>(a.value, b, scale);
else if constexpr (IsDecimalNumber<U>)
return applyScaled<scale_left>(a, b.value, scale);
else if constexpr (std::is_same_v<T, UInt8>)
return applyScaled<scale_left>(UInt16(a), b, scale);
else if constexpr (std::is_same_v<U, UInt8>)
return applyScaled<scale_left>(a, UInt16(b), scale);
else
return applyNativeScaled<scale_left>(static_cast<NativeResultType>(a), static_cast<NativeResultType>(b), scale);
return applyNativeScaled<scale_left>(bigint_cast<NativeResultType>(a), bigint_cast<NativeResultType>(b), scale);
}
else
return applyNativeScaled<scale_left>(a, b, scale);
@ -401,12 +393,8 @@ private:
return applyScaledDiv(a.value, b, scale);
else if constexpr (IsDecimalNumber<U>)
return applyScaledDiv(a, b.value, scale);
else if constexpr (std::is_same_v<T, UInt8>)
return applyScaledDiv(UInt16(a), b, scale);
else if constexpr (std::is_same_v<U, UInt8>)
return applyScaledDiv(a, UInt16(b), scale);
else
return applyNativeScaledDiv(static_cast<NativeResultType>(a), static_cast<NativeResultType>(b), scale);
return applyNativeScaledDiv(bigint_cast<NativeResultType>(a), bigint_cast<NativeResultType>(b), scale);
}
else
return applyNativeScaledDiv(a, b, scale);

View File

@ -67,8 +67,16 @@ public:
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
/// For DateTime, if time zone is specified, attach it to type.
/// If the time zone is specified but empty, throw an exception.
if constexpr (std::is_same_v<ToDataType, DataTypeDateTime>)
return std::make_shared<ToDataType>(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0));
{
std::string time_zone = extractTimeZoneNameFromFunctionArguments(arguments, 1, 0);
if (time_zone.empty())
throw Exception(
"Function " + getName() + " supports a 2nd argument (optional) that must be non-empty and be a valid time zone",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<ToDataType>(time_zone);
}
if constexpr (std::is_same_v<ToDataType, DataTypeDateTime64>)
{
Int64 scale = DataTypeDateTime64::default_scale;

View File

@ -558,7 +558,7 @@ bool sliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
{
/// TODO: Decimal scale
if constexpr (IsDecimalNumber<T> && IsDecimalNumber<U>)
return accurate::equalsOp(typename T::NativeType(first.data[first_ind]), typename U::NativeType(second.data[second_ind]));
return accurate::equalsOp(first.data[first_ind].value, second.data[second_ind].value);
else if constexpr (IsDecimalNumber<T> || IsDecimalNumber<U>)
return false;
else
@ -588,7 +588,7 @@ bool insliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
size_t second_ind [[maybe_unused]])
{
if constexpr (IsDecimalNumber<T>)
return accurate::equalsOp(typename T::NativeType(first.data[first_ind]), typename T::NativeType(first.data[second_ind]));
return accurate::equalsOp(first.data[first_ind].value, first.data[second_ind].value);
else
return accurate::equalsOp(first.data[first_ind], first.data[second_ind]);
}

View File

@ -122,9 +122,15 @@ struct NumericArraySource : public ArraySourceImpl<NumericArraySource<T>>
}
};
/// The methods can be virtual or not depending on the template parameter. See IStringSource.
#if !__clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsuggest-override"
#elif __clang_major__ >= 11
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsuggest-override"
#pragma GCC diagnostic ignored "-Wsuggest-destructor-override"
#endif
template <typename Base>
@ -205,7 +211,7 @@ struct ConstSource : public Base
}
};
#if !__clang__
#if !__clang__ || __clang_major__ >= 11
#pragma GCC diagnostic pop
#endif

View File

@ -16,11 +16,10 @@ struct AbsImpl
{
if constexpr (IsDecimalNumber<A>)
return a < A(0) ? A(-a) : a;
else if constexpr (is_big_int_v<A>)
// from boost/multiprecision/number.hpp
return static_cast<ResultType>(abs(a));
else if constexpr (is_big_int_v<A> && is_signed_v<A>)
return (a < 0) ? -a : a;
else if constexpr (is_integer_v<A> && is_signed_v<A>)
return a < 0 ? static_cast<ResultType>(~a) + 1 : a;
return a < 0 ? static_cast<ResultType>(~a) + 1 : static_cast<ResultType>(a);
else if constexpr (is_integer_v<A> && is_unsigned_v<A>)
return static_cast<ResultType>(a);
else if constexpr (std::is_floating_point_v<A>)
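
A note on the signed-integer branch above, as a standalone sketch (the helper name is hypothetical): for negative a, ~a + 1 equals -a in two's complement, and doing the increment in the wider ResultType keeps even abs(INT32_MIN) well-defined:

#include <cassert>
#include <cstdint>

static int64_t absViaComplement(int32_t a)
{
    return a < 0 ? static_cast<int64_t>(~a) + 1 : static_cast<int64_t>(a);
}

int main()
{
    assert(absViaComplement(-5) == 5);
    assert(absViaComplement(INT32_MIN) == 2147483648LL);  /// -INT32_MIN itself would overflow int32_t
}
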

View File

@ -18,7 +18,7 @@ struct BitRotateLeftImpl
template <typename Result = ResultType>
static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]])
{
if constexpr (is_big_int_v<ResultType>)
if constexpr (is_big_int_v<A> || is_big_int_v<B>)
throw Exception("Bit rotate is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED);
else
return (static_cast<Result>(a) << static_cast<Result>(b))

View File

@ -18,7 +18,7 @@ struct BitRotateRightImpl
template <typename Result = ResultType>
static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]])
{
if constexpr (is_big_int_v<ResultType>)
if constexpr (is_big_int_v<A> || is_big_int_v<B>)
throw Exception("Bit rotate is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED);
else
return (static_cast<Result>(a) >> static_cast<Result>(b))

View File

@ -19,9 +19,9 @@ struct BitShiftLeftImpl
static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]])
{
if constexpr (is_big_int_v<B>)
throw Exception("BitShiftLeftImpl is not implemented for big integers as second argument", ErrorCodes::NOT_IMPLEMENTED);
throw Exception("BitShiftLeft is not implemented for big integers as second argument", ErrorCodes::NOT_IMPLEMENTED);
else if constexpr (is_big_int_v<A>)
return static_cast<Result>(a) << bigint_cast<UInt32>(b);
return bigint_cast<Result>(a) << bigint_cast<UInt32>(b);
else
return static_cast<Result>(a) << static_cast<Result>(b);
}

View File

@ -19,9 +19,9 @@ struct BitShiftRightImpl
static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]])
{
if constexpr (is_big_int_v<B>)
throw Exception("BitRotate is not implemented for big integers as second argument", ErrorCodes::NOT_IMPLEMENTED);
throw Exception("BitShiftRight is not implemented for big integers as second argument", ErrorCodes::NOT_IMPLEMENTED);
else if constexpr (is_big_int_v<A>)
return static_cast<Result>(a) >> bigint_cast<UInt32>(b);
return bigint_cast<Result>(a) >> bigint_cast<UInt32>(b);
else
return static_cast<Result>(a) >> static_cast<Result>(b);
}

View File

@ -19,10 +19,8 @@ struct BitTestImpl
template <typename Result = ResultType>
NO_SANITIZE_UNDEFINED static inline Result apply(A a [[maybe_unused]], B b [[maybe_unused]])
{
if constexpr (is_big_int_v<B>)
if constexpr (is_big_int_v<A> || is_big_int_v<B>)
throw Exception("bitTest is not implemented for big integers as second argument", ErrorCodes::NOT_IMPLEMENTED);
else if constexpr (is_big_int_v<A>)
return bit_test(a, static_cast<UInt32>(b));
else
return (typename NumberTraits::ToInteger<A>::Type(a) >> typename NumberTraits::ToInteger<B>::Type(b)) & 1;
}

View File

@ -13,6 +13,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -54,7 +55,12 @@ std::string extractTimeZoneNameFromFunctionArguments(const ColumnsWithTypeAndNam
const DateLUTImpl & extractTimeZoneFromFunctionArguments(Block & block, const ColumnNumbers & arguments, size_t time_zone_arg_num, size_t datetime_arg_num)
{
if (arguments.size() == time_zone_arg_num + 1)
return DateLUT::instance(extractTimeZoneNameFromColumn(*block.getByPosition(arguments[time_zone_arg_num]).column));
{
std::string time_zone = extractTimeZoneNameFromColumn(*block.getByPosition(arguments[time_zone_arg_num]).column);
if (time_zone.empty())
throw Exception("Provided time zone must be non-empty and be a valid time zone", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return DateLUT::instance(time_zone);
}
else
{
if (arguments.empty())

View File

@ -20,7 +20,7 @@ struct GCDImpl
template <typename Result = ResultType>
static inline Result apply([[maybe_unused]] A a, [[maybe_unused]] B b)
{
if constexpr (is_big_int_v<A> || is_big_int_v<B>)
if constexpr (is_big_int_v<A> || is_big_int_v<B> || is_big_int_v<Result>)
throw Exception("GCD is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED);
else
{

View File

@ -40,14 +40,14 @@ struct LCMImpl
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline std::enable_if_t<is_big_int_v<A> || is_big_int_v<B>, Result>
static inline std::enable_if_t<is_big_int_v<A> || is_big_int_v<B> || is_big_int_v<Result>, Result>
apply([[maybe_unused]] A a, [[maybe_unused]] B b)
{
throw Exception("LCM is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED);
}
template <typename Result = ResultType>
static inline std::enable_if_t<!is_big_int_v<A> && !is_big_int_v<B>, Result>
static inline std::enable_if_t<!is_big_int_v<A> && !is_big_int_v<B> && !is_big_int_v<Result>, Result>
apply([[maybe_unused]] A a, [[maybe_unused]] B b)
{
throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger<A>::Type(a), typename NumberTraits::ToInteger<B>::Type(b));

View File

@ -6,6 +6,11 @@
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
template <typename T>
inline std::enable_if_t<std::is_integral_v<T> && (sizeof(T) <= sizeof(UInt32)), T>
roundDownToPowerOfTwo(T x)
@ -48,10 +53,9 @@ roundDownToPowerOfTwo(T x)
template <typename T>
inline std::enable_if_t<is_big_int_v<T>, T>
roundDownToPowerOfTwo(T x)
roundDownToPowerOfTwo(T)
{
// extension from boost/multiprecision/number.hpp
return T(1) << msb(x);
throw Exception("roundToExp2() for big integers is not implemented", ErrorCodes::NOT_IMPLEMENTED);
}
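
For contrast, a hedged sketch of the small-integer path that remains (assuming the classic bit-smearing formulation): smear the highest set bit to the right, then keep only that bit.

#include <cassert>
#include <cstdint>

static uint32_t roundDownToPowerOfTwo(uint32_t x)
{
    x |= x >> 1;
    x |= x >> 2;
    x |= x >> 4;
    x |= x >> 8;
    x |= x >> 16;
    return x - (x >> 1);
}

int main()
{
    assert(roundDownToPowerOfTwo(5) == 4);
    assert(roundDownToPowerOfTwo(64) == 64);
    assert(roundDownToPowerOfTwo(100) == 64);
}
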
/** For integer data types:

View File

@ -831,6 +831,7 @@ template <> inline void writeText<bool>(const bool & x, WriteBuffer & buf) { wri
inline void writeText(const char * x, WriteBuffer & buf) { writeEscapedString(x, strlen(x), buf); }
inline void writeText(const char * x, size_t size, WriteBuffer & buf) { writeEscapedString(x, size, buf); }
inline void writeText(const DayNum & x, WriteBuffer & buf) { writeDateText(LocalDate(x), buf); }
inline void writeText(const LocalDate & x, WriteBuffer & buf) { writeDateText(x, buf); }
inline void writeText(const LocalDateTime & x, WriteBuffer & buf) { writeDateTimeText(x, buf); }
inline void writeText(const UUID & x, WriteBuffer & buf) { writeUUIDText(x, buf); }

View File

@ -160,7 +160,7 @@ inline void readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_
" Expected to read decimal with scale {} and precision {}";
if constexpr (is_big_int_v<typename T::NativeType>)
throw Exception(fmt::format(pattern, digits, x.value.str(), exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
throw Exception(fmt::format(pattern, digits, bigintToString(x.value), exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
else
throw Exception(fmt::format(pattern, digits, x, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
}
@ -180,7 +180,7 @@ inline void readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_
{
/// Too many digits after point. Just cut off excessive digits.
auto divisor = intExp10OfSize<typename T::NativeType>(divisor_exp);
assert(divisor > T(0)); /// This is for Clang Static Analyzer. It is not smart enough to infer it automatically.
assert(divisor > 0); /// This is for Clang Static Analyzer. It is not smart enough to infer it automatically.
x.value /= divisor;
scale = 0;
return;

View File

@ -362,7 +362,9 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod()
return AggregatedDataVariants::Type::key64;
if (size_of_field == 16)
return AggregatedDataVariants::Type::keys128;
throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16.", ErrorCodes::LOGICAL_ERROR);
if (size_of_field == 32)
return AggregatedDataVariants::Type::keys256;
throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32.", ErrorCodes::LOGICAL_ERROR);
}
/// If all keys fit in N bits, we will use a hash table with all keys packed (placed contiguously) into a single N-bit key.
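
A hedged sketch of that packing idea (PackedKey128 and packKeys are illustrative stand-ins, not the real UInt128 machinery): fixed-size key columns whose total size fits in 16 bytes are laid out contiguously in one 128-bit key, so the hash table hashes and compares a single value.

#include <cassert>
#include <cstdint>
#include <cstring>

struct PackedKey128 { uint64_t lo = 0, hi = 0; };

static PackedKey128 packKeys(uint32_t k1, uint64_t k2)
{
    PackedKey128 key;
    std::memcpy(reinterpret_cast<char *>(&key), &k1, sizeof(k1));
    std::memcpy(reinterpret_cast<char *>(&key) + sizeof(k1), &k2, sizeof(k2));
    return key;  /// 12 of 16 bytes used, the tail stays zeroed
}

int main()
{
    PackedKey128 a = packKeys(1, 2);
    PackedKey128 b = packKeys(1, 2);
    assert(std::memcmp(&a, &b, sizeof(a)) == 0);  /// equal key tuples pack identically
}
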

View File

@ -221,7 +221,9 @@ HashJoin::Type HashJoin::chooseMethod(const ColumnRawPtrs & key_columns, Sizes &
return Type::key64;
if (size_of_field == 16)
return Type::keys128;
throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16.", ErrorCodes::LOGICAL_ERROR);
if (size_of_field == 32)
return Type::keys256;
throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32.", ErrorCodes::LOGICAL_ERROR);
}
/// If the keys fit in N bits, we will use a hash table for N-bit-packed keys

View File

@ -553,6 +553,11 @@ Block InterpreterSelectQuery::getSampleBlockImpl()
return res;
}
if (options.to_stage == QueryProcessingStage::Enum::WithMergeableStateAfterAggregation)
{
return analysis_result.before_order_and_select->getSampleBlock();
}
return analysis_result.final_projection->getSampleBlock();
}
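
For orientation, the stage ladder that WithMergeableStateAfterAggregation extends; the exact numeric values are an assumption based on this diff (note that StorageMerge below also handles the new stage and MAX):

namespace QueryProcessingStage
{
    enum Enum
    {
        FetchColumns = 0,        /// only read and compute the required columns
        WithMergeableState = 1,  /// partially aggregated state, to be merged on the initiator
        Complete = 2,            /// fully processed result
        WithMergeableStateAfterAggregation = 3,  /// aggregated on shards; ORDER BY / LIMIT / projection left to the initiator
        MAX = 4,
    };
}
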
@ -740,6 +745,8 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
auto & expressions = analysis_result;
const auto & subqueries_for_sets = query_analyzer->getSubqueriesForSets();
bool intermediate_stage = false;
bool to_aggregation_stage = false;
bool from_aggregation_stage = false;
if (options.only_analyze)
{
@ -788,6 +795,14 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
options.to_stage == QueryProcessingStage::WithMergeableState)
intermediate_stage = true;
/// Support optimize_distributed_group_by_sharding_key
/// Is running on the initiating server during distributed processing?
if (from_stage == QueryProcessingStage::WithMergeableStateAfterAggregation)
from_aggregation_stage = true;
/// Is running on remote servers during distributed processing?
if (options.to_stage == QueryProcessingStage::WithMergeableStateAfterAggregation)
to_aggregation_stage = true;
if (storage && expressions.filter_info && expressions.prewhere_info)
throw Exception("PREWHERE is not supported if the table is filtered by row-level security expression", ErrorCodes::ILLEGAL_PREWHERE);
@ -848,6 +863,12 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
if (expressions.need_aggregate)
executeMergeAggregated(query_plan, aggregate_overflow_row, aggregate_final);
}
if (from_aggregation_stage)
{
if (intermediate_stage || expressions.first_stage || expressions.second_stage)
throw Exception("Query with after aggregation stage cannot have any other stages", ErrorCodes::LOGICAL_ERROR);
}
if (expressions.first_stage)
{
@ -939,9 +960,13 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets);
}
if (expressions.second_stage)
if (expressions.second_stage || from_aggregation_stage)
{
if (expressions.need_aggregate)
if (from_aggregation_stage)
{
/// No need to aggregate anything, since this was done on remote shards.
}
else if (expressions.need_aggregate)
{
/// If you need to combine aggregated results from multiple servers
if (!expressions.first_stage)
@ -994,7 +1019,8 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
* limiting the number of rows in each up to `offset + limit`.
*/
bool has_prelimit = false;
if (query.limitLength() && !query.limit_with_ties && !hasWithTotalsInAnySubqueryInFromClause(query) &&
if (!to_aggregation_stage &&
query.limitLength() && !query.limit_with_ties && !hasWithTotalsInAnySubqueryInFromClause(query) &&
!query.arrayJoinExpressionList() && !query.distinct && !expressions.hasLimitBy() && !settings.extremes)
{
executePreLimit(query_plan, false);
@ -1023,18 +1049,23 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
has_prelimit = true;
}
/** We must do projection after DISTINCT because projection may remove some columns.
*/
executeProjection(query_plan, expressions.final_projection);
/// Projection should not be done on the shards, since then the initiator will not find the columns in the blocks.
/// (significant only for WithMergeableStateAfterAggregation).
if (!to_aggregation_stage)
{
/// We must do projection after DISTINCT because projection may remove some columns.
executeProjection(query_plan, expressions.final_projection);
}
/** Extremes are calculated before LIMIT, but after LIMIT BY. This is Ok.
*/
/// Extremes are calculated before LIMIT, but after LIMIT BY. This is Ok.
executeExtremes(query_plan);
if (!has_prelimit) /// Limit is no longer needed if there is prelimit.
/// Limit is no longer needed if there is prelimit.
if (!to_aggregation_stage && !has_prelimit)
executeLimit(query_plan);
executeOffset(query_plan);
if (!to_aggregation_stage)
executeOffset(query_plan);
}
}

View File

@ -110,9 +110,8 @@ typename SetVariantsTemplate<Variant>::Type SetVariantsTemplate<Variant>::choose
size_t size_of_field = nested_key_columns[0]->sizeOfValueIfFixed();
if ((size_of_field == 1) || (size_of_field == 2) || (size_of_field == 4) || (size_of_field == 8))
return Type::nullable_keys128;
else
throw Exception{"Logical error: numeric column has sizeOfField not in 1, 2, 4, 8.",
ErrorCodes::LOGICAL_ERROR};
/// Pass to more generic method
}
if (all_fixed)
@ -145,7 +144,9 @@ typename SetVariantsTemplate<Variant>::Type SetVariantsTemplate<Variant>::choose
return Type::key64;
if (size_of_field == 16)
return Type::keys128;
throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16.", ErrorCodes::LOGICAL_ERROR);
if (size_of_field == 32)
return Type::keys256;
throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32.", ErrorCodes::LOGICAL_ERROR);
}
/// If the keys fit in N bits, we will use a hash table for N-bit-packed keys

View File

@ -402,6 +402,11 @@ void PipelineExecutor::execute(size_t num_threads)
for (auto & node : graph->nodes)
if (node->exception)
std::rethrow_exception(node->exception);
/// An exception which happened in an executing thread, but not at a processor.
for (auto & executor_context : executor_contexts)
if (executor_context->exception)
std::rethrow_exception(executor_context->exception);
}
catch (...)
{
@ -469,16 +474,7 @@ void PipelineExecutor::wakeUpExecutor(size_t thread_num)
void PipelineExecutor::executeSingleThread(size_t thread_num, size_t num_threads)
{
try
{
executeStepImpl(thread_num, num_threads);
}
catch (...)
{
/// In case of exception from executor itself, stop other threads.
finish();
throw;
}
executeStepImpl(thread_num, num_threads);
#ifndef NDEBUG
auto & context = executor_contexts[thread_num];
@ -735,7 +731,16 @@ void PipelineExecutor::executeImpl(size_t num_threads)
CurrentThread::detachQueryIfNotDetached();
);
executeSingleThread(thread_num, num_threads);
try
{
executeSingleThread(thread_num, num_threads);
}
catch (...)
{
/// In case of exception from executor itself, stop other threads.
finish();
executor_contexts[thread_num]->exception = std::current_exception();
}
});
}
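
A minimal, hedged sketch of the pattern introduced here: each worker stores its exception into a per-thread slot instead of letting it escape, and the coordinating thread rethrows the first captured exception after joining.

#include <cstddef>
#include <cstdio>
#include <exception>
#include <stdexcept>
#include <thread>
#include <vector>

int main()
{
    std::vector<std::exception_ptr> exceptions(2);
    std::vector<std::thread> threads;

    for (size_t i = 0; i < exceptions.size(); ++i)
        threads.emplace_back([&exceptions, i]
        {
            try
            {
                if (i == 1)
                    throw std::runtime_error("executor thread failed");
            }
            catch (...)
            {
                exceptions[i] = std::current_exception();  /// the real executor also calls finish()
            }
        });

    for (auto & thread : threads)
        thread.join();

    try
    {
        for (auto & exception : exceptions)
            if (exception)
                std::rethrow_exception(exception);
    }
    catch (const std::exception & e)
    {
        std::printf("rethrown in the main thread: %s\n", e.what());
    }
}
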

View File

@ -97,6 +97,9 @@ private:
/// Currently processing node.
ExecutingGraph::Node * node = nullptr;
/// Exception from executing thread itself.
std::exception_ptr exception;
#ifndef NDEBUG
/// Time for different processing stages.
UInt64 total_time_ns = 0;

View File

@ -1,5 +1,6 @@
#pragma once
#include <atomic>
#include <memory>
#include <vector>
#include <variant>

View File

@ -394,6 +394,7 @@ static bool isFederatedServerSetupSetCommand(const String & query)
"|(^(SET FOREIGN_KEY_CHECKS(.*)))"
"|(^(SET AUTOCOMMIT(.*)))"
"|(^(SET sql_mode(.*)))"
"|(^(SET @@(.*)))"
"|(^(SET SESSION TRANSACTION ISOLATION LEVEL(.*)))"
, std::regex::icase};
return 1 == std::regex_match(query, expr);
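
A trimmed-down, hedged sketch of this check: the MySQL compatibility layer recognizes session-setup SET statements (now including "SET @@...") and acknowledges them without forwarding them to the query parser.

#include <cassert>
#include <regex>
#include <string>

static bool isFederatedSetCommand(const std::string & query)
{
    static const std::regex expr(
        "(^(SET sql_mode(.*)))"
        "|(^(SET @@(.*)))"
        "|(^(SET SESSION TRANSACTION ISOLATION LEVEL(.*)))",
        std::regex::icase);
    return std::regex_match(query, expr);
}

int main()
{
    assert(isFederatedSetCommand("SET @@session.auto_increment_increment = 1"));
    assert(!isFederatedSetCommand("SELECT 1"));
}
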

View File

@ -6,14 +6,14 @@
namespace DB
{
AllMergeSelector::PartsInPartition AllMergeSelector::select(
const Partitions & partitions,
AllMergeSelector::PartsRange AllMergeSelector::select(
const PartsRanges & parts_ranges,
const size_t /*max_total_size_to_merge*/)
{
size_t min_partition_size = 0;
Partitions::const_iterator best_partition;
PartsRanges::const_iterator best_partition;
for (auto it = partitions.begin(); it != partitions.end(); ++it)
for (auto it = parts_ranges.begin(); it != parts_ranges.end(); ++it)
{
if (it->size() <= 1)
continue;

View File

@ -11,8 +11,8 @@ class AllMergeSelector : public IMergeSelector
{
public:
/// Parameter max_total_size_to_merge is ignored.
PartsInPartition select(
const Partitions & partitions,
PartsRange select(
const PartsRanges & parts_ranges,
const size_t max_total_size_to_merge) override;
};

View File

@ -1042,6 +1042,37 @@ void IMergeTreeDataPart::accumulateColumnSizes(ColumnToSize & column_to_size) co
column_to_size[column_name] = size.data_compressed;
}
bool IMergeTreeDataPart::checkAllTTLCalculated(const StorageMetadataPtr & metadata_snapshot) const
{
if (!metadata_snapshot->hasAnyTTL())
return false;
if (metadata_snapshot->hasRowsTTL())
{
if (isEmpty()) /// All rows were finally deleted and we don't store TTL
return true;
else if (ttl_infos.table_ttl.min == 0)
return false;
}
for (const auto & [column, desc] : metadata_snapshot->getColumnTTLs())
{
/// Part has this column, but we haven't calculated TTL for it
if (!ttl_infos.columns_ttl.count(column) && getColumns().contains(column))
return false;
}
for (const auto & move_desc : metadata_snapshot->getMoveTTLs())
{
/// Move TTL is not calculated
if (!ttl_infos.moves_ttl.count(move_desc.result_column))
return false;
}
return true;
}
bool isCompactPart(const MergeTreeDataPartPtr & data_part)
{
return (data_part && data_part->getType() == MergeTreeDataPartType::COMPACT);

View File

@ -344,6 +344,11 @@ public:
static inline constexpr auto DELETE_ON_DESTROY_MARKER_FILE_NAME = "delete-on-destroy.txt";
/// Checks that all TTLs (table min/max, column TTLs, and so on) for the part
/// are calculated. A part without calculated TTL may exist if TTL was added after
/// part creation (using an ALTER query with the materialize_ttl setting).
bool checkAllTTLCalculated(const StorageMetadataPtr & metadata_snapshot) const;
protected:
/// Total size of all columns, calculated once in calculateColumnSizesOnDisk

View File

@ -14,7 +14,7 @@ namespace
*/
struct Estimator
{
using Iterator = LevelMergeSelector::PartsInPartition::const_iterator;
using Iterator = LevelMergeSelector::PartsRange::const_iterator;
void consider(Iterator begin, Iterator end, size_t sum_size)
{
@ -28,9 +28,9 @@ struct Estimator
}
}
LevelMergeSelector::PartsInPartition getBest() const
LevelMergeSelector::PartsRange getBest() const
{
return LevelMergeSelector::PartsInPartition(best_begin, best_end);
return LevelMergeSelector::PartsRange(best_begin, best_end);
}
double min_score = 0;
@ -40,7 +40,7 @@ struct Estimator
void selectWithinPartition(
const LevelMergeSelector::PartsInPartition & parts,
const LevelMergeSelector::PartsRange & parts,
const size_t max_total_size_to_merge,
Estimator & estimator,
const LevelMergeSelector::Settings & settings)
@ -103,14 +103,14 @@ void selectWithinPartition(
}
LevelMergeSelector::PartsInPartition LevelMergeSelector::select(
const Partitions & partitions,
LevelMergeSelector::PartsRange LevelMergeSelector::select(
const PartsRanges & parts_ranges,
const size_t max_total_size_to_merge)
{
Estimator estimator;
for (const auto & partition : partitions)
selectWithinPartition(partition, max_total_size_to_merge, estimator, settings);
for (const auto & parts_range: parts_ranges)
selectWithinPartition(parts_range, max_total_size_to_merge, estimator, settings);
return estimator.getBest();
}

View File

@ -19,8 +19,8 @@ public:
explicit LevelMergeSelector(const Settings & settings_) : settings(settings_) {}
PartsInPartition select(
const Partitions & partitions,
PartsRange select(
const PartsRanges & parts_ranges,
const size_t max_total_size_to_merge) override;
private:

View File

@ -48,16 +48,16 @@ public:
};
/// Parts belong to partitions. Only parts within the same partition can be merged.
using PartsInPartition = std::vector<Part>;
using PartsRange = std::vector<Part>;
/// Parts are in some specific order. Parts could be merged only in contiguous ranges.
using Partitions = std::vector<PartsInPartition>;
using PartsRanges = std::vector<PartsRange>;
/** The function can be called at any frequency, and it must decide whether to do any merge at all.
* If it is better not to do any merge, it returns an empty result.
*/
virtual PartsInPartition select(
const Partitions & partitions,
virtual PartsRange select(
const PartsRanges & parts_ranges,
const size_t max_total_size_to_merge) = 0;
virtual ~IMergeSelector() = default;
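
A hedged toy implementation against this interface (PartStub and selectAdjacentPair are illustrative, not the real selectors): parts arrive as contiguous ranges, and the selector returns one contiguous sub-range to merge, or nothing.

#include <cassert>
#include <cstddef>
#include <vector>

struct PartStub { size_t size = 0; };  /// age, level and TTL fields omitted
using Range = std::vector<PartStub>;
using Ranges = std::vector<Range>;

/// Picks the first adjacent pair that fits the size budget; the real selectors
/// (Simple/Level/TTL) score candidate sub-ranges instead.
static Range selectAdjacentPair(const Ranges & ranges, size_t max_total_size)
{
    for (const auto & range : ranges)
        for (size_t i = 0; i + 1 < range.size(); ++i)
            if (range[i].size + range[i + 1].size <= max_total_size)
                return Range(range.begin() + i, range.begin() + i + 2);
    return {};  /// better not to merge at all
}

int main()
{
    Ranges ranges{{{100}, {10}, {20}, {500}}};
    Range picked = selectAdjacentPair(ranges, 64);
    assert(picked.size() == 2 && picked[0].size == 10 && picked[1].size == 20);
}
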

View File

@ -227,13 +227,25 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
time_t current_time = std::time(nullptr);
IMergeSelector::Partitions partitions;
IMergeSelector::PartsRanges parts_ranges;
const String * prev_partition_id = nullptr;
/// Previous part, tracked only within the boundaries of the current partition frame
const MergeTreeData::DataPartPtr * prev_part = nullptr;
for (const MergeTreeData::DataPartPtr & part : data_parts)
{
const String & partition_id = part->info.partition_id;
if (!prev_partition_id || partition_id != *prev_partition_id)
{
if (parts_ranges.empty() || !parts_ranges.back().empty())
parts_ranges.emplace_back();
/// New partition frame.
prev_partition_id = &partition_id;
prev_part = nullptr;
}
/// Check predicate only for first part in each partition.
if (!prev_part)
{
@ -244,15 +256,19 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
if (!can_merge_callback(nullptr, part, nullptr))
continue;
}
const String & partition_id = part->info.partition_id;
if (!prev_partition_id || partition_id != *prev_partition_id || (prev_part && !can_merge_callback(*prev_part, part, nullptr)))
else
{
if (partitions.empty() || !partitions.back().empty())
partitions.emplace_back();
/// New partition frame.
prev_partition_id = &partition_id;
prev_part = nullptr;
/// If we cannot merge with the previous part, we have to start a new parts
/// interval (in the same partition)
if (!can_merge_callback(*prev_part, part, nullptr))
{
/// Starting new interval in the same partition
if (!parts_ranges.back().empty())
parts_ranges.emplace_back();
/// Now we have no previous part, but it affects only logging
prev_part = nullptr;
}
}
IMergeSelector::Part part_info;
@ -263,7 +279,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
part_info.min_ttl = part->ttl_infos.part_min_ttl;
part_info.max_ttl = part->ttl_infos.part_max_ttl;
partitions.back().emplace_back(part_info);
parts_ranges.back().emplace_back(part_info);
/// Check for consistency of data parts. If assertion is failed, it requires immediate investigation.
if (prev_part && part->info.partition_id == (*prev_part)->info.partition_id
@ -275,7 +291,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
prev_part = &part;
}
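
The loop above, restated as a hedged standalone sketch (PartInfo is an illustrative stand-in): walking parts in order, a new range opens on a partition change or when the merge predicate rejects the (previous, current) pair, so every range stays contiguous and mergeable.

#include <cassert>
#include <string>
#include <vector>

struct PartInfo { std::string partition_id; };
using RangeOfParts = std::vector<PartInfo>;

template <typename CanMerge>
std::vector<RangeOfParts> splitIntoRanges(const std::vector<PartInfo> & parts, CanMerge can_merge)
{
    std::vector<RangeOfParts> ranges;
    const PartInfo * prev = nullptr;
    for (const auto & part : parts)
    {
        if (!prev || prev->partition_id != part.partition_id || !can_merge(*prev, part))
            ranges.emplace_back();  /// start a new contiguous range
        ranges.back().push_back(part);
        prev = &part;
    }
    return ranges;
}

int main()
{
    std::vector<PartInfo> parts{{"2020"}, {"2020"}, {"2021"}};
    auto ranges = splitIntoRanges(parts, [](const PartInfo &, const PartInfo &) { return true; });
    assert(ranges.size() == 2 && ranges[0].size() == 2 && ranges[1].size() == 1);
}
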
IMergeSelector::PartsInPartition parts_to_merge;
IMergeSelector::PartsRange parts_to_merge;
if (!ttl_merges_blocker.isCancelled())
{
@ -284,7 +300,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
current_time,
data_settings->merge_with_ttl_timeout,
data_settings->ttl_only_drop_parts);
parts_to_merge = merge_selector.select(partitions, max_total_size_to_merge);
parts_to_merge = merge_selector.select(parts_ranges, max_total_size_to_merge);
}
if (parts_to_merge.empty())
@ -294,7 +310,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
merge_settings.base = 1;
parts_to_merge = SimpleMergeSelector(merge_settings)
.select(partitions, max_total_size_to_merge);
.select(parts_ranges, max_total_size_to_merge);
/// Do not allow "merging" a part with itself for regular merges; only a TTL merge may do so, since it is OK for it to remove some values with expired TTL
if (parts_to_merge.size() == 1)
@ -635,8 +651,17 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
new_data_part->is_temp = true;
bool need_remove_expired_values = false;
bool force_ttl = false;
for (const auto & part : parts)
{
new_data_part->ttl_infos.update(part->ttl_infos);
if (metadata_snapshot->hasAnyTTL() && !part->checkAllTTLCalculated(metadata_snapshot))
{
LOG_INFO(log, "Some TTL values were not calculated for part {}. Will calculate them forcefully during merge.", part->name);
need_remove_expired_values = true;
force_ttl = true;
}
}
const auto & part_min_ttl = new_data_part->ttl_infos.part_min_ttl;
if (part_min_ttl && part_min_ttl <= time_of_merge)
@ -809,7 +834,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
merged_stream = std::make_shared<DistinctSortedBlockInputStream>(merged_stream, sort_description, SizeLimits(), 0 /*limit_hint*/, Names());
if (need_remove_expired_values)
merged_stream = std::make_shared<TTLBlockInputStream>(merged_stream, data, metadata_snapshot, new_data_part, time_of_merge, false);
merged_stream = std::make_shared<TTLBlockInputStream>(merged_stream, data, metadata_snapshot, new_data_part, time_of_merge, force_ttl);
if (metadata_snapshot->hasSecondaryIndices())

View File

@ -38,7 +38,7 @@ struct MergeTreeDataPartTTLInfos
MergeTreeDataPartTTLInfo table_ttl;
/// `part_min_ttl` and `part_max_ttl` are TTLs which are used for selecting parts
/// to merge in order to remove expired rows.
time_t part_min_ttl = 0;
time_t part_max_ttl = 0;
@ -58,7 +58,7 @@ struct MergeTreeDataPartTTLInfos
part_max_ttl = time_max;
}
bool empty()
bool empty() const
{
return !part_min_ttl && moves_ttl.empty();
}

View File

@ -15,7 +15,7 @@ namespace
*/
struct Estimator
{
using Iterator = SimpleMergeSelector::PartsInPartition::const_iterator;
using Iterator = SimpleMergeSelector::PartsRange::const_iterator;
void consider(Iterator begin, Iterator end, size_t sum_size, size_t size_prev_at_left, const SimpleMergeSelector::Settings & settings)
{
@ -42,9 +42,9 @@ struct Estimator
}
}
SimpleMergeSelector::PartsInPartition getBest() const
SimpleMergeSelector::PartsRange getBest() const
{
return SimpleMergeSelector::PartsInPartition(best_begin, best_end);
return SimpleMergeSelector::PartsRange(best_begin, best_end);
}
static double score(double count, double sum_size, double sum_size_fixed_cost)
@ -137,7 +137,7 @@ bool allow(
void selectWithinPartition(
const SimpleMergeSelector::PartsInPartition & parts,
const SimpleMergeSelector::PartsRange & parts,
const size_t max_total_size_to_merge,
Estimator & estimator,
const SimpleMergeSelector::Settings & settings)
@ -185,14 +185,14 @@ void selectWithinPartition(
}
SimpleMergeSelector::PartsInPartition SimpleMergeSelector::select(
const Partitions & partitions,
SimpleMergeSelector::PartsRange SimpleMergeSelector::select(
const PartsRanges & parts_ranges,
const size_t max_total_size_to_merge)
{
Estimator estimator;
for (const auto & partition : partitions)
selectWithinPartition(partition, max_total_size_to_merge, estimator, settings);
for (const auto & part_range : parts_ranges)
selectWithinPartition(part_range, max_total_size_to_merge, estimator, settings);
return estimator.getBest();
}

View File

@ -73,8 +73,8 @@ public:
explicit SimpleMergeSelector(const Settings & settings_) : settings(settings_) {}
PartsInPartition select(
const Partitions & partitions,
PartsRange select(
const PartsRanges & parts_ranges,
const size_t max_total_size_to_merge) override;
private:

View File

@ -15,18 +15,18 @@ const String & getPartitionIdForPart(const TTLMergeSelector::Part & part_info)
}
IMergeSelector::PartsInPartition TTLMergeSelector::select(
const Partitions & partitions,
IMergeSelector::PartsRange TTLMergeSelector::select(
const PartsRanges & parts_ranges,
const size_t max_total_size_to_merge)
{
using Iterator = IMergeSelector::PartsInPartition::const_iterator;
using Iterator = IMergeSelector::PartsRange::const_iterator;
Iterator best_begin;
ssize_t partition_to_merge_index = -1;
time_t partition_to_merge_min_ttl = 0;
for (size_t i = 0; i < partitions.size(); ++i)
for (size_t i = 0; i < parts_ranges.size(); ++i)
{
const auto & mergeable_parts_in_partition = partitions[i];
const auto & mergeable_parts_in_partition = parts_ranges[i];
if (mergeable_parts_in_partition.empty())
continue;
@ -51,7 +51,7 @@ IMergeSelector::PartsInPartition TTLMergeSelector::select(
if (partition_to_merge_index == -1 || partition_to_merge_min_ttl > current_time)
return {};
const auto & best_partition = partitions[partition_to_merge_index];
const auto & best_partition = parts_ranges[partition_to_merge_index];
Iterator best_end = best_begin + 1;
size_t total_size = 0;
@ -88,7 +88,7 @@ IMergeSelector::PartsInPartition TTLMergeSelector::select(
const auto & best_partition_id = getPartitionIdForPart(best_partition.front());
merge_due_times[best_partition_id] = current_time + merge_cooldown_time;
return PartsInPartition(best_begin, best_end);
return PartsRange(best_begin, best_end);
}
}

View File

@ -10,7 +10,7 @@ namespace DB
{
/** Merge selector, which is used to remove values with expired TTL.
* It selects parts to merge by a greedy algorithm:
* 1. Finds the part with the earliest expired TTL and includes it in the result.
* 2. Tries to find the longest range of parts with expired TTL that includes the part from step 1.
* Finally, the merge selector updates the TTL merge timer for the selected partition
@ -26,8 +26,8 @@ public:
merge_cooldown_time(merge_cooldown_time_),
only_drop_parts(only_drop_parts_) {}
PartsInPartition select(
const Partitions & partitions,
PartsRange select(
const PartsRanges & parts_ranges,
const size_t max_total_size_to_merge) override;
private:

View File

@ -56,12 +56,15 @@
#include <memory>
#include <filesystem>
#include <optional>
namespace
{
const UInt64 FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_HAS_SHARDING_KEY = 1;
const UInt64 FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_ALWAYS = 2;
const UInt64 DISTRIBUTED_GROUP_BY_NO_MERGE_AFTER_AGGREGATION = 2;
}
namespace DB
@ -242,22 +245,82 @@ void replaceConstantExpressions(
visitor.visit(node);
}
QueryProcessingStage::Enum getQueryProcessingStageImpl(const Context & context, QueryProcessingStage::Enum to_stage, const ClusterPtr & cluster)
/// Returns one of the following:
/// - QueryProcessingStage::Complete
/// - QueryProcessingStage::WithMergeableStateAfterAggregation
/// - none (in this case regular WithMergeableState should be used)
std::optional<QueryProcessingStage::Enum> getOptimizedQueryProcessingStage(const ASTPtr & query_ptr, bool extremes, const Block & sharding_key_block)
{
const Settings & settings = context.getSettingsRef();
const auto & select = query_ptr->as<ASTSelectQuery &>();
auto sharding_block_has = [&](const auto & exprs, size_t limit = SIZE_MAX) -> bool
{
size_t i = 0;
for (auto & expr : exprs)
{
++i;
if (i > limit)
break;
auto id = expr->template as<ASTIdentifier>();
if (!id)
return false;
/// TODO: if GROUP BY contains multiIf()/if() it should contain only columns from sharding_key
if (!sharding_key_block.has(id->name))
return false;
}
return true;
};
// GROUP BY qualifiers
// - TODO: WITH TOTALS can be implemented
// - TODO: WITH ROLLUP can be implemented (I guess)
if (select.group_by_with_totals || select.group_by_with_rollup || select.group_by_with_cube)
return {};
// TODO: extremes support can be implemented
if (extremes)
return {};
// DISTINCT
if (select.distinct)
{
if (!sharding_block_has(select.select()->children))
return {};
}
// GROUP BY
const ASTPtr group_by = select.groupBy();
if (!group_by)
{
if (!select.distinct)
return {};
}
else
{
if (!sharding_block_has(group_by->children, 1))
return {};
}
// ORDER BY
const ASTPtr order_by = select.orderBy();
if (order_by)
return QueryProcessingStage::WithMergeableStateAfterAggregation;
// LIMIT BY
// LIMIT
if (select.limitBy() || select.limitLength())
return QueryProcessingStage::WithMergeableStateAfterAggregation;
// Only simple SELECT FROM GROUP BY sharding_key can use Complete state.
return QueryProcessingStage::Complete;
}
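
A hedged sketch of the sharding_block_has lambda above (the column names are hypothetical): every inspected GROUP BY / DISTINCT expression must be a plain identifier present in the sharding key's sample block, otherwise the optimization is abandoned.

#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

static bool shardingBlockHas(
    const std::vector<std::string> & sharding_key_columns,
    const std::vector<std::string> & group_by_identifiers)
{
    return std::all_of(group_by_identifiers.begin(), group_by_identifiers.end(),
        [&](const std::string & name)
        {
            return std::find(sharding_key_columns.begin(), sharding_key_columns.end(), name)
                != sharding_key_columns.end();
        });
}

int main()
{
    assert(shardingBlockHas({"user_id"}, {"user_id"}));     /// GROUP BY the sharding key: shards may aggregate fully
    assert(!shardingBlockHas({"user_id"}, {"event_date"})); /// grouped by something else: keep merging on the initiator
}
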
size_t getClusterQueriedNodes(const Settings & settings, const ClusterPtr & cluster)
{
size_t num_local_shards = cluster->getLocalShardCount();
size_t num_remote_shards = cluster->getRemoteShardCount();
size_t result_size = (num_remote_shards * settings.max_parallel_replicas) + num_local_shards;
if (settings.distributed_group_by_no_merge)
return QueryProcessingStage::Complete;
/// A nested distributed query cannot return the Complete stage,
/// since the parent query needs to aggregate the results afterwards.
if (to_stage == QueryProcessingStage::WithMergeableState)
return QueryProcessingStage::WithMergeableState;
return result_size == 1 ? QueryProcessingStage::Complete
: QueryProcessingStage::WithMergeableState;
return (num_remote_shards * settings.max_parallel_replicas) + num_local_shards;
}
}
@ -374,87 +437,23 @@ StoragePtr StorageDistributed::createWithOwnCluster(
return res;
}
bool StorageDistributed::canForceGroupByNoMerge(const Context &context, QueryProcessingStage::Enum to_stage, const ASTPtr & query_ptr) const
{
const auto & settings = context.getSettingsRef();
std::string reason;
if (settings.distributed_group_by_no_merge)
return true;
if (!settings.optimize_distributed_group_by_sharding_key)
return false;
/// Distributed-over-Distributed (see getQueryProcessingStageImpl())
if (to_stage == QueryProcessingStage::WithMergeableState)
return false;
if (!settings.optimize_skip_unused_shards)
return false;
if (!has_sharding_key)
return false;
const auto & select = query_ptr->as<ASTSelectQuery &>();
if (select.group_by_with_totals || select.group_by_with_rollup || select.group_by_with_cube)
return false;
// TODO: The following can be optimized too (but with some caveats, will be addressed later):
// - ORDER BY
// - LIMIT BY
// - LIMIT
if (select.orderBy())
return false;
if (select.limitBy() || select.limitLength())
return false;
if (select.distinct)
{
for (auto & expr : select.select()->children)
{
const auto * id = expr->as<ASTIdentifier>();
if (!id)
return false;
if (!sharding_key_expr->getSampleBlock().has(id->name))
return false;
}
reason = "DISTINCT " + backQuote(serializeAST(*select.select(), true));
}
const ASTPtr group_by = select.groupBy();
if (!group_by)
{
if (!select.distinct)
return false;
}
else
{
// injective functions are optimized out in optimizeGroupBy()
// hence all we need to check is that column in GROUP BY matches sharding expression
auto & group_exprs = group_by->children;
if (group_exprs.empty())
throw Exception("No ASTExpressionList in GROUP BY", ErrorCodes::LOGICAL_ERROR);
const auto * id = group_exprs[0]->as<ASTIdentifier>();
if (!id)
return false;
if (!sharding_key_expr->getSampleBlock().has(id->name))
return false;
reason = "GROUP BY " + backQuote(serializeAST(*group_by, true));
}
LOG_DEBUG(log, "Force distributed_group_by_no_merge for {} (injective)", reason);
return true;
}
QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Context &context, QueryProcessingStage::Enum to_stage, const ASTPtr & query_ptr) const
{
const auto & settings = context.getSettingsRef();
auto metadata_snapshot = getInMemoryMetadataPtr();
if (canForceGroupByNoMerge(context, to_stage, query_ptr))
return QueryProcessingStage::Complete;
if (settings.distributed_group_by_no_merge)
{
if (settings.distributed_group_by_no_merge == DISTRIBUTED_GROUP_BY_NO_MERGE_AFTER_AGGREGATION)
return QueryProcessingStage::WithMergeableStateAfterAggregation;
else
return QueryProcessingStage::Complete;
}
/// A nested distributed query cannot return the Complete stage,
/// since the parent query needs to aggregate the results afterwards.
if (to_stage == QueryProcessingStage::WithMergeableState)
return QueryProcessingStage::WithMergeableState;
ClusterPtr cluster = getCluster();
if (settings.optimize_skip_unused_shards)
@ -464,7 +463,26 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Con
cluster = optimized_cluster;
}
return getQueryProcessingStageImpl(context, to_stage, cluster);
/// If there is only one node, the query can be fully processed by the
/// shard; the initiator will only work as a proxy.
if (getClusterQueriedNodes(settings, cluster) == 1)
return QueryProcessingStage::Complete;
if (settings.optimize_skip_unused_shards &&
settings.optimize_distributed_group_by_sharding_key &&
has_sharding_key &&
sharding_key_is_deterministic)
{
Block sharding_key_block = sharding_key_expr->getSampleBlock();
auto stage = getOptimizedQueryProcessingStage(query_ptr, settings.extremes, sharding_key_block);
if (stage)
{
LOG_DEBUG(log, "Force processing stage to {}", QueryProcessingStage::toString(*stage));
return *stage;
}
}
return QueryProcessingStage::WithMergeableState;
}
Pipe StorageDistributed::read(

View File

@ -66,8 +66,6 @@ public:
bool isRemote() const override { return true; }
/// Return true if distributed_group_by_no_merge may be applied.
bool canForceGroupByNoMerge(const Context &, QueryProcessingStage::Enum to_stage, const ASTPtr &) const;
QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum to_stage, const ASTPtr &) const override;
Pipe read(

View File

@ -452,6 +452,8 @@ Block StorageMerge::getQueryHeader(
}
case QueryProcessingStage::WithMergeableState:
case QueryProcessingStage::Complete:
case QueryProcessingStage::WithMergeableStateAfterAggregation:
case QueryProcessingStage::MAX:
{
auto query = query_info.query->clone();
removeJoin(*query->as<ASTSelectQuery>());

View File

@ -1,4 +1,5 @@
#include <Common/ProfileEvents.h>
#include <Interpreters/Context.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Storages/System/StorageSystemEvents.h>
@ -15,13 +16,13 @@ NamesAndTypesList StorageSystemEvents::getNamesAndTypes()
};
}
void StorageSystemEvents::fillData(MutableColumns & res_columns, const Context &, const SelectQueryInfo &) const
void StorageSystemEvents::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo &) const
{
for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i)
{
UInt64 value = ProfileEvents::global_counters[i];
if (0 != value)
if (0 != value || context.getSettingsRef().system_events_show_zero_values)
{
res_columns[0]->insert(ProfileEvents::getName(ProfileEvents::Event(i)));
res_columns[1]->insert(value);

View File

@ -14,8 +14,8 @@ int main(int, char **)
{
using namespace DB;
IMergeSelector::Partitions partitions(1);
IMergeSelector::PartsInPartition & parts = partitions.back();
IMergeSelector::PartsRanges partitions(1);
IMergeSelector::PartsRange & parts = partitions.back();
SimpleMergeSelector::Settings settings;
// settings.base = 2;
@ -53,7 +53,7 @@ int main(int, char **)
while (parts.size() > 1)
{
IMergeSelector::PartsInPartition selected_parts = selector.select(partitions, 0);
IMergeSelector::PartsRange selected_parts = selector.select(partitions, 0);
if (selected_parts.empty())
{

View File

@ -19,8 +19,8 @@ int main(int, char **)
{
using namespace DB;
IMergeSelector::Partitions partitions(1);
IMergeSelector::PartsInPartition & parts = partitions.back();
IMergeSelector::PartsRanges partitions(1);
IMergeSelector::PartsRange & parts = partitions.back();
/* SimpleMergeSelector::Settings settings;
SimpleMergeSelector selector(settings);*/
@ -52,7 +52,7 @@ int main(int, char **)
while (parts.size() > 1)
{
IMergeSelector::PartsInPartition selected_parts = selector.select(partitions, 100ULL * 1024 * 1024 * 1024);
IMergeSelector::PartsRange selected_parts = selector.select(partitions, 100ULL * 1024 * 1024 * 1024);
if (selected_parts.empty())
{

View File

@ -18,7 +18,7 @@ if(MAKE_STATIC_LIBRARIES AND DOCKER_CMD)
if(NOT INTEGRATION_USE_RUNNER AND DOCKER_COMPOSE_CMD AND PYTEST_CMD)
# To run one test with debug:
# cmake . -DPYTEST_OPT="-ss;test_cluster_copier"
add_test(NAME integration-pytest WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND env ${TEST_USE_BINARIES} "CLICKHOUSE_TESTS_BASE_CONFIG_DIR=${ClickHouse_SOURCE_DIR}/programs/server/" ${PYTEST_STARTER} ${PYTEST_CMD} ${PYTEST_OPT})
add_test(NAME integration-pytest WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND env ${TEST_USE_BINARIES} "CLICKHOUSE_TESTS_BASE_CONFIG_DIR=${ClickHouse_SOURCE_DIR}/programs/server/" "CLICKHOUSE_TESTS_CONFIG_DIR=${ClickHouse_SOURCE_DIR}/tests/config/" ${PYTEST_STARTER} ${PYTEST_CMD} ${PYTEST_OPT})
message(STATUS "Using tests in docker DOCKER=${DOCKER_CMD}; DOCKER_COMPOSE=${DOCKER_COMPOSE_CMD}; PYTEST=${PYTEST_STARTER} ${PYTEST_CMD} ${PYTEST_OPT}")
endif()
endif()

View File

@ -0,0 +1,4 @@
<?xml version="1.0"?>
<yandex>
<dictionaries_config>/etc/clickhouse-server/dictionaries/*.xml</dictionaries_config>
</yandex>

View File

@ -71,7 +71,7 @@ class CommandRequest:
self.stderr_file = tempfile.TemporaryFile()
self.ignore_error = ignore_error
#print " ".join(command)
# print " ".join(command)
# we suppress stderr on the client because sometimes thread sanitizer
# can print some debug information there

View File

@ -1,25 +1,25 @@
import base64
import cassandra.cluster
import distutils.dir_util
import docker
import errno
import httplib
import logging
import os
import os.path as p
import pprint
import psycopg2
import pwd
import pymongo
import pymysql
import re
import requests
import shutil
import socket
import subprocess
import time
import urllib
import xml.dom.minidom
from dicttoxml import dicttoxml
from kazoo.client import KazooClient
from kazoo.exceptions import KazooException
@ -88,12 +88,14 @@ class ClickHouseCluster:
these directories will contain logs, database files, docker-compose config, ClickHouse configs etc.
"""
def __init__(self, base_path, name=None, base_configs_dir=None, server_bin_path=None, client_bin_path=None,
def __init__(self, base_path, name=None, base_config_dir=None, server_bin_path=None, client_bin_path=None,
odbc_bridge_bin_path=None, zookeeper_config_path=None, custom_dockerd_host=None):
for param in os.environ.keys():
print "ENV %40s %s" % (param,os.environ[param])
self.base_dir = p.dirname(base_path)
self.name = name if name is not None else ''
self.base_configs_dir = base_configs_dir or os.environ.get('CLICKHOUSE_TESTS_BASE_CONFIG_DIR',
self.base_config_dir = base_config_dir or os.environ.get('CLICKHOUSE_TESTS_BASE_CONFIG_DIR',
'/etc/clickhouse-server/')
self.server_bin_path = p.realpath(
server_bin_path or os.environ.get('CLICKHOUSE_TESTS_SERVER_BIN_PATH', '/usr/bin/clickhouse'))
@ -155,6 +157,7 @@ class ClickHouseCluster:
self.docker_client = None
self.is_up = False
print "CLUSTER INIT base_config_dir:{}".format(self.base_config_dir)
def get_client_cmd(self):
cmd = self.client_bin_path
@ -162,7 +165,7 @@ class ClickHouseCluster:
cmd += " client"
return cmd
def add_instance(self, name, config_dir=None, main_configs=None, user_configs=None, macros=None,
def add_instance(self, name, base_config_dir=None, main_configs=None, user_configs=None, dictionaries=None, macros=None,
with_zookeeper=False, with_mysql=False, with_kafka=False, with_rabbitmq=False, clickhouse_path_dir=None,
with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False,
with_redis=False, with_minio=False, with_cassandra=False,
@ -172,7 +175,7 @@ class ClickHouseCluster:
"""Add an instance to the cluster.
name - the name of the instance directory and the value of the 'instance' macro in ClickHouse.
config_dir - a directory with config files whose content will be copied to /etc/clickhouse-server/ directory
base_config_dir - a directory with config.xml and users.xml files which will be copied to /etc/clickhouse-server/ directory
main_configs - a list of config files that will be added to config.d/ directory
user_configs - a list of config files that will be added to users.d/ directory
with_zookeeper - if True, add ZooKeeper configuration to configs and ZooKeeper instances to the cluster.
@ -188,14 +191,36 @@ class ClickHouseCluster:
tag = self.docker_base_tag
instance = ClickHouseInstance(
self, self.base_dir, name, config_dir, main_configs or [], user_configs or [], macros or {},
with_zookeeper,
self.zookeeper_config_path, with_mysql, with_kafka, with_rabbitmq, with_mongo, with_redis, with_minio, with_cassandra,
self.base_configs_dir, self.server_bin_path,
self.odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=hostname,
env_variables=env_variables or {}, image=image, tag=tag, stay_alive=stay_alive, ipv4_address=ipv4_address,
cluster=self,
base_path=self.base_dir,
name=name,
base_config_dir=base_config_dir if base_config_dir else self.base_config_dir,
custom_main_configs=main_configs or [],
custom_user_configs=user_configs or [],
custom_dictionaries=dictionaries or [],
macros=macros or {},
with_zookeeper=with_zookeeper,
zookeeper_config_path=self.zookeeper_config_path,
with_mysql=with_mysql,
with_kafka=with_kafka,
with_rabbitmq=with_rabbitmq,
with_mongo=with_mongo,
with_redis=with_redis,
with_minio=with_minio,
with_cassandra=with_cassandra,
server_bin_path=self.server_bin_path,
odbc_bridge_bin_path=self.odbc_bridge_bin_path,
clickhouse_path_dir=clickhouse_path_dir,
with_odbc_drivers=with_odbc_drivers,
hostname=hostname,
env_variables=env_variables or {},
image=image,
tag=tag,
stay_alive=stay_alive,
ipv4_address=ipv4_address,
ipv6_address=ipv6_address,
with_installed_binary=with_installed_binary, tmpfs=tmpfs or [])
with_installed_binary=with_installed_binary,
tmpfs=tmpfs or [])
docker_compose_yml_dir = get_docker_compose_path()
@ -462,19 +487,19 @@ class ClickHouseCluster:
try:
minio_client.list_buckets()
logging.info("Connected to Minio.")
print("Connected to Minio.")
if minio_client.bucket_exists(self.minio_bucket):
minio_client.remove_bucket(self.minio_bucket)
minio_client.make_bucket(self.minio_bucket)
logging.info("S3 bucket '%s' created", self.minio_bucket)
print("S3 bucket '%s' created", self.minio_bucket)
self.minio_client = minio_client
return
except Exception as ex:
logging.warning("Can't connect to Minio: %s", str(ex))
print("Can't connect to Minio: %s", str(ex))
time.sleep(1)
raise Exception("Can't wait Minio to start")
@ -486,10 +511,10 @@ class ClickHouseCluster:
try:
sr_client._send_request(sr_client.url)
self.schema_registry_client = sr_client
logging.info("Connected to SchemaRegistry")
print("Connected to SchemaRegistry")
return
except Exception as ex:
logging.warning("Can't connect to SchemaRegistry: %s", str(ex))
print("Can't connect to SchemaRegistry: %s", str(ex))
time.sleep(1)
def wait_cassandra_to_start(self, timeout=30):
@ -505,25 +530,27 @@ class ClickHouseCluster:
time.sleep(1)
def start(self, destroy_dirs=True):
print "Cluster start called. is_up={}, destroy_dirs={}".format(self.is_up, destroy_dirs)
if self.is_up:
return
# Just in case kill unstopped containers from previous launch
try:
logging.info("Trying to kill unstopped containers...")
print("Trying to kill unstopped containers...")
if not subprocess_call(['docker-compose', 'kill']):
subprocess_call(['docker-compose', 'down', '--volumes'])
logging.info("Unstopped containers killed")
print("Unstopped containers killed")
except:
pass
try:
if destroy_dirs and p.exists(self.instances_dir):
logging.info("Removing instances dir %s", self.instances_dir)
print("Removing instances dir %s", self.instances_dir)
shutil.rmtree(self.instances_dir)
for instance in self.instances.values():
print('Setup directory for instance: {} destroy_dirs: {}'.format(instance.name, destroy_dirs))
instance.create_dir(destroy_dir=destroy_dirs)
self.docker_client = docker.from_env(version=self.docker_api_version)
@ -531,6 +558,7 @@ class ClickHouseCluster:
common_opts = ['up', '-d', '--force-recreate']
if self.with_zookeeper and self.base_zookeeper_cmd:
print('Setup ZooKeeper')
env = os.environ.copy()
if not self.zookeeper_use_tmpfs:
env['ZK_FS'] = 'bind'
@ -549,14 +577,17 @@ class ClickHouseCluster:
self.wait_zookeeper_to_start(120)
if self.with_mysql and self.base_mysql_cmd:
print('Setup MySQL')
subprocess_check_call(self.base_mysql_cmd + common_opts)
self.wait_mysql_to_start(120)
if self.with_postgres and self.base_postgres_cmd:
print('Setup Postgres')
subprocess_check_call(self.base_postgres_cmd + common_opts)
self.wait_postgres_to_start(120)
if self.with_kafka and self.base_kafka_cmd:
print('Setup Kafka')
subprocess_check_call(self.base_kafka_cmd + common_opts + ['--renew-anon-volumes'])
self.kafka_docker_id = self.get_instance_docker_id('kafka1')
self.wait_schema_registry_to_start(120)
@ -566,14 +597,17 @@ class ClickHouseCluster:
self.rabbitmq_docker_id = self.get_instance_docker_id('rabbitmq1')
if self.with_hdfs and self.base_hdfs_cmd:
print('Setup HDFS')
subprocess_check_call(self.base_hdfs_cmd + common_opts)
self.wait_hdfs_to_start(120)
if self.with_mongo and self.base_mongo_cmd:
print('Setup Mongo')
subprocess_check_call(self.base_mongo_cmd + common_opts)
self.wait_mongo_to_start(30)
if self.with_redis and self.base_redis_cmd:
print('Setup Redis')
subprocess_check_call(self.base_redis_cmd + ['up', '-d', '--force-recreate'])
time.sleep(10)
@ -612,18 +646,19 @@ class ClickHouseCluster:
self.wait_cassandra_to_start()
clickhouse_start_cmd = self.base_cmd + ['up', '-d', '--no-recreate']
logging.info("Trying to create ClickHouse instance by command %s", ' '.join(map(str, clickhouse_start_cmd)))
print("Trying to create ClickHouse instance by command %s", ' '.join(map(str, clickhouse_start_cmd)))
subprocess_check_call(clickhouse_start_cmd)
logging.info("ClickHouse instance created")
print("ClickHouse instance created")
start_deadline = time.time() + 20.0 # seconds
for instance in self.instances.itervalues():
instance.docker_client = self.docker_client
instance.ip_address = self.get_instance_ip(instance.name)
logging.info("Waiting for ClickHouse start...")
print("Waiting for ClickHouse start...")
instance.wait_for_start(start_deadline)
logging.info("ClickHouse started")
print("ClickHouse started")
instance.client = Client(instance.ip_address, command=self.client_bin_path)
@ -637,7 +672,10 @@ class ClickHouseCluster:
def shutdown(self, kill=True):
sanitizer_assert_instance = None
with open(self.docker_logs_path, "w+") as f:
subprocess.check_call(self.base_cmd + ['logs'], stdout=f)
try:
subprocess.check_call(self.base_cmd + ['logs'], stdout=f)
except Exception as e:
print "Unable to get logs from docker."
f.seek(0)
for line in f:
if SANITIZER_SIGN in line:
@ -645,8 +683,15 @@ class ClickHouseCluster:
break
if kill:
subprocess_check_call(self.base_cmd + ['kill'])
subprocess_check_call(self.base_cmd + ['down', '--volumes', '--remove-orphans'])
try:
subprocess_check_call(self.base_cmd + ['kill'])
except Exception as e:
print "Kill command failed durung shutdown. {}".format(repr(e))
try:
subprocess_check_call(self.base_cmd + ['down', '--volumes', '--remove-orphans'])
except Exception as e:
print "Down + remove orphans failed durung shutdown. {}".format(repr(e))
self.is_up = False
@ -711,7 +756,7 @@ services:
image: {image}:{tag}
hostname: {hostname}
volumes:
- {configs_dir}:/etc/clickhouse-server/
- {instance_config_dir}:/etc/clickhouse-server/
- {db_dir}:/var/lib/clickhouse/
- {logs_dir}:/var/log/clickhouse-server/
{binary_volume}
@ -744,10 +789,9 @@ services:
class ClickHouseInstance:
def __init__(
self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macros,
with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_rabbitmq, with_mongo,
with_redis, with_minio, with_cassandra, base_configs_dir, server_bin_path, odbc_bridge_bin_path,
clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables=None,
self, cluster, base_path, name, base_config_dir, custom_main_configs, custom_user_configs, custom_dictionaries,
macros, with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_rabbitmq, with_mongo, with_redis, with_minio,
with_cassandra, server_bin_path, odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables=None,
image="yandex/clickhouse-integration-test", tag="latest",
stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, tmpfs=None):
@ -758,15 +802,15 @@ class ClickHouseInstance:
self.hostname = hostname if hostname is not None else self.name
self.tmpfs = tmpfs or []
self.custom_config_dir = p.abspath(p.join(base_path, custom_config_dir)) if custom_config_dir else None
self.base_config_dir = p.abspath(p.join(base_path, base_config_dir)) if base_config_dir else None
self.custom_main_config_paths = [p.abspath(p.join(base_path, c)) for c in custom_main_configs]
self.custom_user_config_paths = [p.abspath(p.join(base_path, c)) for c in custom_user_configs]
self.custom_dictionaries_paths = [p.abspath(p.join(base_path, c)) for c in custom_dictionaries]
self.clickhouse_path_dir = p.abspath(p.join(base_path, clickhouse_path_dir)) if clickhouse_path_dir else None
self.macros = macros if macros is not None else {}
self.with_zookeeper = with_zookeeper
self.zookeeper_config_path = zookeeper_config_path
self.base_configs_dir = base_configs_dir
self.server_bin_path = server_bin_path
self.odbc_bridge_bin_path = odbc_bridge_bin_path
@ -782,7 +826,7 @@ class ClickHouseInstance:
self.docker_compose_path = p.join(self.path, 'docker_compose.yml')
self.env_variables = env_variables or {}
if with_odbc_drivers:
self.odbc_ini_path = os.path.dirname(self.docker_compose_path) + "/odbc.ini:/etc/odbc.ini"
self.odbc_ini_path = self.path + "/odbc.ini:/etc/odbc.ini"
self.with_mysql = True
else:
self.odbc_ini_path = ""
@ -985,7 +1029,7 @@ class ClickHouseInstance:
time_left = deadline - current_time
if deadline is not None and current_time >= deadline:
raise Exception("Timed out while waiting for instance `{}' with ip address {} to start. "
"Container status: {}".format(self.name, self.ip_address, status))
"Container status: {}, logs: {}".format(self.name, self.ip_address, status, handle.logs()))
# Repeatedly poll the instance address until there is something that listens there.
# Usually it means that ClickHouse is ready to accept queries.
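The polling described in this comment boils down to retrying a TCP connect until a deadline; a rough sketch of the idea (not the exact helper), assuming port 9000 is the native protocol port:
import socket
import time
def wait_for_port(ip, port=9000, timeout=20.0):
    deadline = time.time() + timeout
    while True:
        try:
            # if the connect succeeds, something is listening there
            sock = socket.create_connection((ip, port), timeout=1.0)
            sock.close()
            return
        except socket.error:
            if time.time() >= deadline:
                raise Exception("nothing listens on {}:{}".format(ip, port))
            time.sleep(0.5)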
@ -1067,40 +1111,46 @@ class ClickHouseInstance:
os.makedirs(self.path)
configs_dir = p.abspath(p.join(self.path, 'configs'))
os.mkdir(configs_dir)
instance_config_dir = p.abspath(p.join(self.path, 'configs'))
os.makedirs(instance_config_dir)
shutil.copy(p.join(self.base_configs_dir, 'config.xml'), configs_dir)
shutil.copy(p.join(self.base_configs_dir, 'users.xml'), configs_dir)
print "Copy common default production configuration from {}".format(self.base_config_dir)
shutil.copyfile(p.join(self.base_config_dir, 'config.xml'), p.join(instance_config_dir, 'config.xml'))
shutil.copyfile(p.join(self.base_config_dir, 'users.xml'), p.join(instance_config_dir, 'users.xml'))
print "Create directory for configuration generated in this helper"
# used by all utils with any config
conf_d_dir = p.abspath(p.join(configs_dir, 'conf.d'))
# used by server with main config.xml
self.config_d_dir = p.abspath(p.join(configs_dir, 'config.d'))
users_d_dir = p.abspath(p.join(configs_dir, 'users.d'))
conf_d_dir = p.abspath(p.join(instance_config_dir, 'conf.d'))
os.mkdir(conf_d_dir)
os.mkdir(self.config_d_dir)
os.mkdir(users_d_dir)
print "Create directory for common tests configuration"
# used by server with main config.xml
self.config_d_dir = p.abspath(p.join(instance_config_dir, 'config.d'))
os.mkdir(self.config_d_dir)
users_d_dir = p.abspath(p.join(instance_config_dir, 'users.d'))
os.mkdir(users_d_dir)
dictionaries_dir = p.abspath(p.join(instance_config_dir, 'dictionaries'))
os.mkdir(dictionaries_dir)
print "Copy common configuration from helpers"
# The file is named with 0_ prefix to be processed before other configuration overloads.
shutil.copy(p.join(HELPERS_DIR, '0_common_instance_config.xml'), self.config_d_dir)
shutil.copy(p.join(HELPERS_DIR, '0_common_instance_users.xml'), users_d_dir)
if len(self.custom_dictionaries_paths):
shutil.copy(p.join(HELPERS_DIR, '0_common_enable_dictionaries.xml'), self.config_d_dir)
# Generate and write macros file
print "Generate and write macros file"
macros = self.macros.copy()
macros['instance'] = self.name
with open(p.join(self.config_d_dir, 'macros.xml'), 'w') as macros_config:
with open(p.join(conf_d_dir, 'macros.xml'), 'w') as macros_config:
macros_config.write(self.dict_to_xml({"macros": macros}))
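The macros file is produced by dict_to_xml from a plain dict; a toy equivalent of that serialization, only to show what conf.d/macros.xml ends up containing (the real helper may wrap or indent the output differently):
def toy_dict_to_xml(d):
    # recursively render {'tag': value-or-dict} as nested XML elements
    out = ''
    for k, v in d.items():
        inner = toy_dict_to_xml(v) if isinstance(v, dict) else str(v)
        out += '<{0}>{1}</{0}>'.format(k, inner)
    return out
print toy_dict_to_xml({"macros": {"instance": "node1", "shard": "0"}})
# <macros><instance>node1</instance><shard>0</shard></macros>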
# Put ZooKeeper config
if self.with_zookeeper:
shutil.copy(self.zookeeper_config_path, conf_d_dir)
# Copy config dir
if self.custom_config_dir:
distutils.dir_util.copy_tree(self.custom_config_dir, configs_dir)
# Copy config.d configs
print "Copy custom test config files {} to {}".format(self.custom_main_config_paths, self.config_d_dir)
for path in self.custom_main_config_paths:
shutil.copy(path, self.config_d_dir)
@ -1108,12 +1158,19 @@ class ClickHouseInstance:
for path in self.custom_user_config_paths:
shutil.copy(path, users_d_dir)
# Copy dictionaries configs to configs/dictionaries
for path in self.custom_dictionaries_paths:
shutil.copy(path, dictionaries_dir)
db_dir = p.abspath(p.join(self.path, 'database'))
print "Setup database dir {}".format(db_dir)
os.mkdir(db_dir)
if self.clickhouse_path_dir is not None:
print "Database files taken from {}".format(self.clickhouse_path_dir)
distutils.dir_util.copy_tree(self.clickhouse_path_dir, db_dir)
logs_dir = p.abspath(p.join(self.path, 'logs'))
print "Setup logs dir {}".format(logs_dir)
os.mkdir(logs_dir)
depends_on = []
@ -1138,6 +1195,8 @@ class ClickHouseInstance:
env_file = _create_env_file(os.path.dirname(self.docker_compose_path), self.env_variables)
print "Env {} stored in {}".format(self.env_variables, env_file)
odbc_ini_path = ""
if self.odbc_ini_path:
self._create_odbc_config_file()
@ -1148,6 +1207,8 @@ class ClickHouseInstance:
if self.stay_alive:
entrypoint_cmd = CLICKHOUSE_STAY_ALIVE_COMMAND
print "Entrypoint cmd: {}".format(entrypoint_cmd)
networks = app_net = ipv4_address = ipv6_address = net_aliases = net_alias1 = ""
if self.ipv4_address is not None or self.ipv6_address is not None or self.hostname != self.name:
networks = "networks:"
@ -1167,6 +1228,7 @@ class ClickHouseInstance:
binary_volume = "- " + self.server_bin_path + ":/usr/share/clickhouse_fresh"
odbc_bridge_volume = "- " + self.odbc_bridge_bin_path + ":/usr/share/clickhouse-odbc-bridge_fresh"
with open(self.docker_compose_path, 'w') as docker_compose:
docker_compose.write(DOCKER_COMPOSE_TEMPLATE.format(
image=self.image,
@ -1175,7 +1237,7 @@ class ClickHouseInstance:
hostname=self.hostname,
binary_volume=binary_volume,
odbc_bridge_volume=odbc_bridge_volume,
configs_dir=configs_dir,
instance_config_dir=instance_config_dir,
config_d_dir=self.config_d_dir,
db_dir=db_dir,
tmpfs=str(self.tmpfs),

View File

@ -0,0 +1,197 @@
<dictionaries>
<dictionary>
<name>flat_decimals</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>decimals</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<flat/>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>d32</name>
<type>Decimal32(4)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d64</name>
<type>Decimal64(6)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d128</name>
<type>Decimal128(1)</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>hashed_decimals</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>decimals</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<hashed/>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>d32</name>
<type>Decimal32(4)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d64</name>
<type>Decimal64(6)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d128</name>
<type>Decimal128(1)</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>cache_decimals</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>decimals</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<cache><size_in_cells>1000</size_in_cells></cache>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>d32</name>
<type>Decimal32(4)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d64</name>
<type>Decimal64(6)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d128</name>
<type>Decimal128(1)</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>complex_hashed_decimals</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>decimals</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<complex_key_hashed/>
</layout>
<structure>
<key>
<attribute>
<name>key</name>
<type>UInt64</type>
</attribute>
</key>
<attribute>
<name>d32</name>
<type>Decimal32(4)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d64</name>
<type>Decimal64(6)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d128</name>
<type>Decimal128(1)</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>complex_cache_decimals</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>decimals</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<complex_key_cache><size_in_cells>1000</size_in_cells></complex_key_cache>
</layout>
<structure>
<key>
<attribute>
<name>key</name>
<type>UInt64</type>
</attribute>
</key>
<attribute>
<name>d32</name>
<type>Decimal32(4)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d64</name>
<type>Decimal64(6)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d128</name>
<type>Decimal128(1)</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
</dictionaries>
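Once loaded, these dictionaries are queried with dictGet; a sketch of how a test might exercise the three simple layouts, assuming a started instance `node` with this file loaded and a populated source table (the real tests create and fill it first):
for layout in ['flat', 'hashed', 'cache']:
    query = "SELECT dictGet('{}_decimals', 'd32', toUInt64(1))".format(layout)
    print node.query(query)
# complex_key_* layouts take the key as a tuple instead:
#   SELECT dictGet('complex_hashed_decimals', 'd64', tuple(toUInt64(1)))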

View File

@ -0,0 +1,514 @@
<dictionaries>
<dictionary>
<name>flat_ints</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>ints</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<flat/>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>i8</name>
<type>Int8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i16</name>
<type>Int16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i32</name>
<type>Int32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i64</name>
<type>Int64</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u8</name>
<type>UInt8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u16</name>
<type>UInt16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u32</name>
<type>UInt32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u64</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>hashed_ints</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>ints</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<hashed/>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>i8</name>
<type>Int8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i16</name>
<type>Int16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i32</name>
<type>Int32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i64</name>
<type>Int64</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u8</name>
<type>UInt8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u16</name>
<type>UInt16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u32</name>
<type>UInt32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u64</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>hashed_sparse_ints</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>ints</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<sparse_hashed/>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>i8</name>
<type>Int8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i16</name>
<type>Int16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i32</name>
<type>Int32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i64</name>
<type>Int64</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u8</name>
<type>UInt8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u16</name>
<type>UInt16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u32</name>
<type>UInt32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u64</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>cache_ints</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>ints</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<cache><size_in_cells>1000</size_in_cells></cache>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>i8</name>
<type>Int8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i16</name>
<type>Int16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i32</name>
<type>Int32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i64</name>
<type>Int64</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u8</name>
<type>UInt8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u16</name>
<type>UInt16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u32</name>
<type>UInt32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u64</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>complex_hashed_ints</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>ints</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<complex_key_hashed/>
</layout>
<structure>
<key>
<attribute>
<name>key</name>
<type>UInt64</type>
</attribute>
</key>
<attribute>
<name>i8</name>
<type>Int8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i16</name>
<type>Int16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i32</name>
<type>Int32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i64</name>
<type>Int64</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u8</name>
<type>UInt8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u16</name>
<type>UInt16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u32</name>
<type>UInt32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u64</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>complex_cache_ints</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>ints</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<complex_key_cache><size_in_cells>1000</size_in_cells></complex_key_cache>
</layout>
<structure>
<key>
<attribute>
<name>key</name>
<type>UInt64</type>
</attribute>
</key>
<attribute>
<name>i8</name>
<type>Int8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i16</name>
<type>Int16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i32</name>
<type>Int32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i64</name>
<type>Int64</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u8</name>
<type>UInt8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u16</name>
<type>UInt16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u32</name>
<type>UInt32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u64</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>one_cell_cache_ints</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>test_01054</db>
<table>ints</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<cache><size_in_cells>1</size_in_cells></cache>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>i8</name>
<type>Int8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i16</name>
<type>Int16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i32</name>
<type>Int32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i64</name>
<type>Int64</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u8</name>
<type>UInt8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u16</name>
<type>UInt16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u32</name>
<type>UInt32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u64</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>one_cell_cache_ints_overflow</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>test_01054_overflow</db>
<table>ints</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<cache><size_in_cells>1</size_in_cells></cache>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>i8</name>
<type>Int8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i16</name>
<type>Int16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i32</name>
<type>Int32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i64</name>
<type>Int64</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u8</name>
<type>UInt8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u16</name>
<type>UInt16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u32</name>
<type>UInt32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u64</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
</dictionaries>

View File

@ -0,0 +1,209 @@
<dictionaries>
<dictionary>
<name>flat_strings</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>strings</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<flat/>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>str</name>
<type>String</type>
<null_value></null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>hashed_strings</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>strings</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<hashed/>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>str</name>
<type>String</type>
<null_value></null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>cache_strings</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>strings</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<cache><size_in_cells>1000</size_in_cells></cache>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>str</name>
<type>String</type>
<null_value></null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>complex_hashed_strings</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>strings</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<complex_key_hashed/>
</layout>
<structure>
<key>
<attribute>
<name>key</name>
<type>UInt64</type>
</attribute>
</key>
<attribute>
<name>str</name>
<type>String</type>
<null_value></null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>complex_cache_strings</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>strings</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<complex_key_cache><size_in_cells>1000</size_in_cells></complex_key_cache>
</layout>
<structure>
<key>
<attribute>
<name>key</name>
<type>UInt64</type>
</attribute>
</key>
<attribute>
<name>str</name>
<type>String</type>
<null_value></null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>complex_hashed_strings_key</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>strings</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<complex_key_hashed/>
</layout>
<structure>
<key>
<attribute>
<name>str</name>
<type>String</type>
</attribute>
</key>
<attribute>
<name>key</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>complex_cache_strings_key</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>strings</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<complex_key_cache><size_in_cells>1000</size_in_cells></complex_key_cache>
</layout>
<structure>
<key>
<attribute>
<name>str</name>
<type>String</type>
</attribute>
</key>
<attribute>
<name>key</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
</dictionaries>
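The two *_strings_key dictionaries above invert the usual direction: the string itself is the complex key and the UInt64 'key' column is the attribute. A hedged lookup sketch, assuming a started instance `node` with this file loaded:
print node.query("SELECT dictGet('complex_hashed_strings_key', 'key', tuple('some string'))")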

View File

@ -11,6 +11,9 @@ class TSV:
raw_lines = contents.splitlines(True)
elif isinstance(contents, list):
raw_lines = ['\t'.join(map(str, l)) if isinstance(l, list) else str(l) for l in contents]
elif isinstance(contents, TSV):
self.lines = contents.lines
return
else:
raise TypeError("contents must be either file or string or list, actual type: " + type(contents).__name__)
self.lines = [l.strip() for l in raw_lines if l.strip()]
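The new branch makes TSV(TSV(...)) a cheap copy, so helpers can wrap values unconditionally; a usage sketch, assuming TSV lives in helpers.test_tools (the module imported by the tests in this diff) and compares by its normalized lines:
from helpers.test_tools import TSV
assert TSV("1\t2\n3\t4\n") == TSV([[1, 2], [3, 4]])
assert TSV(TSV("a\tb")) == TSV("a\tb")  # exercises the new TSV-from-TSV branch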

View File

@ -53,7 +53,7 @@ def check_args_and_update_paths(args):
logging.info("base_configs_dir: {}, binary: {}, cases_dir: {} ".format(args.base_configs_dir, args.binary, args.cases_dir))
for path in [args.binary, args.base_configs_dir, args.cases_dir, CLICKHOUSE_ROOT]:
for path in [args.binary, args.bridge_binary, args.base_configs_dir, args.cases_dir, CLICKHOUSE_ROOT]:
if not os.path.exists(path):
raise Exception("Path {} doesn't exist".format(path))

View File

@ -4,9 +4,9 @@ from helpers.cluster import ClickHouseCluster
from helpers.client import QueryRuntimeException
cluster = ClickHouseCluster(__file__)
ch1 = cluster.add_instance('ch1', config_dir="configs", with_zookeeper=True)
ch2 = cluster.add_instance('ch2', config_dir="configs", with_zookeeper=True)
ch3 = cluster.add_instance('ch3', config_dir="configs", with_zookeeper=True)
ch1 = cluster.add_instance('ch1', main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True)
ch2 = cluster.add_instance('ch2', main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True)
ch3 = cluster.add_instance('ch3', main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True)
@pytest.fixture(scope="module", autouse=True)
def started_cluster():

View File

@ -9,23 +9,23 @@ from helpers.test_tools import assert_eq_with_retry
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
node2 = cluster.add_instance('node2', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
node3 = cluster.add_instance('node3', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.6.3.18', with_installed_binary=True)
node4 = cluster.add_instance('node4', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
node3 = cluster.add_instance('node3', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.6.3.18', with_installed_binary=True)
node4 = cluster.add_instance('node4', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
node5 = cluster.add_instance('node5', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', with_installed_binary=True)
node6 = cluster.add_instance('node6', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
node5 = cluster.add_instance('node5', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', with_installed_binary=True)
node6 = cluster.add_instance('node6', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
node7 = cluster.add_instance('node7', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.6.3.18', stay_alive=True, with_installed_binary=True)
node8 = cluster.add_instance('node8', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
node7 = cluster.add_instance('node7', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.6.3.18', stay_alive=True, with_installed_binary=True)
node8 = cluster.add_instance('node8', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
node9 = cluster.add_instance('node9', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml', 'configs/merge_tree_settings.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
node10 = cluster.add_instance('node10', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml', 'configs/merge_tree_settings.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.6.3.18', stay_alive=True, with_installed_binary=True)
node9 = cluster.add_instance('node9', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml', 'configs/merge_tree_settings.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
node10 = cluster.add_instance('node10', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml', 'configs/merge_tree_settings.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.6.3.18', stay_alive=True, with_installed_binary=True)
node11 = cluster.add_instance('node11', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
node12 = cluster.add_instance('node12', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
node11 = cluster.add_instance('node11', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
node12 = cluster.add_instance('node12', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
def prepare_single_pair_with_setting(first_node, second_node, group):

View File

@ -4,7 +4,7 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
server = cluster.add_instance('server', config_dir="configs")
server = cluster.add_instance('server', user_configs=["configs/users.d/network.xml"])
clientA1 = cluster.add_instance('clientA1', hostname = 'clientA1.com')
clientA2 = cluster.add_instance('clientA2', hostname = 'clientA2.com')
@ -20,7 +20,12 @@ clientD2 = cluster.add_instance('clientD2', hostname = 'xxx.clientD0002.ru')
clientD3 = cluster.add_instance('clientD3', hostname = 'clientD0003.ru')
def check_clickhouse_is_ok(client_node, server_node):
assert client_node.exec_in_container(["bash", "-c", "/usr/bin/curl -s {}:8123 ".format(server_node.hostname)]) == "Ok.\n"
def query_from_one_node_to_another(client_node, server_node, query):
check_clickhouse_is_ok(client_node, server_node)
return client_node.exec_in_container(["bash", "-c", "/usr/bin/clickhouse client --host {} --query {!r}".format(server_node.hostname, query)])
@ -56,5 +61,6 @@ def test_allowed_host():
for client_node in expected_to_fail:
with pytest.raises(Exception) as e:
query_from_one_node_to_another(client_node, server, "SELECT * FROM test_table")
result = query_from_one_node_to_another(client_node, server, "SELECT * FROM test_table")
print("Client node: {} Server node: {} Result: {}".format(client_node, server_node, result))
assert "default: Authentication failed" in str(e)

View File

@ -40,7 +40,7 @@ def test_config_with_only_regexp_hosts(start_cluster):
assert node3.query("CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', HDFS)") == ""
assert node3.query("CREATE TABLE table_test_3_2 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
assert "not allowed" in node3.query_and_get_error("CREATE TABLE table_test_3_3 (word String) Engine=URL('https://host', CSV)")
assert "not allowed" in node3.query_and_get_error("CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', S3)")
assert "not allowed" in node3.query_and_get_error("CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', S3)")
def test_config_without_allowed_hosts(start_cluster):
assert node4.query("CREATE TABLE table_test_4_1 (word String) Engine=URL('https://host:80', CSV)") == ""
@ -49,18 +49,18 @@ def test_config_without_allowed_hosts(start_cluster):
assert node4.query("CREATE TABLE table_test_4_4 (word String) Engine=URL('ftp://something.com', S3)") == ""
def test_table_function_remote(start_cluster):
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-{1|2}', system, events)", settings={"connections_with_failover_max_tries":1, "connect_timeout_with_failover_ms": 1000, "connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout":1})
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-1,example01-02-1', system, events)", settings={"connections_with_failover_max_tries":1, "connect_timeout_with_failover_ms": 1000, "connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout":1})
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remote('example01-0{1,2}-1', system, events", settings={"connections_with_failover_max_tries":1, "connect_timeout_with_failover_ms": 1000, "connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout":1})
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remote('example01-0{1,2}-{1|2}', system, events)", settings={"connections_with_failover_max_tries":1, "connect_timeout_with_failover_ms": 1000, "connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout":1})
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-{01..02}-{1|2}', system, events)", settings={"connections_with_failover_max_tries":1, "connect_timeout_with_failover_ms": 1000, "connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout":1})
assert "not allowed" in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-1,example01-03-1', system, events)", settings={"connections_with_failover_max_tries":1, "connect_timeout_with_failover_ms": 1000, "connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout":1})
assert "not allowed" in node6.query_and_get_error("SELECT * FROM remote('example01-01-{1|3}', system, events)", settings={"connections_with_failover_max_tries":1, "connect_timeout_with_failover_ms": 1000, "connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout":1})
assert "not allowed" in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-0{1,3}-1', system, metrics)", settings={"connections_with_failover_max_tries":1, "connect_timeout_with_failover_ms": 1000, "connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout":1})
assert node6.query("SELECT * FROM remote('localhost', system, events)") != ""
assert node6.query("SELECT * FROM remoteSecure('localhost', system, metrics)") != ""
assert "URL \"localhost:800\" is not allowed in config.xml" in node6.query_and_get_error("SELECT * FROM remoteSecure('localhost:800', system, events)")
assert "URL \"localhost:800\" is not allowed in config.xml" in node6.query_and_get_error("SELECT * FROM remote('localhost:800', system, metrics)")
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-1,example01-02-1', system, events)")
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remote('example01-0{1,2}-1', system, events")
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-{1|2}', system, events)")
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remote('example01-0{1,2}-{1|2}', system, events)")
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-{01..02}-{1|2}', system, events)")
assert "not allowed" in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-1,example01-03-1', system, events)")
assert "not allowed" in node6.query_and_get_error("SELECT * FROM remote('example01-01-{1|3}', system, events)")
assert "not allowed" in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-0{1,3}-1', system, metrics)")
def test_redirect(start_cluster):
hdfs_api = HDFSApi("root")

View File

@ -6,11 +6,9 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1',
config_dir='configs',
main_configs=['configs/logs_config.xml'])
node2 = cluster.add_instance('node2',
config_dir='configs',
main_configs=['configs/logs_config.xml'])

View File

@ -6,7 +6,7 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', config_dir="configs", with_zookeeper=True)
node1 = cluster.add_instance('node1', main_configs=["configs/config.d/zookeeper_session_timeout.xml", "configs/remote_servers.xml"], with_zookeeper=True)
@pytest.fixture(scope="module")

View File

@ -1,80 +1,74 @@
<?xml version="1.0"?>
<yandex>
<remote_servers>
<cluster0>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s0_0_0</host>
<port>9000</port>
</replica>
<replica>
<host>s0_0_1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s0_1_0</host>
<port>9000</port>
</replica>
</shard>
</cluster0>
<cluster1>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s1_0_0</host>
<port>9000</port>
</replica>
<replica>
<host>s1_0_1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s1_1_0</host>
<port>9000</port>
</replica>
</shard>
</cluster1>
<shard_0_0>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s0_0_0</host>
<port>9000</port>
</replica>
<replica>
<host>s0_0_1</host>
<port>9000</port>
</replica>
</shard>
</shard_0_0>
<source_trivial_cluster>
<shard>
<replica>
<host>s0_0_0</host>
<port>9000</port>
</replica>
</shard>
</source_trivial_cluster>
<destination_trivial_cluster>
<shard>
<replica>
<host>s1_0_0</host>
<port>9000</port>
</replica>
</shard>
</destination_trivial_cluster>
</remote_servers>
<remote_servers>
<cluster0>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s0_0_0</host>
<port>9000</port>
</replica>
<replica>
<host>s0_0_1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s0_1_0</host>
<port>9000</port>
</replica>
</shard>
</cluster0>
<cluster1>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s1_0_0</host>
<port>9000</port>
</replica>
<replica>
<host>s1_0_1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s1_1_0</host>
<port>9000</port>
</replica>
</shard>
</cluster1>
<shard_0_0>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s0_0_0</host>
<port>9000</port>
</replica>
<replica>
<host>s0_0_1</host>
<port>9000</port>
</replica>
</shard>
</shard_0_0>
<source_trivial_cluster>
<shard>
<replica>
<host>s0_0_0</host>
<port>9000</port>
</replica>
</shard>
</source_trivial_cluster>
<destination_trivial_cluster>
<shard>
<replica>
<host>s1_0_0</host>
<port>9000</port>
</replica>
</shard>
</destination_trivial_cluster>
</remote_servers>
</yandex>

View File

@ -54,7 +54,8 @@ def started_cluster():
for replica_name in replicas:
name = "s{}_{}_{}".format(cluster_name, shard_name, replica_name)
cluster.add_instance(name,
config_dir="configs",
main_configs=["configs/conf.d/query_log.xml", "configs/conf.d/ddl.xml", "configs/conf.d/clusters.xml"],
user_configs=["configs/users.xml"],
macros={"cluster": cluster_name, "shard": shard_name, "replica": replica_name},
with_zookeeper=True)
@ -226,6 +227,7 @@ def execute_task(task, cmd_options):
zk.ensure_path(zk_task_path)
zk.create(zk_task_path + "/description", task.copier_task_config)
# Run cluster-copier processes on each node
docker_api = docker.from_env().api
copiers_exec_ids = []
@ -241,9 +243,11 @@ def execute_task(task, cmd_options):
for instance_name in copiers:
instance = cluster.instances[instance_name]
container = instance.get_docker_handle()
instance.copy_file_to_container(os.path.join(CURRENT_TEST_DIR, "configs/config-copier.xml"), "/etc/clickhouse-server/config-copier.xml")
print "Copied copier config to {}".format(instance.name)
exec_id = docker_api.exec_create(container.id, cmd, stderr=True)
docker_api.exec_start(exec_id, detach=True)
output = docker_api.exec_start(exec_id).decode('utf8')
print(output)
copiers_exec_ids.append(exec_id)
print "Copier for {} ({}) has started".format(instance.name, instance.ip_address)

View File

@ -34,7 +34,7 @@ def started_cluster():
for replica_name in replicas:
name = "s{}_{}_{}".format(cluster_name, shard_name, replica_name)
cluster.add_instance(name,
config_dir="configs",
main_configs=[], user_configs=[],
macros={"cluster": cluster_name, "shard": shard_name, "replica": replica_name},
with_zookeeper=True)

Some files were not shown because too many files have changed in this diff.