mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-23 10:10:50 +00:00
Merge master
This commit is contained in:
commit
35ab56ac95
4
.gitignore
vendored
4
.gitignore
vendored
@ -33,6 +33,10 @@ CTestTestfile.cmake
|
||||
*.a
|
||||
*.o
|
||||
|
||||
# Python cache
|
||||
*.pyc
|
||||
__pycache__
|
||||
|
||||
# ignore generated files
|
||||
*-metrika-yandex
|
||||
|
||||
|
150
contrib/libboost/boost_1_62_0/boost/type_traits.hpp
Normal file
150
contrib/libboost/boost_1_62_0/boost/type_traits.hpp
Normal file
@ -0,0 +1,150 @@
|
||||
// (C) Copyright John Maddock 2000.
|
||||
// Use, modification and distribution are subject to the Boost Software License,
|
||||
// Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt).
|
||||
//
|
||||
// See http://www.boost.org/libs/type_traits for most recent version including documentation.
|
||||
|
||||
// See boost/type_traits/*.hpp for full copyright notices.
|
||||
|
||||
#ifndef BOOST_TYPE_TRAITS_HPP
|
||||
#define BOOST_TYPE_TRAITS_HPP
|
||||
|
||||
#include <boost/type_traits/add_const.hpp>
|
||||
#include <boost/type_traits/add_cv.hpp>
|
||||
#include <boost/type_traits/add_lvalue_reference.hpp>
|
||||
#include <boost/type_traits/add_pointer.hpp>
|
||||
#include <boost/type_traits/add_reference.hpp>
|
||||
#include <boost/type_traits/add_rvalue_reference.hpp>
|
||||
#include <boost/type_traits/add_volatile.hpp>
|
||||
#include <boost/type_traits/aligned_storage.hpp>
|
||||
#include <boost/type_traits/alignment_of.hpp>
|
||||
#include <boost/type_traits/common_type.hpp>
|
||||
#include <boost/type_traits/conditional.hpp>
|
||||
#include <boost/type_traits/copy_cv.hpp>
|
||||
#include <boost/type_traits/decay.hpp>
|
||||
#include <boost/type_traits/declval.hpp>
|
||||
#include <boost/type_traits/extent.hpp>
|
||||
#include <boost/type_traits/floating_point_promotion.hpp>
|
||||
#include <boost/type_traits/function_traits.hpp>
|
||||
|
||||
#include <boost/type_traits/has_bit_and.hpp>
|
||||
#include <boost/type_traits/has_bit_and_assign.hpp>
|
||||
#include <boost/type_traits/has_bit_or.hpp>
|
||||
#include <boost/type_traits/has_bit_or_assign.hpp>
|
||||
#include <boost/type_traits/has_bit_xor.hpp>
|
||||
#include <boost/type_traits/has_bit_xor_assign.hpp>
|
||||
#include <boost/type_traits/has_complement.hpp>
|
||||
#include <boost/type_traits/has_dereference.hpp>
|
||||
#include <boost/type_traits/has_divides.hpp>
|
||||
#include <boost/type_traits/has_divides_assign.hpp>
|
||||
#include <boost/type_traits/has_equal_to.hpp>
|
||||
#include <boost/type_traits/has_greater.hpp>
|
||||
#include <boost/type_traits/has_greater_equal.hpp>
|
||||
#include <boost/type_traits/has_left_shift.hpp>
|
||||
#include <boost/type_traits/has_left_shift_assign.hpp>
|
||||
#include <boost/type_traits/has_less.hpp>
|
||||
#include <boost/type_traits/has_less_equal.hpp>
|
||||
#include <boost/type_traits/has_logical_and.hpp>
|
||||
#include <boost/type_traits/has_logical_not.hpp>
|
||||
#include <boost/type_traits/has_logical_or.hpp>
|
||||
#include <boost/type_traits/has_minus.hpp>
|
||||
#include <boost/type_traits/has_minus_assign.hpp>
|
||||
#include <boost/type_traits/has_modulus.hpp>
|
||||
#include <boost/type_traits/has_modulus_assign.hpp>
|
||||
#include <boost/type_traits/has_multiplies.hpp>
|
||||
#include <boost/type_traits/has_multiplies_assign.hpp>
|
||||
#include <boost/type_traits/has_negate.hpp>
|
||||
#if !defined(__BORLANDC__) && !defined(__CUDACC__)
|
||||
#include <boost/type_traits/has_new_operator.hpp>
|
||||
#endif
|
||||
#include <boost/type_traits/has_not_equal_to.hpp>
|
||||
#include <boost/type_traits/has_nothrow_assign.hpp>
|
||||
#include <boost/type_traits/has_nothrow_constructor.hpp>
|
||||
#include <boost/type_traits/has_nothrow_copy.hpp>
|
||||
#include <boost/type_traits/has_nothrow_destructor.hpp>
|
||||
#include <boost/type_traits/has_plus.hpp>
|
||||
#include <boost/type_traits/has_plus_assign.hpp>
|
||||
#include <boost/type_traits/has_post_decrement.hpp>
|
||||
#include <boost/type_traits/has_post_increment.hpp>
|
||||
#include <boost/type_traits/has_pre_decrement.hpp>
|
||||
#include <boost/type_traits/has_pre_increment.hpp>
|
||||
#include <boost/type_traits/has_right_shift.hpp>
|
||||
#include <boost/type_traits/has_right_shift_assign.hpp>
|
||||
#include <boost/type_traits/has_trivial_assign.hpp>
|
||||
#include <boost/type_traits/has_trivial_constructor.hpp>
|
||||
#include <boost/type_traits/has_trivial_copy.hpp>
|
||||
#include <boost/type_traits/has_trivial_destructor.hpp>
|
||||
#include <boost/type_traits/has_trivial_move_assign.hpp>
|
||||
#include <boost/type_traits/has_trivial_move_constructor.hpp>
|
||||
#include <boost/type_traits/has_unary_minus.hpp>
|
||||
#include <boost/type_traits/has_unary_plus.hpp>
|
||||
#include <boost/type_traits/has_virtual_destructor.hpp>
|
||||
|
||||
#include <boost/type_traits/integral_constant.hpp>
|
||||
|
||||
#include <boost/type_traits/is_abstract.hpp>
|
||||
#include <boost/type_traits/is_arithmetic.hpp>
|
||||
#include <boost/type_traits/is_array.hpp>
|
||||
#include <boost/type_traits/is_assignable.hpp>
|
||||
#include <boost/type_traits/is_base_and_derived.hpp>
|
||||
#include <boost/type_traits/is_base_of.hpp>
|
||||
#include <boost/type_traits/is_class.hpp>
|
||||
#include <boost/type_traits/is_complex.hpp>
|
||||
#include <boost/type_traits/is_compound.hpp>
|
||||
#include <boost/type_traits/is_const.hpp>
|
||||
#include <boost/type_traits/is_constructible.hpp>
|
||||
#include <boost/type_traits/is_convertible.hpp>
|
||||
#include <boost/type_traits/is_copy_assignable.hpp>
|
||||
#include <boost/type_traits/is_copy_constructible.hpp>
|
||||
#include <boost/type_traits/is_default_constructible.hpp>
|
||||
#include <boost/type_traits/is_destructible.hpp>
|
||||
#include <boost/type_traits/is_empty.hpp>
|
||||
#include <boost/type_traits/is_enum.hpp>
|
||||
#include <boost/type_traits/is_final.hpp>
|
||||
#include <boost/type_traits/is_float.hpp>
|
||||
#include <boost/type_traits/is_floating_point.hpp>
|
||||
#include <boost/type_traits/is_function.hpp>
|
||||
#include <boost/type_traits/is_fundamental.hpp>
|
||||
#include <boost/type_traits/is_integral.hpp>
|
||||
#include <boost/type_traits/is_lvalue_reference.hpp>
|
||||
#include <boost/type_traits/is_member_function_pointer.hpp>
|
||||
#include <boost/type_traits/is_member_object_pointer.hpp>
|
||||
#include <boost/type_traits/is_member_pointer.hpp>
|
||||
#include <boost/type_traits/is_nothrow_move_assignable.hpp>
|
||||
#include <boost/type_traits/is_nothrow_move_constructible.hpp>
|
||||
#include <boost/type_traits/is_object.hpp>
|
||||
#include <boost/type_traits/is_pod.hpp>
|
||||
#include <boost/type_traits/is_pointer.hpp>
|
||||
#include <boost/type_traits/is_polymorphic.hpp>
|
||||
#include <boost/type_traits/is_reference.hpp>
|
||||
#include <boost/type_traits/is_rvalue_reference.hpp>
|
||||
#include <boost/type_traits/is_same.hpp>
|
||||
#include <boost/type_traits/is_scalar.hpp>
|
||||
#include <boost/type_traits/is_signed.hpp>
|
||||
#include <boost/type_traits/is_stateless.hpp>
|
||||
#include <boost/type_traits/is_union.hpp>
|
||||
#include <boost/type_traits/is_unsigned.hpp>
|
||||
#include <boost/type_traits/is_virtual_base_of.hpp>
|
||||
#include <boost/type_traits/is_void.hpp>
|
||||
#include <boost/type_traits/is_volatile.hpp>
|
||||
#include <boost/type_traits/make_signed.hpp>
|
||||
#include <boost/type_traits/make_unsigned.hpp>
|
||||
#include <boost/type_traits/rank.hpp>
|
||||
#include <boost/type_traits/remove_all_extents.hpp>
|
||||
#include <boost/type_traits/remove_bounds.hpp>
|
||||
#include <boost/type_traits/remove_const.hpp>
|
||||
#include <boost/type_traits/remove_cv.hpp>
|
||||
#include <boost/type_traits/remove_extent.hpp>
|
||||
#include <boost/type_traits/remove_pointer.hpp>
|
||||
#include <boost/type_traits/remove_reference.hpp>
|
||||
#include <boost/type_traits/remove_volatile.hpp>
|
||||
#include <boost/type_traits/type_identity.hpp>
|
||||
#include <boost/type_traits/type_with_alignment.hpp>
|
||||
|
||||
#if !(defined(__sgi) && defined(__EDG_VERSION__) && (__EDG_VERSION__ == 238))
|
||||
#include <boost/type_traits/integral_promotion.hpp>
|
||||
#include <boost/type_traits/promote.hpp>
|
||||
#endif
|
||||
|
||||
#endif // BOOST_TYPE_TRAITS_HPP
|
@ -67,8 +67,8 @@ add_headers_only(dbms src/Server)
|
||||
list (APPEND dbms_sources ${CONFIG_BUILD})
|
||||
list (APPEND dbms_headers ${CONFIG_VERSION} ${CONFIG_COMMON})
|
||||
|
||||
list (APPEND dbms_sources src/Functions/IFunction.cpp src/Functions/FunctionFactory.cpp src/Functions/DataTypeTraits.cpp)
|
||||
list (APPEND dbms_headers src/Functions/IFunction.h src/Functions/FunctionFactory.h src/Functions/DataTypeTraits.h)
|
||||
list (APPEND dbms_sources src/Functions/IFunction.cpp src/Functions/FunctionFactory.cpp)
|
||||
list (APPEND dbms_headers src/Functions/IFunction.h src/Functions/FunctionFactory.h)
|
||||
|
||||
list (APPEND dbms_sources
|
||||
src/AggregateFunctions/AggregateFunctionFactory.cpp
|
||||
@ -98,6 +98,7 @@ list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctions
|
||||
list(REMOVE_ITEM dbms_sources
|
||||
src/Client/Client.cpp
|
||||
src/Client/Benchmark.cpp
|
||||
src/Client/PerformanceTest.cpp
|
||||
src/Storages/StorageCloud.cpp
|
||||
src/Databases/DatabaseCloud.cpp
|
||||
src/Common/StringUtils.cpp)
|
||||
|
@ -5,6 +5,9 @@ install (FILES config.xml DESTINATION ${CLICKHOUSE_ETC_DIR}/clickhouse-client CO
|
||||
add_library (clickhouse-benchmark Benchmark.cpp)
|
||||
target_link_libraries (clickhouse-benchmark dbms ${Boost_PROGRAM_OPTIONS_LIBRARY})
|
||||
|
||||
add_library (clickhouse-performance-test PerformanceTest.cpp)
|
||||
target_link_libraries (clickhouse-performance-test dbms ${Boost_PROGRAM_OPTIONS_LIBRARY})
|
||||
|
||||
if (ENABLE_TESTS)
|
||||
add_subdirectory (tests)
|
||||
endif ()
|
||||
|
@ -28,8 +28,9 @@ public:
|
||||
public:
|
||||
virtual ~IConnectionPool() {}
|
||||
|
||||
/** Selects the connection to work. */
|
||||
virtual Entry get(const Settings * settings = nullptr) = 0;
|
||||
/// Selects the connection to work.
|
||||
/// If force_connected is false, the client must manually ensure that returned connection is good.
|
||||
virtual Entry get(const Settings * settings = nullptr, bool force_connected = true) = 0;
|
||||
};
|
||||
|
||||
using ConnectionPoolPtr = std::shared_ptr<IConnectionPool>;
|
||||
@ -77,12 +78,18 @@ public:
|
||||
{
|
||||
}
|
||||
|
||||
Entry get(const Settings * settings = nullptr) override
|
||||
Entry get(const Settings * settings = nullptr, bool force_connected = true) override
|
||||
{
|
||||
Entry entry;
|
||||
if (settings)
|
||||
return Base::get(settings->queue_max_wait_ms.totalMilliseconds());
|
||||
entry = Base::get(settings->queue_max_wait_ms.totalMilliseconds());
|
||||
else
|
||||
return Base::get(-1);
|
||||
entry = Base::get(-1);
|
||||
|
||||
if (force_connected)
|
||||
entry->forceConnected();
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
const std::string & getHost() const
|
||||
|
@ -42,7 +42,7 @@ ConnectionPoolWithFailover::ConnectionPoolWithFailover(
|
||||
}
|
||||
}
|
||||
|
||||
IConnectionPool::Entry ConnectionPoolWithFailover::get(const Settings * settings)
|
||||
IConnectionPool::Entry ConnectionPoolWithFailover::get(const Settings * settings, bool force_connected)
|
||||
{
|
||||
TryGetEntryFunc try_get_entry = [&](NestedPool & pool, std::string & fail_message)
|
||||
{
|
||||
@ -131,7 +131,7 @@ ConnectionPoolWithFailover::tryGetEntry(
|
||||
TryResult result;
|
||||
try
|
||||
{
|
||||
result.entry = pool.get(settings);
|
||||
result.entry = pool.get(settings, /* force_connected = */ false);
|
||||
|
||||
String server_name;
|
||||
UInt64 server_version_major;
|
||||
|
@ -47,7 +47,7 @@ public:
|
||||
using Entry = IConnectionPool::Entry;
|
||||
|
||||
/** Allocates connection to work. */
|
||||
Entry get(const Settings * settings = nullptr) override; /// From IConnectionPool
|
||||
Entry get(const Settings * settings = nullptr, bool force_connected = true) override; /// From IConnectionPool
|
||||
|
||||
/** Allocates up to the specified number of connections to work.
|
||||
* Connections provide access to different replicas of one shard.
|
||||
|
1567
dbms/src/Client/PerformanceTest.cpp
Normal file
1567
dbms/src/Client/PerformanceTest.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -7,6 +7,8 @@
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
|
||||
#include <Common/Exception.h>
|
||||
|
||||
@ -83,28 +85,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
try
|
||||
{
|
||||
std::string text = e.displayText();
|
||||
|
||||
bool has_embedded_stack_trace = false;
|
||||
if (check_embedded_stacktrace)
|
||||
{
|
||||
auto embedded_stack_trace_pos = text.find("Stack trace");
|
||||
has_embedded_stack_trace = embedded_stack_trace_pos != std::string::npos;
|
||||
if (!with_stacktrace && has_embedded_stack_trace)
|
||||
{
|
||||
text.resize(embedded_stack_trace_pos);
|
||||
Poco::trimRightInPlace(text);
|
||||
}
|
||||
}
|
||||
|
||||
stream << "Code: " << e.code() << ", e.displayText() = " << text << ", e.what() = " << e.what();
|
||||
|
||||
if (with_stacktrace && !has_embedded_stack_trace)
|
||||
stream << ", Stack trace:\n\n" << e.getStackTrace().toString();
|
||||
}
|
||||
catch (...) {}
|
||||
stream << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace);
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
@ -230,6 +211,36 @@ void tryLogException(std::exception_ptr e, Poco::Logger * logger, const std::str
|
||||
}
|
||||
}
|
||||
|
||||
std::string getExceptionMessage(const Exception & e, bool with_stacktrace, bool check_embedded_stacktrace)
|
||||
{
|
||||
std::stringstream stream;
|
||||
|
||||
try
|
||||
{
|
||||
std::string text = e.displayText();
|
||||
|
||||
bool has_embedded_stack_trace = false;
|
||||
if (check_embedded_stacktrace)
|
||||
{
|
||||
auto embedded_stack_trace_pos = text.find("Stack trace");
|
||||
has_embedded_stack_trace = embedded_stack_trace_pos != std::string::npos;
|
||||
if (!with_stacktrace && has_embedded_stack_trace)
|
||||
{
|
||||
text.resize(embedded_stack_trace_pos);
|
||||
Poco::trimRightInPlace(text);
|
||||
}
|
||||
}
|
||||
|
||||
stream << "Code: " << e.code() << ", e.displayText() = " << text << ", e.what() = " << e.what();
|
||||
|
||||
if (with_stacktrace && !has_embedded_stack_trace)
|
||||
stream << ", Stack trace:\n\n" << e.getStackTrace().toString();
|
||||
}
|
||||
catch (...) {}
|
||||
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
std::string getExceptionMessage(std::exception_ptr e, bool with_stacktrace)
|
||||
{
|
||||
try
|
||||
@ -243,4 +254,26 @@ std::string getExceptionMessage(std::exception_ptr e, bool with_stacktrace)
|
||||
}
|
||||
|
||||
|
||||
std::string ExecutionStatus::serializeText() const
|
||||
{
|
||||
std::string res;
|
||||
{
|
||||
WriteBufferFromString wb(res);
|
||||
wb << code << "\n" << escape << message;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
void ExecutionStatus::deserializeText(const std::string & data)
|
||||
{
|
||||
ReadBufferFromString rb(data);
|
||||
rb >> code >> "\n" >> escape >> message;
|
||||
}
|
||||
|
||||
ExecutionStatus ExecutionStatus::fromCurrentException(const std::string & start_of_message)
|
||||
{
|
||||
return ExecutionStatus(getCurrentExceptionCode(), start_of_message + ": " + getCurrentExceptionMessage(false, true));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -79,6 +79,7 @@ void throwFromErrno(const std::string & s, int code = 0, int the_errno = errno);
|
||||
void tryLogCurrentException(const char * log_name, const std::string & start_of_message = "");
|
||||
void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message = "");
|
||||
|
||||
|
||||
/** Prints current exception in canonical format.
|
||||
* with_stacktrace - prints stack trace for DB::Exception.
|
||||
* check_embedded_stacktrace - if DB::Exception has embedded stacktrace then
|
||||
@ -89,9 +90,30 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
|
||||
/// Returns error code from ErrorCodes
|
||||
int getCurrentExceptionCode();
|
||||
|
||||
|
||||
/// An execution status of any piece of code, contains return code and optional error
|
||||
struct ExecutionStatus
|
||||
{
|
||||
int code = 0;
|
||||
std::string message;
|
||||
|
||||
ExecutionStatus() = default;
|
||||
|
||||
explicit ExecutionStatus(int return_code, const std::string & exception_message = "")
|
||||
: code(return_code), message(exception_message) {}
|
||||
|
||||
static ExecutionStatus fromCurrentException(const std::string & start_of_message = "");
|
||||
|
||||
std::string serializeText() const;
|
||||
|
||||
void deserializeText(const std::string & data);
|
||||
};
|
||||
|
||||
|
||||
void tryLogException(std::exception_ptr e, const char * log_name, const std::string & start_of_message = "");
|
||||
void tryLogException(std::exception_ptr e, Poco::Logger * logger, const std::string & start_of_message = "");
|
||||
|
||||
std::string getExceptionMessage(const Exception & e, bool with_stacktrace, bool check_embedded_stacktrace = false);
|
||||
std::string getExceptionMessage(std::exception_ptr e, bool with_stacktrace);
|
||||
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <cstdlib>
|
||||
#include <climits>
|
||||
#include <random>
|
||||
#include <functional>
|
||||
#include <common/Types.h>
|
||||
#include <ext/scope_guard.h>
|
||||
#include <Core/Types.h>
|
||||
|
125
dbms/src/Common/formatIPv6.cpp
Normal file
125
dbms/src/Common/formatIPv6.cpp
Normal file
@ -0,0 +1,125 @@
|
||||
#include <Common/formatIPv6.h>
|
||||
#include <Common/hex.h>
|
||||
#include <ext/range.h>
|
||||
#include <array>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base)
|
||||
static constexpr uint32_t int_log(const uint32_t value, const uint32_t base, const bool carry = false)
|
||||
{
|
||||
return value >= base ? 1 + int_log(value / base, base, value % base || carry) : value % base > 1 || carry;
|
||||
}
|
||||
|
||||
/// print integer in desired base, faster than sprintf
|
||||
template <uint32_t base, typename T, uint32_t buffer_size = sizeof(T) * int_log(256, base, false)>
|
||||
static void print_integer(char *& out, T value)
|
||||
{
|
||||
if (value == 0)
|
||||
*out++ = '0';
|
||||
else
|
||||
{
|
||||
char buf[buffer_size];
|
||||
auto ptr = buf;
|
||||
|
||||
while (value > 0)
|
||||
{
|
||||
*ptr++ = hexLowercase(value % base);
|
||||
value /= base;
|
||||
}
|
||||
|
||||
while (ptr != buf)
|
||||
*out++ = *--ptr;
|
||||
}
|
||||
}
|
||||
|
||||
/// print IPv4 address as %u.%u.%u.%u
|
||||
static void formatIPv4(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count)
|
||||
{
|
||||
const auto limit = IPV4_BINARY_LENGTH - zeroed_tail_bytes_count;
|
||||
|
||||
for (const auto i : ext::range(0, IPV4_BINARY_LENGTH))
|
||||
{
|
||||
UInt8 byte = (i < limit) ? src[i] : 0;
|
||||
print_integer<10, UInt8>(dst, byte);
|
||||
|
||||
if (i != IPV4_BINARY_LENGTH - 1)
|
||||
*dst++ = '.';
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void formatIPv6(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count)
|
||||
{
|
||||
struct { int base, len; } best{-1}, cur{-1};
|
||||
std::array<uint16_t, IPV6_BINARY_LENGTH / sizeof(uint16_t)> words{};
|
||||
|
||||
/** Preprocess:
|
||||
* Copy the input (bytewise) array into a wordwise array.
|
||||
* Find the longest run of 0x00's in src[] for :: shorthanding. */
|
||||
for (const auto i : ext::range(0, IPV6_BINARY_LENGTH - zeroed_tail_bytes_count))
|
||||
words[i / 2] |= src[i] << ((1 - (i % 2)) << 3);
|
||||
|
||||
for (const auto i : ext::range(0, words.size()))
|
||||
{
|
||||
if (words[i] == 0) {
|
||||
if (cur.base == -1)
|
||||
cur.base = i, cur.len = 1;
|
||||
else
|
||||
cur.len++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (cur.base != -1)
|
||||
{
|
||||
if (best.base == -1 || cur.len > best.len)
|
||||
best = cur;
|
||||
cur.base = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (cur.base != -1)
|
||||
{
|
||||
if (best.base == -1 || cur.len > best.len)
|
||||
best = cur;
|
||||
}
|
||||
|
||||
if (best.base != -1 && best.len < 2)
|
||||
best.base = -1;
|
||||
|
||||
/// Format the result.
|
||||
for (const int i : ext::range(0, words.size()))
|
||||
{
|
||||
/// Are we inside the best run of 0x00's?
|
||||
if (best.base != -1 && i >= best.base && i < (best.base + best.len))
|
||||
{
|
||||
if (i == best.base)
|
||||
*dst++ = ':';
|
||||
continue;
|
||||
}
|
||||
|
||||
/// Are we following an initial run of 0x00s or any real hex?
|
||||
if (i != 0)
|
||||
*dst++ = ':';
|
||||
|
||||
/// Is this address an encapsulated IPv4?
|
||||
if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu)))
|
||||
{
|
||||
formatIPv4(src + 12, dst, std::min(zeroed_tail_bytes_count, static_cast<UInt8>(IPV4_BINARY_LENGTH)));
|
||||
break;
|
||||
}
|
||||
|
||||
print_integer<16>(dst, words[i]);
|
||||
}
|
||||
|
||||
/// Was it a trailing run of 0x00's?
|
||||
if (best.base != -1 && (best.base + best.len) == words.size())
|
||||
*dst++ = ':';
|
||||
|
||||
*dst++ = '\0';
|
||||
}
|
||||
|
||||
}
|
21
dbms/src/Common/formatIPv6.h
Normal file
21
dbms/src/Common/formatIPv6.h
Normal file
@ -0,0 +1,21 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/Types.h>
|
||||
|
||||
#define IPV4_BINARY_LENGTH 4
|
||||
#define IPV6_BINARY_LENGTH 16
|
||||
#define IPV4_MAX_TEXT_LENGTH 15 /// Does not count tail zero byte.
|
||||
#define IPV6_MAX_TEXT_LENGTH 39
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
/** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
|
||||
* performs significantly faster than the reference implementation due to the absence of sprintf calls,
|
||||
* bounds checking, unnecessary string copying and length calculation.
|
||||
*/
|
||||
void formatIPv6(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count = 0);
|
||||
|
||||
}
|
@ -22,6 +22,10 @@ struct ColumnWithTypeAndName
|
||||
ColumnWithTypeAndName(const ColumnPtr & column_, const DataTypePtr & type_, const String name_)
|
||||
: column(column_), type(type_), name(name_) {}
|
||||
|
||||
/// Uses type->createColumn() to create column
|
||||
ColumnWithTypeAndName(const DataTypePtr & type_, const String name_)
|
||||
: column(type_->createColumn()), type(type_), name(name_) {}
|
||||
|
||||
ColumnWithTypeAndName cloneEmpty() const;
|
||||
bool operator==(const ColumnWithTypeAndName & other) const;
|
||||
String prettyPrint() const;
|
||||
|
@ -373,9 +373,10 @@ namespace ErrorCodes
|
||||
extern const int BAD_CAST = 368;
|
||||
extern const int ALL_REPLICAS_ARE_STALE = 369;
|
||||
extern const int DATA_TYPE_CANNOT_BE_USED_IN_TABLES = 370;
|
||||
extern const int SESSION_NOT_FOUND = 371;
|
||||
extern const int SESSION_IS_LOCKED = 372;
|
||||
extern const int INVALID_SESSION_TIMEOUT = 373;
|
||||
extern const int INCONSISTENT_CLUSTER_DEFINITION = 371;
|
||||
extern const int SESSION_NOT_FOUND = 372;
|
||||
extern const int SESSION_IS_LOCKED = 373;
|
||||
extern const int INVALID_SESSION_TIMEOUT = 374;
|
||||
|
||||
extern const int KEEPER_EXCEPTION = 999;
|
||||
extern const int POCO_EXCEPTION = 1000;
|
||||
|
@ -1,6 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/MetrikaTypes.h>
|
||||
#include <common/LocalDate.h>
|
||||
#include <common/LocalDateTime.h>
|
||||
#include <Core/Field.h>
|
||||
|
@ -25,7 +25,7 @@ struct BlockIO
|
||||
Block out_sample; /// Example of a block to be written to `out`.
|
||||
|
||||
/// Callbacks for query logging could be set here.
|
||||
std::function<void(IBlockInputStream *, IBlockOutputStream *)> finish_callback;
|
||||
std::function<void(IBlockInputStream *, IBlockOutputStream *)> finish_callback;
|
||||
std::function<void()> exception_callback;
|
||||
|
||||
/// Call these functions if you want to log the request.
|
||||
@ -44,18 +44,18 @@ struct BlockIO
|
||||
BlockIO & operator= (const BlockIO & rhs)
|
||||
{
|
||||
/// We provide the correct order of destruction.
|
||||
out = nullptr;
|
||||
in = nullptr;
|
||||
process_list_entry = nullptr;
|
||||
out = nullptr;
|
||||
in = nullptr;
|
||||
process_list_entry = nullptr;
|
||||
|
||||
process_list_entry = rhs.process_list_entry;
|
||||
in = rhs.in;
|
||||
out = rhs.out;
|
||||
in_sample = rhs.in_sample;
|
||||
out_sample = rhs.out_sample;
|
||||
process_list_entry = rhs.process_list_entry;
|
||||
in = rhs.in;
|
||||
out = rhs.out;
|
||||
in_sample = rhs.in_sample;
|
||||
out_sample = rhs.out_sample;
|
||||
|
||||
finish_callback = rhs.finish_callback;
|
||||
exception_callback = rhs.exception_callback;
|
||||
finish_callback = rhs.finish_callback;
|
||||
exception_callback = rhs.exception_callback;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
@ -15,6 +15,12 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
|
||||
PrettyBlockOutputStream::PrettyBlockOutputStream(WriteBuffer & ostr_, bool no_escapes_, size_t max_rows_, const Context & context_)
|
||||
: ostr(ostr_), max_rows(max_rows_), no_escapes(no_escapes_), context(context_)
|
||||
{
|
||||
|
@ -101,7 +101,7 @@ void DataTypeNullable::deserializeTextEscaped(IColumn & column, ReadBuffer & ist
|
||||
if (*istr.position() != '\\')
|
||||
{
|
||||
safeDeserialize(column,
|
||||
[&istr] { return false; },
|
||||
[] { return false; },
|
||||
[this, &istr] (IColumn & nested) { nested_data_type->deserializeTextEscaped(nested, istr); } );
|
||||
}
|
||||
else
|
||||
|
@ -1,4 +1,4 @@
|
||||
#include <Functions/DataTypeTraits.h>
|
||||
#include <DataTypes/DataTypeTraits.h>
|
||||
|
||||
namespace DB { namespace DataTypeTraits {
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <Functions/NumberTraits.h>
|
||||
#include <DataTypes/NumberTraits.h>
|
||||
#include <DataTypes/EnrichedDataTypePtr.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
@ -1,6 +1,9 @@
|
||||
#include <functional>
|
||||
#include <sstream>
|
||||
#include <memory>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Dictionaries/CacheDictionary.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Common/BitHelpers.h>
|
||||
#include <Common/randomSeed.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
@ -8,6 +11,9 @@
|
||||
#include <Common/ProfilingScopedRWLock.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Dictionaries/CacheDictionary.h>
|
||||
#include <Dictionaries/DictionaryBlockInputStream.h>
|
||||
#include <ext/size.h>
|
||||
#include <ext/range.h>
|
||||
#include <ext/map.h>
|
||||
@ -418,11 +424,11 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
|
||||
|
||||
void CacheDictionary::createAttributes()
|
||||
{
|
||||
const auto size = dict_struct.attributes.size();
|
||||
attributes.reserve(size);
|
||||
const auto attributes_size = dict_struct.attributes.size();
|
||||
attributes.reserve(attributes_size);
|
||||
|
||||
bytes_allocated += size * sizeof(CellMetadata);
|
||||
bytes_allocated += size * sizeof(attributes.front());
|
||||
bytes_allocated += attributes_size * sizeof(attributes.front());
|
||||
|
||||
for (const auto & attribute : dict_struct.attributes)
|
||||
{
|
||||
@ -957,4 +963,33 @@ CacheDictionary::Attribute & CacheDictionary::getAttribute(const std::string & a
|
||||
return attributes[it->second];
|
||||
}
|
||||
|
||||
bool CacheDictionary::isEmptyCell(const UInt64 idx) const
|
||||
{
|
||||
return (idx != zero_cell_idx && cells[idx].id == 0) || (cells[idx].data
|
||||
== ext::safe_bit_cast<CellMetadata::time_point_urep_t>(CellMetadata::time_point_t()));
|
||||
}
|
||||
|
||||
PaddedPODArray<CacheDictionary::Key> CacheDictionary::getCachedIds() const
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
PaddedPODArray<Key> array;
|
||||
for (size_t idx = 0; idx < cells.size(); ++idx)
|
||||
{
|
||||
auto & cell = cells[idx];
|
||||
if (!isEmptyCell(idx) && !cells[idx].isDefault())
|
||||
{
|
||||
array.push_back(cell.id);
|
||||
}
|
||||
}
|
||||
return array;
|
||||
}
|
||||
|
||||
BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<CacheDictionary, Key>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getCachedIds(), column_names);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -137,6 +137,8 @@ public:
|
||||
|
||||
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
private:
|
||||
template <typename Value> using ContainerType = Value[];
|
||||
template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
|
||||
@ -208,6 +210,10 @@ private:
|
||||
const std::vector<Key> & requested_ids, PresentIdHandler && on_cell_updated,
|
||||
AbsentIdHandler && on_id_not_found) const;
|
||||
|
||||
PaddedPODArray<Key> getCachedIds() const;
|
||||
|
||||
bool isEmptyCell(const UInt64 idx) const;
|
||||
|
||||
UInt64 getCellIdx(const Key id) const;
|
||||
|
||||
void setDefaultAttributeValue(Attribute & attribute, const Key idx) const;
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <Dictionaries/ComplexKeyCacheDictionary.h>
|
||||
#include <Dictionaries/DictionaryBlockInputStream.h>
|
||||
#include <Common/BitHelpers.h>
|
||||
#include <Common/randomSeed.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
@ -265,7 +266,7 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
|
||||
/// fetch up-to-date values, decide which ones require update
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool);
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
keys_array[row] = key;
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
const auto & cell_idx = find_result.cell_idx;
|
||||
@ -320,11 +321,11 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
|
||||
|
||||
void ComplexKeyCacheDictionary::createAttributes()
|
||||
{
|
||||
const auto size = dict_struct.attributes.size();
|
||||
attributes.reserve(size);
|
||||
const auto attributes_size = dict_struct.attributes.size();
|
||||
attributes.reserve(attributes_size);
|
||||
|
||||
bytes_allocated += size * sizeof(CellMetadata);
|
||||
bytes_allocated += size * sizeof(attributes.front());
|
||||
bytes_allocated += attributes_size * sizeof(attributes.front());
|
||||
|
||||
for (const auto & attribute : dict_struct.attributes)
|
||||
{
|
||||
@ -457,7 +458,7 @@ void ComplexKeyCacheDictionary::getItemsNumberImpl(
|
||||
/// fetch up-to-date values, decide which ones require update
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool);
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
keys_array[row] = key;
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
|
||||
@ -536,7 +537,7 @@ void ComplexKeyCacheDictionary::getItemsString(
|
||||
/// fetch up-to-date values, discard on fail
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool);
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
SCOPE_EXIT(temporary_keys_pool.rollback(key.size));
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
|
||||
@ -581,7 +582,7 @@ void ComplexKeyCacheDictionary::getItemsString(
|
||||
const auto now = std::chrono::system_clock::now();
|
||||
for (const auto row : ext::range(0, rows_num))
|
||||
{
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool);
|
||||
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
|
||||
keys_array[row] = key;
|
||||
const auto find_result = findCellIdx(key, now);
|
||||
|
||||
@ -899,7 +900,7 @@ StringRef ComplexKeyCacheDictionary::allocKey(const size_t row, const Columns &
|
||||
if (key_size_is_fixed)
|
||||
return placeKeysInFixedSizePool(row, key_columns);
|
||||
|
||||
return placeKeysInPool(row, key_columns, keys, *keys_pool);
|
||||
return placeKeysInPool(row, key_columns, keys, *dict_struct.key, *keys_pool);
|
||||
}
|
||||
|
||||
void ComplexKeyCacheDictionary::freeKey(const StringRef key) const
|
||||
@ -910,28 +911,49 @@ void ComplexKeyCacheDictionary::freeKey(const StringRef key) const
|
||||
keys_pool->free(const_cast<char *>(key.data), key.size);
|
||||
}
|
||||
|
||||
template <typename Arena>
|
||||
template <typename Pool>
|
||||
StringRef ComplexKeyCacheDictionary::placeKeysInPool(
|
||||
const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool)
|
||||
const size_t row, const Columns & key_columns, StringRefs & keys,
|
||||
const std::vector<DictionaryAttribute> & key_attributes, Pool & pool)
|
||||
{
|
||||
const auto keys_size = key_columns.size();
|
||||
size_t sum_keys_size{};
|
||||
for (const auto i : ext::range(0, keys_size))
|
||||
{
|
||||
keys[i] = key_columns[i]->getDataAtWithTerminatingZero(row);
|
||||
sum_keys_size += keys[i].size;
|
||||
}
|
||||
|
||||
const auto res = pool.alloc(sum_keys_size);
|
||||
auto place = res;
|
||||
|
||||
for (size_t j = 0; j < keys_size; ++j)
|
||||
{
|
||||
memcpy(place, keys[j].data, keys[j].size);
|
||||
place += keys[j].size;
|
||||
keys[j] = key_columns[j]->getDataAt(row);
|
||||
sum_keys_size += keys[j].size;
|
||||
if (key_attributes[j].underlying_type == AttributeUnderlyingType::String)
|
||||
sum_keys_size += sizeof(size_t) + 1;
|
||||
}
|
||||
|
||||
return { res, sum_keys_size };
|
||||
auto place = pool.alloc(sum_keys_size);
|
||||
|
||||
auto key_start = place;
|
||||
for (size_t j = 0; j < keys_size; ++j)
|
||||
{
|
||||
if (key_attributes[j].underlying_type == AttributeUnderlyingType::String)
|
||||
{
|
||||
auto start = key_start;
|
||||
auto key_size = keys[j].size + 1;
|
||||
memcpy(key_start, &key_size, sizeof(size_t));
|
||||
key_start += sizeof(size_t);
|
||||
memcpy(key_start, keys[j].data, keys[j].size);
|
||||
key_start += keys[j].size;
|
||||
*key_start = '\0';
|
||||
++key_start;
|
||||
keys[j].data = start;
|
||||
keys[j].size += sizeof(size_t) + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(key_start, keys[j].data, keys[j].size);
|
||||
keys[j].data = key_start;
|
||||
key_start += keys[j].size;
|
||||
}
|
||||
}
|
||||
|
||||
return { place, sum_keys_size };
|
||||
}
|
||||
|
||||
StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool(
|
||||
@ -965,4 +987,26 @@ StringRef ComplexKeyCacheDictionary::copyKey(const StringRef key) const
|
||||
return { res, key.size };
|
||||
}
|
||||
|
||||
bool ComplexKeyCacheDictionary::isEmptyCell(const UInt64 idx) const
|
||||
{
|
||||
return (cells[idx].key == StringRef{} && (idx != zero_cell_idx
|
||||
|| cells[idx].data == ext::safe_bit_cast<CellMetadata::time_point_urep_t>(CellMetadata::time_point_t())));
|
||||
}
|
||||
|
||||
BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
std::vector<StringRef> keys;
|
||||
{
|
||||
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
|
||||
|
||||
for (auto idx : ext::range(0, cells.size()))
|
||||
if (!isEmptyCell(idx)
|
||||
&& !cells[idx].isDefault())
|
||||
keys.push_back(cells[idx].key);
|
||||
}
|
||||
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<ComplexKeyCacheDictionary, UInt64>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, keys, column_names);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -147,6 +147,8 @@ public:
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
private:
|
||||
template <typename Value> using MapType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
|
||||
template <typename Value> using ContainerType = Value[];
|
||||
@ -233,7 +235,8 @@ private:
|
||||
|
||||
template <typename Arena>
|
||||
static StringRef placeKeysInPool(
|
||||
const std::size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool);
|
||||
const std::size_t row, const Columns & key_columns, StringRefs & keys,
|
||||
const std::vector<DictionaryAttribute> & key_attributes, Arena & pool);
|
||||
|
||||
StringRef placeKeysInFixedSizePool(
|
||||
const std::size_t row, const Columns & key_columns) const;
|
||||
@ -255,6 +258,8 @@ private:
|
||||
return findCellIdx(key, now, hash);
|
||||
};
|
||||
|
||||
bool isEmptyCell(const UInt64 idx) const;
|
||||
|
||||
const std::string name;
|
||||
const DictionaryStructure dict_struct;
|
||||
const DictionarySourcePtr source_ptr;
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <ext/map.h>
|
||||
#include <ext/range.h>
|
||||
#include <Dictionaries/ComplexKeyHashedDictionary.h>
|
||||
#include <Dictionaries/DictionaryBlockInputStream.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -460,22 +461,22 @@ StringRef ComplexKeyHashedDictionary::placeKeysInPool(
|
||||
{
|
||||
const auto keys_size = key_columns.size();
|
||||
size_t sum_keys_size{};
|
||||
for (const auto i : ext::range(0, keys_size))
|
||||
{
|
||||
keys[i] = key_columns[i]->getDataAtWithTerminatingZero(row);
|
||||
sum_keys_size += keys[i].size;
|
||||
}
|
||||
|
||||
const auto res = pool.alloc(sum_keys_size);
|
||||
auto place = res;
|
||||
|
||||
const char * block_start = nullptr;
|
||||
for (size_t j = 0; j < keys_size; ++j)
|
||||
{
|
||||
memcpy(place, keys[j].data, keys[j].size);
|
||||
place += keys[j].size;
|
||||
keys[j] = key_columns[j]->serializeValueIntoArena(row, pool, block_start);
|
||||
sum_keys_size += keys[j].size;
|
||||
}
|
||||
|
||||
return { res, sum_keys_size };
|
||||
auto key_start = block_start;
|
||||
for (size_t j = 0; j < keys_size; ++j)
|
||||
{
|
||||
keys[j].data = key_start;
|
||||
key_start += keys[j].size;
|
||||
}
|
||||
|
||||
return { block_start, sum_keys_size };
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
@ -502,4 +503,44 @@ void ComplexKeyHashedDictionary::has(const Attribute & attribute, const Columns
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
std::vector<StringRef> ComplexKeyHashedDictionary::getKeys() const
|
||||
{
|
||||
const Attribute & attribute = attributes.front();
|
||||
|
||||
switch (attribute.type)
|
||||
{
|
||||
case AttributeUnderlyingType::UInt8: return getKeys<UInt8>(attribute); break;
|
||||
case AttributeUnderlyingType::UInt16: return getKeys<UInt16>(attribute); break;
|
||||
case AttributeUnderlyingType::UInt32: return getKeys<UInt32>(attribute); break;
|
||||
case AttributeUnderlyingType::UInt64: return getKeys<UInt64>(attribute); break;
|
||||
case AttributeUnderlyingType::Int8: return getKeys<Int8>(attribute); break;
|
||||
case AttributeUnderlyingType::Int16: return getKeys<Int16>(attribute); break;
|
||||
case AttributeUnderlyingType::Int32: return getKeys<Int32>(attribute); break;
|
||||
case AttributeUnderlyingType::Int64: return getKeys<Int64>(attribute); break;
|
||||
case AttributeUnderlyingType::Float32: return getKeys<Float32>(attribute); break;
|
||||
case AttributeUnderlyingType::Float64: return getKeys<Float64>(attribute); break;
|
||||
case AttributeUnderlyingType::String: return getKeys<StringRef>(attribute); break;
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::vector<StringRef> ComplexKeyHashedDictionary::getKeys(const Attribute & attribute) const
|
||||
{
|
||||
const ContainerType<T> & attr = *std::get<ContainerPtrType<T>>(attribute.maps);
|
||||
std::vector<StringRef> keys;
|
||||
keys.reserve(attr.size());
|
||||
for (const auto & key : attr)
|
||||
keys.push_back(key.first);
|
||||
|
||||
return keys;
|
||||
}
|
||||
|
||||
BlockInputStreamPtr ComplexKeyHashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
|
||||
{
|
||||
using BlockInputStreamType = DictionaryBlockInputStream<ComplexKeyHashedDictionary, UInt64>;
|
||||
return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getKeys(), column_names);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -16,6 +16,7 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
class ComplexKeyHashedDictionary final : public IDictionaryBase
|
||||
{
|
||||
public:
|
||||
@ -125,6 +126,8 @@ public:
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
private:
|
||||
template <typename Value> using ContainerType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
|
||||
template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
|
||||
@ -188,6 +191,11 @@ private:
|
||||
template <typename T>
|
||||
void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
std::vector<StringRef> getKeys() const;
|
||||
|
||||
template <typename T>
|
||||
std::vector<StringRef> getKeys(const Attribute & attribute) const;
|
||||
|
||||
const std::string name;
|
||||
const DictionaryStructure dict_struct;
|
||||
const DictionarySourcePtr source_ptr;
|
||||
|
417
dbms/src/Dictionaries/DictionaryBlockInputStream.h
Normal file
417
dbms/src/Dictionaries/DictionaryBlockInputStream.h
Normal file
@ -0,0 +1,417 @@
|
||||
#pragma once
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <DataStreams/IProfilingBlockInputStream.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Dictionaries/DictionaryBlockInputStreamBase.h>
|
||||
#include <Dictionaries/DictionaryStructure.h>
|
||||
#include <Dictionaries/IDictionary.h>
|
||||
#include <ext/range.h>
|
||||
#include <common/logger_useful.h>
|
||||
#include <Core/Names.h>
|
||||
#include <memory>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/*
|
||||
* BlockInputStream implementation for external dictionaries
|
||||
* read() returns single block consisting of the in-memory contents of the dictionaries
|
||||
*/
|
||||
template <class DictionaryType, class Key>
|
||||
class DictionaryBlockInputStream : public DictionaryBlockInputStreamBase
|
||||
{
|
||||
public:
|
||||
using DictionatyPtr = std::shared_ptr<DictionaryType const>;
|
||||
|
||||
DictionaryBlockInputStream(std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size,
|
||||
PaddedPODArray<Key> && ids, const Names & column_names);
|
||||
DictionaryBlockInputStream(std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size,
|
||||
const std::vector<StringRef> & keys, const Names & column_names);
|
||||
|
||||
using GetColumnsFunction =
|
||||
std::function<ColumnsWithTypeAndName(const Columns &, const std::vector<DictionaryAttribute>& attributes)>;
|
||||
// Used to separate key columns format for storage and view.
|
||||
// Calls get_key_columns_function to get key column for dictionary get fuction call
|
||||
// and get_view_columns_function to get key representation.
|
||||
// Now used in trie dictionary, where columns are stored as ip and mask, and are showed as string
|
||||
DictionaryBlockInputStream(std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size,
|
||||
const Columns & data_columns, const Names & column_names,
|
||||
GetColumnsFunction && get_key_columns_function,
|
||||
GetColumnsFunction && get_view_columns_function);
|
||||
|
||||
String getName() const override {
|
||||
return "DictionaryBlockInputStream";
|
||||
}
|
||||
|
||||
protected:
|
||||
Block getBlock(size_t start, size_t size) const override;
|
||||
|
||||
private:
|
||||
// pointer types to getXXX functions
|
||||
// for single key dictionaries
|
||||
template <class Type>
|
||||
using DictionaryGetter = void (DictionaryType::*)(
|
||||
const std::string &, const PaddedPODArray<Key> &, PaddedPODArray<Type> &) const;
|
||||
using DictionaryStringGetter = void (DictionaryType::*)(
|
||||
const std::string &, const PaddedPODArray<Key> &, ColumnString *) const;
|
||||
// for complex complex key dictionaries
|
||||
template <class Type>
|
||||
using GetterByKey = void (DictionaryType::*)(
|
||||
const std::string &, const Columns &, const DataTypes &, PaddedPODArray<Type> & out) const;
|
||||
using StringGetterByKey = void (DictionaryType::*)(
|
||||
const std::string &, const Columns &, const DataTypes &, ColumnString * out) const;
|
||||
|
||||
// call getXXX
|
||||
// for single key dictionaries
|
||||
template <class Type, class Container>
|
||||
void callGetter(DictionaryGetter<Type> getter, const PaddedPODArray<Key> & ids,
|
||||
const Columns & keys, const DataTypes & data_types,
|
||||
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
|
||||
template <class Container>
|
||||
void callGetter(DictionaryStringGetter getter, const PaddedPODArray<Key> & ids,
|
||||
const Columns & keys, const DataTypes & data_types,
|
||||
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
|
||||
// for complex complex key dictionaries
|
||||
template <class Type, class Container>
|
||||
void callGetter(GetterByKey<Type> getter, const PaddedPODArray<Key> & ids,
|
||||
const Columns & keys, const DataTypes & data_types,
|
||||
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
|
||||
template <class Container>
|
||||
void callGetter(StringGetterByKey getter, const PaddedPODArray<Key> & ids,
|
||||
const Columns & keys, const DataTypes & data_types,
|
||||
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
|
||||
|
||||
template <template <class> class Getter, class StringGetter>
|
||||
Block fillBlock(const PaddedPODArray<Key> & ids, const Columns & keys,
|
||||
const DataTypes & types, ColumnsWithTypeAndName && view) const;
|
||||
|
||||
|
||||
template <class AttributeType, class Getter>
|
||||
ColumnPtr getColumnFromAttribute(Getter getter, const PaddedPODArray<Key> & ids,
|
||||
const Columns & keys, const DataTypes & data_types,
|
||||
const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
|
||||
template <class Getter>
|
||||
ColumnPtr getColumnFromStringAttribute(Getter getter, const PaddedPODArray<Key> & ids,
|
||||
const Columns & keys, const DataTypes & data_types,
|
||||
const DictionaryAttribute& attribute, const DictionaryType& dictionary) const;
|
||||
ColumnPtr getColumnFromIds(const PaddedPODArray<Key>& ids) const;
|
||||
|
||||
void fillKeyColumns(const std::vector<StringRef> & keys, size_t start, size_t size,
|
||||
const DictionaryStructure& dictionary_structure, ColumnsWithTypeAndName & columns) const;
|
||||
|
||||
DictionatyPtr dictionary;
|
||||
Names column_names;
|
||||
PaddedPODArray<Key> ids;
|
||||
ColumnsWithTypeAndName key_columns;
|
||||
Poco::Logger * logger;
|
||||
Block (DictionaryBlockInputStream<DictionaryType, Key>::*fillBlockFunction)(
|
||||
const PaddedPODArray<Key>& ids, const Columns& keys,
|
||||
const DataTypes & types, ColumnsWithTypeAndName && view) const;
|
||||
|
||||
Columns data_columns;
|
||||
GetColumnsFunction get_key_columns_function;
|
||||
GetColumnsFunction get_view_columns_function;
|
||||
};
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
|
||||
std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size,
|
||||
PaddedPODArray<Key> && ids, const Names& column_names)
|
||||
: DictionaryBlockInputStreamBase(ids.size(), max_block_size),
|
||||
dictionary(std::static_pointer_cast<const DictionaryType>(dictionary)),
|
||||
column_names(column_names), ids(std::move(ids)),
|
||||
logger(&Poco::Logger::get("DictionaryBlockInputStream")),
|
||||
fillBlockFunction(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<DictionaryGetter, DictionaryStringGetter>)
|
||||
{
|
||||
}
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
|
||||
std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size,
|
||||
const std::vector<StringRef> & keys, const Names& column_names)
|
||||
: DictionaryBlockInputStreamBase(keys.size(), max_block_size),
|
||||
dictionary(std::static_pointer_cast<const DictionaryType>(dictionary)), column_names(column_names),
|
||||
logger(&Poco::Logger::get("DictionaryBlockInputStream")),
|
||||
fillBlockFunction(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, StringGetterByKey>)
|
||||
{
|
||||
const DictionaryStructure& dictionaty_structure = dictionary->getStructure();
|
||||
fillKeyColumns(keys, 0, keys.size(), dictionaty_structure, key_columns);
|
||||
}
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
|
||||
std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size,
|
||||
const Columns & data_columns, const Names & column_names,
|
||||
GetColumnsFunction && get_key_columns_function,
|
||||
GetColumnsFunction && get_view_columns_function)
|
||||
: DictionaryBlockInputStreamBase(data_columns.front()->size(), max_block_size),
|
||||
dictionary(std::static_pointer_cast<const DictionaryType>(dictionary)), column_names(column_names),
|
||||
logger(&Poco::Logger::get("DictionaryBlockInputStream")),
|
||||
fillBlockFunction(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, StringGetterByKey>),
|
||||
data_columns(data_columns),
|
||||
get_key_columns_function(get_key_columns_function), get_view_columns_function(get_view_columns_function)
|
||||
{
|
||||
}
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, size_t length) const
|
||||
{
|
||||
if (!key_columns.empty())
|
||||
{
|
||||
Columns columns;
|
||||
ColumnsWithTypeAndName view_columns;
|
||||
columns.reserve(key_columns.size());
|
||||
for (const auto & key_column : key_columns)
|
||||
{
|
||||
auto column = key_column.column->cut(start, length);
|
||||
columns.emplace_back(column);
|
||||
view_columns.emplace_back(column, key_column.type, key_column.name);
|
||||
}
|
||||
return (this->*fillBlockFunction)({}, columns, {}, std::move(view_columns));
|
||||
}
|
||||
else if(!ids.empty())
|
||||
{
|
||||
PaddedPODArray<Key> block_ids(ids.begin() + start, ids.begin() + start + length);
|
||||
return (this->*fillBlockFunction)(block_ids, {}, {}, {});
|
||||
}
|
||||
else
|
||||
{
|
||||
Columns columns;
|
||||
columns.reserve(data_columns.size());
|
||||
for (const auto & data_column : data_columns)
|
||||
columns.push_back(data_column->cut(start, length));
|
||||
const DictionaryStructure& dictionaty_structure = dictionary->getStructure();
|
||||
const auto & attributes = *dictionaty_structure.key;
|
||||
ColumnsWithTypeAndName keys_with_type_and_name = get_key_columns_function(columns, attributes);
|
||||
ColumnsWithTypeAndName view_with_type_and_name = get_view_columns_function(columns, attributes);
|
||||
DataTypes types;
|
||||
columns.clear();
|
||||
for (const auto & key_column : keys_with_type_and_name)
|
||||
{
|
||||
columns.push_back(key_column.column);
|
||||
types.push_back(key_column.type);
|
||||
}
|
||||
return (this->*fillBlockFunction)({}, columns, types, std::move(view_with_type_and_name));
|
||||
}
|
||||
}
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
template <class Type, class Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
DictionaryGetter<Type> getter, const PaddedPODArray<Key> & ids,
|
||||
const Columns & keys, const DataTypes & data_types,
|
||||
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const
|
||||
{
|
||||
(dictionary.*getter)(attribute.name, ids, container);
|
||||
}
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
template <class Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
DictionaryStringGetter getter, const PaddedPODArray<Key> & ids,
|
||||
const Columns & keys, const DataTypes & data_types,
|
||||
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const
|
||||
{
|
||||
(dictionary.*getter)(attribute.name, ids, container);
|
||||
}
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
template <class Type, class Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
GetterByKey<Type> getter, const PaddedPODArray<Key> & ids,
|
||||
const Columns & keys, const DataTypes & data_types,
|
||||
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const
|
||||
{
|
||||
(dictionary.*getter)(attribute.name, keys, data_types, container);
|
||||
}
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
template <class Container>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
|
||||
StringGetterByKey getter, const PaddedPODArray<Key> & ids,
|
||||
const Columns & keys, const DataTypes & data_types,
|
||||
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const
|
||||
{
|
||||
(dictionary.*getter)(attribute.name, keys, data_types, container);
|
||||
}
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
template <template <class> class Getter, class StringGetter>
|
||||
Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
const PaddedPODArray<Key>& ids, const Columns& keys, const DataTypes & types, ColumnsWithTypeAndName && view) const
|
||||
{
|
||||
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
|
||||
|
||||
DataTypes data_types = types;
|
||||
ColumnsWithTypeAndName block_columns;
|
||||
|
||||
data_types.reserve(keys.size());
|
||||
const DictionaryStructure& dictionaty_structure = dictionary->getStructure();
|
||||
if (data_types.empty() && dictionaty_structure.key)
|
||||
for (const auto key : *dictionaty_structure.key)
|
||||
data_types.push_back(key.type);
|
||||
|
||||
for (const auto & column : view)
|
||||
if (names.find(column.name) != names.end())
|
||||
block_columns.push_back(column);
|
||||
|
||||
const DictionaryStructure& structure = dictionary->getStructure();
|
||||
|
||||
if (structure.id && names.find(structure.id->name) != names.end())
|
||||
block_columns.emplace_back(getColumnFromIds(ids), std::make_shared<DataTypeUInt64>(), structure.id->name);
|
||||
|
||||
for (const auto idx : ext::range(0, structure.attributes.size()))
|
||||
{
|
||||
const DictionaryAttribute& attribute = structure.attributes[idx];
|
||||
if (names.find(attribute.name) != names.end())
|
||||
{
|
||||
ColumnPtr column;
|
||||
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
|
||||
column = getColumnFromAttribute<TYPE, Getter<TYPE>>( \
|
||||
&DictionaryType::get##TYPE, ids, keys, data_types, attribute, *dictionary)
|
||||
switch (attribute.underlying_type)
|
||||
{
|
||||
case AttributeUnderlyingType::UInt8:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt8);
|
||||
break;
|
||||
case AttributeUnderlyingType::UInt16:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt16);
|
||||
break;
|
||||
case AttributeUnderlyingType::UInt32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt32);
|
||||
break;
|
||||
case AttributeUnderlyingType::UInt64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt64);
|
||||
break;
|
||||
case AttributeUnderlyingType::Int8:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int8);
|
||||
break;
|
||||
case AttributeUnderlyingType::Int16:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int16);
|
||||
break;
|
||||
case AttributeUnderlyingType::Int32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int32);
|
||||
break;
|
||||
case AttributeUnderlyingType::Int64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int64);
|
||||
break;
|
||||
case AttributeUnderlyingType::Float32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float32);
|
||||
break;
|
||||
case AttributeUnderlyingType::Float64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float64);
|
||||
break;
|
||||
case AttributeUnderlyingType::String:
|
||||
{
|
||||
column = getColumnFromStringAttribute<StringGetter>(
|
||||
&DictionaryType::getString, ids, keys, data_types, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
block_columns.emplace_back(column, attribute.type, attribute.name);
|
||||
}
|
||||
}
|
||||
return Block(block_columns);
|
||||
}
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
template <class AttributeType, class Getter>
|
||||
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute(
|
||||
Getter getter, const PaddedPODArray<Key> & ids,
|
||||
const Columns & keys, const DataTypes & data_types,
|
||||
const DictionaryAttribute & attribute, const DictionaryType & dictionary) const
|
||||
{
|
||||
auto size = ids.size();
|
||||
if (!keys.empty())
|
||||
size = keys.front()->size();
|
||||
auto column_vector = std::make_unique<ColumnVector<AttributeType>>(size);
|
||||
callGetter(getter, ids, keys, data_types, column_vector->getData(), attribute, dictionary);
|
||||
return ColumnPtr(std::move(column_vector));
|
||||
}
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
template <class Getter>
|
||||
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromStringAttribute(
|
||||
Getter getter, const PaddedPODArray<Key> & ids,
|
||||
const Columns & keys, const DataTypes & data_types,
|
||||
const DictionaryAttribute& attribute, const DictionaryType& dictionary) const
|
||||
{
|
||||
auto column_string = std::make_shared<ColumnString>();
|
||||
auto ptr = column_string.get();
|
||||
callGetter(getter, ids, keys, data_types, ptr, attribute, dictionary);
|
||||
return column_string;
|
||||
}
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(const PaddedPODArray<Key>& ids) const
|
||||
{
|
||||
auto column_vector = std::make_shared<ColumnVector<UInt64>>();
|
||||
column_vector->getData().reserve(ids.size());
|
||||
for (UInt64 id : ids)
|
||||
{
|
||||
column_vector->insert(id);
|
||||
}
|
||||
return column_vector;
|
||||
}
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
void DictionaryBlockInputStream<DictionaryType, Key>::fillKeyColumns(
|
||||
const std::vector<StringRef> & keys, size_t start, size_t size,
|
||||
const DictionaryStructure& dictionary_structure, ColumnsWithTypeAndName & columns) const
|
||||
{
|
||||
for (const DictionaryAttribute & attribute : *dictionary_structure.key)
|
||||
{
|
||||
#define ADD_COLUMN(TYPE) columns.push_back( \
|
||||
ColumnWithTypeAndName(std::make_shared<ColumnVector<TYPE>>(), attribute.type, attribute.name))
|
||||
switch (attribute.underlying_type)
|
||||
{
|
||||
case AttributeUnderlyingType::UInt8:
|
||||
ADD_COLUMN(UInt8);
|
||||
break;
|
||||
case AttributeUnderlyingType::UInt16:
|
||||
ADD_COLUMN(UInt16);
|
||||
break;
|
||||
case AttributeUnderlyingType::UInt32:
|
||||
ADD_COLUMN(UInt32);
|
||||
break;
|
||||
case AttributeUnderlyingType::UInt64:
|
||||
ADD_COLUMN(UInt64);
|
||||
break;
|
||||
case AttributeUnderlyingType::Int8:
|
||||
ADD_COLUMN(Int8);
|
||||
break;
|
||||
case AttributeUnderlyingType::Int16:
|
||||
ADD_COLUMN(Int16);
|
||||
break;
|
||||
case AttributeUnderlyingType::Int32:
|
||||
ADD_COLUMN(Int32);
|
||||
break;
|
||||
case AttributeUnderlyingType::Int64:
|
||||
ADD_COLUMN(Int64);
|
||||
break;
|
||||
case AttributeUnderlyingType::Float32:
|
||||
ADD_COLUMN(Float32);
|
||||
break;
|
||||
case AttributeUnderlyingType::Float64:
|
||||
ADD_COLUMN(Float64);
|
||||
break;
|
||||
case AttributeUnderlyingType::String:
|
||||
{
|
||||
columns.push_back(ColumnWithTypeAndName(std::make_shared<ColumnString>(), attribute.type, attribute.name));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto idx : ext::range(start, size))
|
||||
{
|
||||
const auto & key = keys[idx];
|
||||
auto ptr = key.data;
|
||||
for (const auto & column : columns)
|
||||
ptr = column.column->deserializeAndInsertFromArena(ptr);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
29
dbms/src/Dictionaries/DictionaryBlockInputStreamBase.cpp
Normal file
29
dbms/src/Dictionaries/DictionaryBlockInputStreamBase.cpp
Normal file
@ -0,0 +1,29 @@
|
||||
#include <Dictionaries/DictionaryBlockInputStreamBase.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
DictionaryBlockInputStreamBase::DictionaryBlockInputStreamBase(size_t rows_count, size_t max_block_size)
|
||||
: rows_count(rows_count), max_block_size(max_block_size), next_row(0)
|
||||
{
|
||||
}
|
||||
|
||||
String DictionaryBlockInputStreamBase::getID() const
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << static_cast<const void*>(this);
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
Block DictionaryBlockInputStreamBase::readImpl()
|
||||
{
|
||||
if (next_row == rows_count)
|
||||
return Block();
|
||||
|
||||
size_t block_size = std::min<size_t>(max_block_size, rows_count - next_row);
|
||||
Block block = getBlock(next_row, block_size);
|
||||
next_row += block_size;
|
||||
return block;
|
||||
}
|
||||
|
||||
}
|
27
dbms/src/Dictionaries/DictionaryBlockInputStreamBase.h
Normal file
27
dbms/src/Dictionaries/DictionaryBlockInputStreamBase.h
Normal file
@ -0,0 +1,27 @@
|
||||
#pragma once
|
||||
#include <DataStreams/IProfilingBlockInputStream.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class DictionaryBlockInputStreamBase : public IProfilingBlockInputStream
|
||||
{
|
||||
protected:
|
||||
Block block;
|
||||
|
||||
DictionaryBlockInputStreamBase(size_t rows_count, size_t max_block_size);
|
||||
|
||||
String getID() const override;
|
||||
|
||||
virtual Block getBlock(size_t start, size_t length) const = 0;
|
||||
|
||||
private:
|
||||
const size_t rows_count;
|
||||
const size_t max_block_size;
|
||||
size_t next_row;
|
||||
|
||||
Block readImpl() override;
|
||||
void readPrefixImpl() override { next_row = 0; }
|
||||
};
|
||||
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
#include <Dictionaries/FlatDictionary.h>
|
||||
|
||||
#include <Dictionaries/DictionaryBlockInputStream.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -524,4 +524,26 @@ void FlatDictionary::has(const Attribute & attribute, const PaddedPODArray<Key>
|
||||
query_count.fetch_add(ids_count, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
|
||||
/// Collects the keys of all loaded entries: every index whose loaded_ids flag is set.
PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const
{
    PaddedPODArray<Key> ids;

    for (auto idx : ext::range(0, ext::size(loaded_ids)))
        if (loaded_ids[idx])
            ids.push_back(idx);

    return ids;
}
|
||||
|
||||
/// Creates a stream over all loaded keys of this dictionary,
/// exposing the requested columns in blocks of at most max_block_size rows.
BlockInputStreamPtr FlatDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
    using StreamType = DictionaryBlockInputStream<FlatDictionary, Key>;
    return std::make_shared<StreamType>(shared_from_this(), max_block_size, getIds(), column_names);
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -125,6 +125,8 @@ public:
|
||||
|
||||
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
private:
|
||||
template <typename Value> using ContainerType = PaddedPODArray<Value>;
|
||||
template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
|
||||
@ -191,6 +193,8 @@ private:
|
||||
const AncestorType & ancestor_ids,
|
||||
PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
PaddedPODArray<Key> getIds() const;
|
||||
|
||||
const std::string name;
|
||||
const DictionaryStructure dict_struct;
|
||||
const DictionarySourcePtr source_ptr;
|
||||
|
@ -1,6 +1,6 @@
|
||||
#include <ext/size.h>
|
||||
#include <Dictionaries/HashedDictionary.h>
|
||||
|
||||
#include <Dictionaries/DictionaryBlockInputStream.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -479,4 +479,44 @@ void HashedDictionary::has(const Attribute & attribute, const PaddedPODArray<Key
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds(const Attribute & attribute) const
|
||||
{
|
||||
const HashMap<UInt64, T> & attr = *std::get<CollectionPtrType<T>>(attribute.maps);
|
||||
|
||||
PaddedPODArray<Key> ids;
|
||||
ids.reserve(attr.size());
|
||||
for (const auto & value : attr) {
|
||||
ids.push_back(value.first);
|
||||
}
|
||||
return ids;
|
||||
}
|
||||
|
||||
/// Collects all keys of the dictionary by dispatching on the underlying
/// type of the first attribute (every attribute map holds the same key set).
/// Fixed: removed the unreachable `break` statements that followed each `return`.
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
{
    const auto & attribute = attributes.front();

    switch (attribute.type)
    {
        case AttributeUnderlyingType::UInt8: return getIds<UInt8>(attribute);
        case AttributeUnderlyingType::UInt16: return getIds<UInt16>(attribute);
        case AttributeUnderlyingType::UInt32: return getIds<UInt32>(attribute);
        case AttributeUnderlyingType::UInt64: return getIds<UInt64>(attribute);
        case AttributeUnderlyingType::Int8: return getIds<Int8>(attribute);
        case AttributeUnderlyingType::Int16: return getIds<Int16>(attribute);
        case AttributeUnderlyingType::Int32: return getIds<Int32>(attribute);
        case AttributeUnderlyingType::Int64: return getIds<Int64>(attribute);
        case AttributeUnderlyingType::Float32: return getIds<Float32>(attribute);
        case AttributeUnderlyingType::Float64: return getIds<Float64>(attribute);
        case AttributeUnderlyingType::String: return getIds<StringRef>(attribute);
    }
    /// Not reachable for a valid attribute type; kept to satisfy the compiler.
    return PaddedPODArray<Key>();
}
|
||||
|
||||
/// Creates a stream over all keys of this dictionary,
/// exposing the requested columns in blocks of at most max_block_size rows.
BlockInputStreamPtr HashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
    using StreamType = DictionaryBlockInputStream<HashedDictionary, Key>;
    return std::make_shared<StreamType>(shared_from_this(), max_block_size, getIds(), column_names);
}
|
||||
|
||||
}
|
||||
|
@ -123,6 +123,8 @@ public:
|
||||
void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
|
||||
void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
private:
|
||||
template <typename Value> using CollectionType = HashMap<UInt64, Value>;
|
||||
template <typename Value> using CollectionPtrType = std::unique_ptr<CollectionType<Value>>;
|
||||
@ -181,6 +183,11 @@ private:
|
||||
template <typename T>
|
||||
void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
template <typename T>
|
||||
PaddedPODArray<Key> getIds(const Attribute & attribute) const;
|
||||
|
||||
PaddedPODArray<Key> getIds() const;
|
||||
|
||||
template <typename ChildType, typename AncestorType>
|
||||
void isInImpl(
|
||||
const ChildType & child_ids,
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <Core/Field.h>
|
||||
#include <Core/StringRef.h>
|
||||
#include <Core/Names.h>
|
||||
#include <Poco/Util/XMLConfiguration.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <memory>
|
||||
@ -19,8 +20,11 @@ struct DictionaryLifetime;
|
||||
struct DictionaryStructure;
|
||||
class ColumnString;
|
||||
|
||||
class IBlockInputStream;
|
||||
using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
|
||||
|
||||
struct IDictionaryBase
|
||||
|
||||
struct IDictionaryBase : public std::enable_shared_from_this<IDictionaryBase>
|
||||
{
|
||||
using Key = UInt64;
|
||||
|
||||
@ -53,6 +57,8 @@ struct IDictionaryBase
|
||||
|
||||
virtual bool isInjective(const std::string & attribute_name) const = 0;
|
||||
|
||||
virtual BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const = 0;
|
||||
|
||||
virtual ~IDictionaryBase() = default;
|
||||
};
|
||||
|
||||
|
214
dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h
Normal file
214
dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h
Normal file
@ -0,0 +1,214 @@
|
||||
#pragma once
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <DataStreams/IProfilingBlockInputStream.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <Dictionaries/DictionaryBlockInputStreamBase.h>
|
||||
#include <Dictionaries/DictionaryStructure.h>
|
||||
#include <Dictionaries/IDictionary.h>
|
||||
#include <ext/range.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/*
|
||||
* BlockInputStream implementation for external dictionaries
|
||||
* read() returns single block consisting of the in-memory contents of the dictionaries
|
||||
*/
|
||||
template <class DictionaryType, class Key>
class RangeDictionaryBlockInputStream : public DictionaryBlockInputStreamBase
{
public:
    /// NOTE(review): "DictionatyPtr" is a typo of DictionaryPtr; kept because the
    /// out-of-class member definitions below spell it the same way.
    using DictionatyPtr = std::shared_ptr<DictionaryType const>;

    /// Takes ownership of the parallel arrays: ids[i] is valid in
    /// [start_dates[i], end_dates[i]] (dates as UInt16 day numbers).
    RangeDictionaryBlockInputStream(
        DictionatyPtr dictionary, size_t max_block_size, const Names & column_names, PaddedPODArray<Key> && ids,
        PaddedPODArray<UInt16> && start_dates, PaddedPODArray<UInt16> && end_dates);

    String getName() const override {
        return "RangeDictionaryBlockInputStream";
    }

protected:
    /// Builds the block for rows [start, start + length) of the stored arrays.
    Block getBlock(size_t start, size_t length) const override;

private:
    /// Pointer-to-member of the dictionary's typed getter:
    /// (attribute name, ids, dates) -> values.
    template <class Type>
    using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &,
        const PaddedPODArray<UInt16> &, PaddedPODArray<Type> &) const;

    /// Materializes one numeric attribute column via the given getter.
    template <class AttributeType>
    ColumnPtr getColumnFromAttribute(DictionaryGetter<AttributeType> getter,
        const PaddedPODArray<Key>& ids, const PaddedPODArray<UInt16> & dates,
        const DictionaryAttribute& attribute, const DictionaryType& dictionary) const;
    /// Same as above for String attributes (separate getString interface).
    ColumnPtr getColumnFromAttributeString(const PaddedPODArray<Key>& ids, const PaddedPODArray<UInt16> & dates,
        const DictionaryAttribute& attribute, const DictionaryType& dictionary) const;
    /// Copies a POD array into a freshly allocated ColumnVector.
    template <class T>
    ColumnPtr getColumnFromPODArray(const PaddedPODArray<T>& array) const;

    /// Appends the id / range_min / range_max column if it was requested.
    template <class T>
    void addSpecialColumn(
        const std::experimental::optional<DictionarySpecialAttribute>& attribute, DataTypePtr type,
        const std::string & default_name, const std::unordered_set<std::string> & column_names,
        const PaddedPODArray<T> & values, ColumnsWithTypeAndName& columns) const;

    /// Assembles the final Block (special columns + requested attributes).
    Block fillBlock(const PaddedPODArray<Key> & ids,
        const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const;

    DictionatyPtr dictionary;
    Names column_names;
    /// Parallel arrays: element i describes one (key, date range) entry.
    PaddedPODArray<Key> ids;
    PaddedPODArray<UInt16> start_dates;
    PaddedPODArray<UInt16> end_dates;
};
|
||||
|
||||
|
||||
template <class DictionaryType, class Key>
RangeDictionaryBlockInputStream<DictionaryType, Key>::RangeDictionaryBlockInputStream(
    DictionatyPtr dictionary, size_t max_column_size, const Names & column_names, PaddedPODArray<Key> && ids,
    PaddedPODArray<UInt16> && start_dates, PaddedPODArray<UInt16> && end_dates)
    /// NOTE(review): parameter is named max_column_size here but max_block_size in the
    /// declaration; it is forwarded as the base class's max_block_size.
    /// ids.size() is read before ids is moved from, which is well-defined
    /// because the base-class initializer runs first.
    : DictionaryBlockInputStreamBase(ids.size(), max_column_size),
    dictionary(dictionary), column_names(column_names),
    ids(std::move(ids)), start_dates(std::move(start_dates)), end_dates(std::move(end_dates))
{
}
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
Block RangeDictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, size_t length) const
|
||||
{
|
||||
PaddedPODArray<Key> block_ids;
|
||||
PaddedPODArray<UInt16> block_start_dates;
|
||||
PaddedPODArray<UInt16> block_end_dates;
|
||||
block_ids.reserve(length);
|
||||
block_start_dates.reserve(length);
|
||||
block_end_dates.reserve(length);
|
||||
|
||||
for (auto idx : ext::range(start, start + length))
|
||||
{
|
||||
block_ids.push_back(ids[idx]);
|
||||
block_start_dates.push_back(block_start_dates[idx]);
|
||||
block_end_dates.push_back(block_end_dates[idx]);
|
||||
}
|
||||
|
||||
return fillBlock(block_ids, block_start_dates, block_end_dates);
|
||||
}
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
template <class AttributeType>
|
||||
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute(
|
||||
DictionaryGetter<AttributeType> getter, const PaddedPODArray<Key>& ids,
|
||||
const PaddedPODArray<UInt16> & dates, const DictionaryAttribute& attribute, const DictionaryType& dictionary) const
|
||||
{
|
||||
auto column_vector = std::make_unique<ColumnVector<AttributeType>>(ids.size());
|
||||
(dictionary.*getter)(attribute.name, ids, dates, column_vector->getData());
|
||||
return ColumnPtr(std::move(column_vector));
|
||||
}
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttributeString(
|
||||
const PaddedPODArray<Key>& ids, const PaddedPODArray<UInt16> & dates,
|
||||
const DictionaryAttribute& attribute, const DictionaryType& dictionary) const
|
||||
{
|
||||
auto column_string = std::make_unique<ColumnString>();
|
||||
dictionary.getString(attribute.name, ids, dates, column_string.get());
|
||||
return ColumnPtr(std::move(column_string));
|
||||
}
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
template <class T>
|
||||
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromPODArray(const PaddedPODArray<T>& array) const
|
||||
{
|
||||
auto column_vector = std::make_unique<ColumnVector<T>>();
|
||||
column_vector->getData().reserve(array.size());
|
||||
for (T value : array)
|
||||
{
|
||||
column_vector->insert(value);
|
||||
}
|
||||
return ColumnPtr(std::move(column_vector));
|
||||
}
|
||||
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
template <class T>
|
||||
void RangeDictionaryBlockInputStream<DictionaryType, Key>::addSpecialColumn(
|
||||
const std::experimental::optional<DictionarySpecialAttribute> & attribute, DataTypePtr type,
|
||||
const std::string& default_name, const std::unordered_set<std::string> & column_names,
|
||||
const PaddedPODArray<T> & values, ColumnsWithTypeAndName & columns) const
|
||||
{
|
||||
std::string name = default_name;
|
||||
if (attribute) {
|
||||
name = attribute->name;
|
||||
}
|
||||
if (column_names.find(name) != column_names.end()) {
|
||||
columns.emplace_back(getColumnFromPODArray(values), type, name);
|
||||
}
|
||||
}
|
||||
|
||||
template <class DictionaryType, class Key>
|
||||
Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
|
||||
const PaddedPODArray<Key>& ids,
|
||||
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const
|
||||
{
|
||||
ColumnsWithTypeAndName columns;
|
||||
const DictionaryStructure& structure = dictionary->getStructure();
|
||||
|
||||
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
|
||||
|
||||
addSpecialColumn(structure.id, std::make_shared<DataTypeUInt64>(), "ID", names, ids, columns);
|
||||
addSpecialColumn(structure.range_min, std::make_shared<DataTypeDate>(), "Range Start", names, start_dates, columns);
|
||||
addSpecialColumn(structure.range_max, std::make_shared<DataTypeDate>(), "Range End", names, end_dates, columns);
|
||||
|
||||
for (const auto idx : ext::range(0, structure.attributes.size()))
|
||||
{
|
||||
const DictionaryAttribute& attribute = structure.attributes[idx];
|
||||
if (names.find(attribute.name) != names.end())
|
||||
{
|
||||
ColumnPtr column;
|
||||
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE)\
|
||||
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids, start_dates, attribute, *dictionary)
|
||||
switch (attribute.underlying_type)
|
||||
{
|
||||
case AttributeUnderlyingType::UInt8:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt8);
|
||||
break;
|
||||
case AttributeUnderlyingType::UInt16:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt16);
|
||||
break;
|
||||
case AttributeUnderlyingType::UInt32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt32);
|
||||
break;
|
||||
case AttributeUnderlyingType::UInt64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt64);
|
||||
break;
|
||||
case AttributeUnderlyingType::Int8:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int8);
|
||||
break;
|
||||
case AttributeUnderlyingType::Int16:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int16);
|
||||
break;
|
||||
case AttributeUnderlyingType::Int32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int32);
|
||||
break;
|
||||
case AttributeUnderlyingType::Int64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int64);
|
||||
break;
|
||||
case AttributeUnderlyingType::Float32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float32);
|
||||
break;
|
||||
case AttributeUnderlyingType::Float64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float64);
|
||||
break;
|
||||
case AttributeUnderlyingType::String:
|
||||
column = getColumnFromAttributeString(ids, start_dates, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
|
||||
columns.emplace_back(column, attribute.type, attribute.name);
|
||||
}
|
||||
}
|
||||
return Block(columns);
|
||||
}
|
||||
|
||||
}
|
@ -1,4 +1,5 @@
|
||||
#include <Dictionaries/RangeHashedDictionary.h>
|
||||
#include <Dictionaries/RangeDictionaryBlockInputStream.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -353,4 +354,59 @@ const RangeHashedDictionary::Attribute & RangeHashedDictionary::getAttributeWith
|
||||
return attribute;
|
||||
}
|
||||
|
||||
/// Collects every (key, range start, range end) triple stored in the dictionary,
/// dispatching on the underlying type of the first attribute (all attribute
/// maps share the same key/range set).
void RangeHashedDictionary::getIdsAndDates(PaddedPODArray<Key> & ids,
    PaddedPODArray<UInt16> & start_dates, PaddedPODArray<UInt16> & end_dates) const
{
    const auto & attribute = attributes.front();

    switch (attribute.type)
    {
        case AttributeUnderlyingType::UInt8: getIdsAndDates<UInt8>(attribute, ids, start_dates, end_dates); break;
        case AttributeUnderlyingType::UInt16: getIdsAndDates<UInt16>(attribute, ids, start_dates, end_dates); break;
        case AttributeUnderlyingType::UInt32: getIdsAndDates<UInt32>(attribute, ids, start_dates, end_dates); break;
        case AttributeUnderlyingType::UInt64: getIdsAndDates<UInt64>(attribute, ids, start_dates, end_dates); break;
        case AttributeUnderlyingType::Int8: getIdsAndDates<Int8>(attribute, ids, start_dates, end_dates); break;
        case AttributeUnderlyingType::Int16: getIdsAndDates<Int16>(attribute, ids, start_dates, end_dates); break;
        case AttributeUnderlyingType::Int32: getIdsAndDates<Int32>(attribute, ids, start_dates, end_dates); break;
        case AttributeUnderlyingType::Int64: getIdsAndDates<Int64>(attribute, ids, start_dates, end_dates); break;
        case AttributeUnderlyingType::Float32: getIdsAndDates<Float32>(attribute, ids, start_dates, end_dates); break;
        case AttributeUnderlyingType::Float64: getIdsAndDates<Float64>(attribute, ids, start_dates, end_dates); break;
        case AttributeUnderlyingType::String: getIdsAndDates<StringRef>(attribute, ids, start_dates, end_dates); break;
    }
}
|
||||
|
||||
template <typename T>
|
||||
void RangeHashedDictionary::getIdsAndDates(const Attribute& attribute, PaddedPODArray<Key> & ids,
|
||||
PaddedPODArray<UInt16> & start_dates, PaddedPODArray<UInt16> & end_dates) const
|
||||
{
|
||||
const HashMap<UInt64, Values<T>> & attr = *std::get<Ptr<T>>(attribute.maps);
|
||||
|
||||
ids.reserve(attr.size());
|
||||
start_dates.reserve(attr.size());
|
||||
end_dates.reserve(attr.size());
|
||||
|
||||
for (const auto & key : attr) {
|
||||
ids.push_back(key.first);
|
||||
for (const auto & value : key.second)
|
||||
{
|
||||
start_dates.push_back(value.range.first);
|
||||
end_dates.push_back(value.range.second);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a stream over every (key, date range) entry of this dictionary,
/// exposing the requested columns in blocks of at most max_block_size rows.
BlockInputStreamPtr RangeHashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
    PaddedPODArray<Key> ids;
    PaddedPODArray<UInt16> start_dates;
    PaddedPODArray<UInt16> end_dates;
    getIdsAndDates(ids, start_dates, end_dates);

    auto dict_ptr = std::static_pointer_cast<const RangeHashedDictionary>(shared_from_this());

    using StreamType = RangeDictionaryBlockInputStream<RangeHashedDictionary, Key>;
    return std::make_shared<StreamType>(
        dict_ptr, max_block_size, column_names, std::move(ids), std::move(start_dates), std::move(end_dates));
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -79,6 +79,8 @@ public:
|
||||
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const PaddedPODArray<UInt16> & dates,
|
||||
ColumnString * out) const;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
private:
|
||||
struct Range : std::pair<UInt16, UInt16>
|
||||
{
|
||||
@ -166,6 +168,13 @@ private:
|
||||
|
||||
const Attribute & getAttributeWithType(const std::string & name, const AttributeUnderlyingType type) const;
|
||||
|
||||
void getIdsAndDates(PaddedPODArray<Key> & ids,
|
||||
PaddedPODArray<UInt16> & start_dates, PaddedPODArray<UInt16> & end_dates) const;
|
||||
|
||||
template <typename T>
|
||||
void getIdsAndDates(const Attribute & attribute, PaddedPODArray<Key> & ids,
|
||||
PaddedPODArray<UInt16> & start_dates, PaddedPODArray<UInt16> & end_dates) const;
|
||||
|
||||
const std::string name;
|
||||
const DictionaryStructure dict_struct;
|
||||
const DictionarySourcePtr source_ptr;
|
||||
|
@ -1,10 +1,19 @@
|
||||
#include <stack>
|
||||
#include <ext/map.h>
|
||||
#include <ext/range.h>
|
||||
#include <Poco/Net/IPAddress.h>
|
||||
#include <Poco/ByteOrder.h>
|
||||
#include <Dictionaries/TrieDictionary.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Dictionaries/DictionaryBlockInputStream.h>
|
||||
#include <DataTypes/DataTypeFixedString.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <IO/WriteIntText.h>
|
||||
#include <Common/formatIPv6.h>
|
||||
#include <iostream>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -20,7 +29,7 @@ TrieDictionary::TrieDictionary(
|
||||
const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr,
|
||||
const DictionaryLifetime dict_lifetime, bool require_nonempty)
|
||||
: name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime),
|
||||
require_nonempty(require_nonempty)
|
||||
require_nonempty(require_nonempty), logger(&Poco::Logger::get("TrieDictionary"))
|
||||
{
|
||||
createAttributes();
|
||||
trie = btrie_create();
|
||||
@ -425,7 +434,7 @@ void TrieDictionary::getItemsImpl(
|
||||
auto addr = first_column->getDataAt(i);
|
||||
if (addr.size != 16)
|
||||
throw Exception("Expected key to be FixedString(16)", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
|
||||
uintptr_t slot = btrie_find_a6(trie, reinterpret_cast<const UInt8*>(addr.data));
|
||||
set_value(i, slot != BTRIE_NULL ? vec[slot] : get_default(i));
|
||||
}
|
||||
@ -536,12 +545,101 @@ void TrieDictionary::has(const Attribute & attribute, const Columns & key_column
|
||||
auto addr = first_column->getDataAt(i);
|
||||
if (unlikely(addr.size != 16))
|
||||
throw Exception("Expected key to be FixedString(16)", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
|
||||
uintptr_t slot = btrie_find_a6(trie, reinterpret_cast<const UInt8*>(addr.data));
|
||||
out[i] = (slot != BTRIE_NULL);
|
||||
}
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);}
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
/** Depth-first traversal of the btrie using an explicit stack.
  * For every node that holds a value (node->value != BTRIE_NULL), calls
  * getter(key, stack.size()), where `key` accumulates the bit path from the
  * root (left child = 0 bit, right child = 1 bit, most significant bit first)
  * and stack.size() is used as the prefix length at that point.
  * NOTE(review): a NULL sentinel is pushed before descending into a right
  * subtree; the `if (node && ...)` checks skip it when it is popped.
  */
template <typename Getter, typename KeyType>
void TrieDictionary::trieTraverse(const btrie_t * tree, Getter && getter) const
{
    KeyType key = 0;
    /// Mask with only the most significant bit of KeyType set.
    const KeyType high_bit = ~((~key) >> 1);

    btrie_node_t * node;
    node = tree->root;

    std::stack<btrie_node_t *> stack;
    /// Descend the leftmost chain first, pushing each node.
    while (node)
    {
        stack.push(node);
        node = node->left;
    }

    /// Bit at depth `size` (1-based), counted from the most significant bit.
    auto getBit = [&high_bit](size_t size) { return size ? (high_bit >> (size - 1)) : 0; };

    while (!stack.empty())
    {
        node = stack.top();
        stack.pop();

        if (node && node->value != BTRIE_NULL)
            getter(key, stack.size());

        if (node && node->right)
        {
            /// Sentinel keeps depth accounting correct while in the right subtree.
            stack.push(NULL);
            key |= getBit(stack.size());
            stack.push(node->right);
            while (stack.top()->left)
                stack.push(stack.top()->left);
        }
        else
            key &= ~getBit(stack.size());
    }
}
|
||||
|
||||
/** Builds the two key columns of this dictionary: a FixedString(16) column of
  * IPv6 addresses and a UInt8 column of prefix lengths, filled by traversing
  * the trie.
  * NOTE(review): the two 64-bit halves of the 128-bit key are byte-swapped via
  * fromNetwork and then exchanged — presumably converting the trie's key
  * layout to the big-endian 16-byte form FixedString expects; verify on a
  * big-endian target.
  */
Columns TrieDictionary::getKeyColumns() const
{
    auto ip_column = std::make_shared<ColumnFixedString>(IPV6_BINARY_LENGTH);
    auto mask_column = std::make_shared<ColumnVector<UInt8>>();

    /// Called once per stored prefix: `ip` is the accumulated bit path,
    /// `mask` the prefix length (see trieTraverse).
    auto getter = [& ip_column, & mask_column](__uint128_t ip, size_t mask) {
        UInt64 * ip_array = reinterpret_cast<UInt64 *>(&ip);
        ip_array[0] = Poco::ByteOrder::fromNetwork(ip_array[0]);
        ip_array[1] = Poco::ByteOrder::fromNetwork(ip_array[1]);
        std::swap(ip_array[0], ip_array[1]);
        ip_column->insertData(reinterpret_cast<const char *>(ip_array), IPV6_BINARY_LENGTH);
        mask_column->insert(static_cast<UInt8>(mask));
    };

    trieTraverse<decltype(getter), __uint128_t>(trie, std::move(getter));
    return {ip_column, mask_column};
}
|
||||
|
||||
/** Creates a stream over all stored prefixes of this dictionary.
  * Two callbacks customize the generic DictionaryBlockInputStream:
  *  - getKeys: exposes only the raw IPv6 column (FixedString(16)) as the key;
  *  - getView: renders a human-readable "address/mask" String column from the
  *    IPv6 and mask columns.
  */
BlockInputStreamPtr TrieDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
    using BlockInputStreamType = DictionaryBlockInputStream<TrieDictionary, UInt64>;

    auto getKeys = [](const Columns& columns, const std::vector<DictionaryAttribute>& attributes)
    {
        const auto & attr = attributes.front();
        return ColumnsWithTypeAndName({ColumnWithTypeAndName(columns.front(),
            std::make_shared<DataTypeFixedString>(IPV6_BINARY_LENGTH), attr.name)});
    };
    auto getView = [](const Columns& columns, const std::vector<DictionaryAttribute>& attributes)
    {
        auto column = std::make_shared<ColumnString>();
        auto ip_column = std::static_pointer_cast<ColumnFixedString>(columns.front());
        auto mask_column = std::static_pointer_cast<ColumnVector<UInt8>>(columns.back());
        /// Large enough for a textual IPv6 address plus "/<mask>".
        char buffer[48];
        for (size_t row : ext::range(0, ip_column->size()))
        {
            UInt8 mask = mask_column->getElement(row);
            char * ptr = buffer;
            formatIPv6(reinterpret_cast<const unsigned char *>(ip_column->getDataAt(row).data), ptr);
            /// formatIPv6 leaves ptr one past a trailing terminator; overwrite it with '/'.
            *(ptr - 1) = '/';
            auto size = detail::writeUIntText(mask, ptr);
            column->insertData(buffer, size + (ptr - buffer));
        }
        return ColumnsWithTypeAndName{ColumnWithTypeAndName(column, std::make_shared<DataTypeString>(), attributes.front().name)};
    };
    return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getKeyColumns(), column_names,
        std::move(getKeys), std::move(getView));
}
|
||||
|
||||
}
|
||||
|
@ -12,7 +12,7 @@
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -128,6 +128,8 @@ public:
|
||||
|
||||
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
private:
|
||||
template <typename Value> using ContainerType = std::vector<Value>;
|
||||
template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
|
||||
@ -190,6 +192,11 @@ private:
|
||||
template <typename T>
|
||||
void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
template <typename Getter, typename KeyType>
|
||||
void trieTraverse(const btrie_t * trie, Getter && getter) const;
|
||||
|
||||
Columns getKeyColumns() const;
|
||||
|
||||
const std::string name;
|
||||
const DictionaryStructure dict_struct;
|
||||
const DictionarySourcePtr source_ptr;
|
||||
@ -210,6 +217,8 @@ private:
|
||||
std::chrono::time_point<std::chrono::system_clock> creation_time;
|
||||
|
||||
std::exception_ptr creation_exception;
|
||||
|
||||
Logger * logger;
|
||||
};
|
||||
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include <Functions/Conditional/ArgsInfo.h>
|
||||
#include <Functions/Conditional/CondException.h>
|
||||
#include <Functions/Conditional/common.h>
|
||||
#include <Functions/DataTypeTraits.h>
|
||||
#include <DataTypes/DataTypeTraits.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeFixedString.h>
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Common/memcpySmall.h>
|
||||
#include <Functions/NumberTraits.h>
|
||||
#include <DataTypes/NumberTraits.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <Functions/Conditional/common.h>
|
||||
#include <Functions/Conditional/NullMapBuilder.h>
|
||||
#include <Functions/Conditional/CondSource.h>
|
||||
#include <Functions/NumberTraits.h>
|
||||
#include <DataTypes/NumberTraits.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
|
@ -4,8 +4,8 @@
|
||||
#include <Functions/Conditional/ArgsInfo.h>
|
||||
#include <Functions/Conditional/NumericEvaluator.h>
|
||||
#include <Functions/Conditional/ArrayEvaluator.h>
|
||||
#include <Functions/NumberTraits.h>
|
||||
#include <Functions/DataTypeTraits.h>
|
||||
#include <DataTypes/NumberTraits.h>
|
||||
#include <DataTypes/DataTypeTraits.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include <Functions/Conditional/getArrayType.h>
|
||||
#include <Functions/Conditional/CondException.h>
|
||||
#include <Functions/Conditional/common.h>
|
||||
#include <Functions/DataTypeTraits.h>
|
||||
#include <DataTypes/DataTypeTraits.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
@ -36,14 +36,14 @@ public:
|
||||
FunctionFactory();
|
||||
|
||||
FunctionPtr get(const std::string & name, const Context & context) const; /// Throws an exception if not found.
|
||||
FunctionPtr tryGet(const std::string & name, const Context & context) const; /// Returns nullptr if not found.
|
||||
FunctionPtr tryGet(const std::string & name, const Context & context) const; /// Returns nullptr if not found.
|
||||
|
||||
/// No locking, you must register all functions before usage of get, tryGet.
|
||||
template <typename Function> void registerFunction()
|
||||
{
|
||||
static_assert(std::is_same<decltype(&Function::create), Creator>::value, "Function::create has incorrect type");
|
||||
|
||||
if (!functions.emplace(Function::name, &Function::create).second)
|
||||
if (!functions.emplace(std::string(Function::name), &Function::create).second)
|
||||
throw Exception("FunctionFactory: the function name '" + std::string(Function::name) + "' is not unique",
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/NumberTraits.h>
|
||||
#include <DataTypes/NumberTraits.h>
|
||||
#include <Core/AccurateComparison.h>
|
||||
#include <Core/FieldVisitors.h>
|
||||
|
||||
@ -17,6 +17,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_DIVISION;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
|
||||
|
@ -384,13 +384,13 @@ public:
|
||||
void update(size_t from)
|
||||
{
|
||||
if (index >= size)
|
||||
throw Exception{"Logical error: index passes to NullMapBuilder is out of range of column.", ErrorCodes::LOGICAL_ERROR};
|
||||
throw Exception{"Logical error: index passed to NullMapBuilder is out of range of column.", ErrorCodes::LOGICAL_ERROR};
|
||||
|
||||
bool is_null;
|
||||
if (src_nullable_col != nullptr)
|
||||
is_null = src_nullable_col->isNullAt(from);
|
||||
else
|
||||
is_null = (*src_array)[from].isNull();
|
||||
is_null = from < src_array->size() ? (*src_array)[from].isNull() : true;
|
||||
|
||||
auto & null_map_data = static_cast<ColumnUInt8 &>(*sink_null_map).getData();
|
||||
null_map_data[index] = is_null ? 1 : 0;
|
||||
@ -401,7 +401,7 @@ public:
|
||||
void update()
|
||||
{
|
||||
if (index >= size)
|
||||
throw Exception{"Logical error: index passes to NullMapBuilder is out of range of column.", ErrorCodes::LOGICAL_ERROR};
|
||||
throw Exception{"Logical error: index passed to NullMapBuilder is out of range of column.", ErrorCodes::LOGICAL_ERROR};
|
||||
|
||||
auto & null_map_data = static_cast<ColumnUInt8 &>(*sink_null_map).getData();
|
||||
null_map_data[index] = 0;
|
||||
@ -906,7 +906,8 @@ bool FunctionArrayElement::executeConstConst(Block & block, const ColumnNumbers
|
||||
Field value;
|
||||
if (real_index < array_size)
|
||||
value = array.at(real_index);
|
||||
else
|
||||
|
||||
if (value.isNull())
|
||||
value = block.getByPosition(result).type->getDefault();
|
||||
|
||||
block.getByPosition(result).column = block.getByPosition(result).type->createConstColumn(
|
||||
@ -1161,14 +1162,14 @@ void FunctionArrayElement::perform(Block & block, const ColumnNumbers & argument
|
||||
}
|
||||
else if (!block.safeGetByPosition(arguments[1]).column->isConst())
|
||||
{
|
||||
if (!( executeArgument<UInt8> (block, arguments, result, builder)
|
||||
|| executeArgument<UInt16> (block, arguments, result, builder)
|
||||
|| executeArgument<UInt32> (block, arguments, result, builder)
|
||||
|| executeArgument<UInt64> (block, arguments, result, builder)
|
||||
if (!( executeArgument<UInt8> (block, arguments, result, builder)
|
||||
|| executeArgument<UInt16> (block, arguments, result, builder)
|
||||
|| executeArgument<UInt32> (block, arguments, result, builder)
|
||||
|| executeArgument<UInt64> (block, arguments, result, builder)
|
||||
|| executeArgument<Int8> (block, arguments, result, builder)
|
||||
|| executeArgument<Int16> (block, arguments, result, builder)
|
||||
|| executeArgument<Int32> (block, arguments, result, builder)
|
||||
|| executeArgument<Int64> (block, arguments, result, builder)))
|
||||
|| executeArgument<Int16> (block, arguments, result, builder)
|
||||
|| executeArgument<Int32> (block, arguments, result, builder)
|
||||
|| executeArgument<Int64> (block, arguments, result, builder)))
|
||||
throw Exception("Second argument for function " + getName() + " must must have UInt or Int type.",
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
@ -1182,19 +1183,19 @@ void FunctionArrayElement::perform(Block & block, const ColumnNumbers & argument
|
||||
if (index == UInt64(0))
|
||||
throw Exception("Array indices is 1-based", ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX);
|
||||
|
||||
if (!( executeNumberConst<UInt8> (block, arguments, result, index, builder)
|
||||
|| executeNumberConst<UInt16> (block, arguments, result, index, builder)
|
||||
|| executeNumberConst<UInt32> (block, arguments, result, index, builder)
|
||||
|| executeNumberConst<UInt64> (block, arguments, result, index, builder)
|
||||
if (!( executeNumberConst<UInt8> (block, arguments, result, index, builder)
|
||||
|| executeNumberConst<UInt16> (block, arguments, result, index, builder)
|
||||
|| executeNumberConst<UInt32> (block, arguments, result, index, builder)
|
||||
|| executeNumberConst<UInt64> (block, arguments, result, index, builder)
|
||||
|| executeNumberConst<Int8> (block, arguments, result, index, builder)
|
||||
|| executeNumberConst<Int16> (block, arguments, result, index, builder)
|
||||
|| executeNumberConst<Int32> (block, arguments, result, index, builder)
|
||||
|| executeNumberConst<Int64> (block, arguments, result, index, builder)
|
||||
|| executeNumberConst<Float32> (block, arguments, result, index, builder)
|
||||
|| executeNumberConst<Float64> (block, arguments, result, index, builder)
|
||||
|| executeConstConst (block, arguments, result, index, builder)
|
||||
|| executeStringConst (block, arguments, result, index, builder)
|
||||
|| executeGenericConst (block, arguments, result, index, builder)))
|
||||
|| executeNumberConst<Int16> (block, arguments, result, index, builder)
|
||||
|| executeNumberConst<Int32> (block, arguments, result, index, builder)
|
||||
|| executeNumberConst<Int64> (block, arguments, result, index, builder)
|
||||
|| executeNumberConst<Float32> (block, arguments, result, index, builder)
|
||||
|| executeNumberConst<Float64> (block, arguments, result, index, builder)
|
||||
|| executeConstConst (block, arguments, result, index, builder)
|
||||
|| executeStringConst (block, arguments, result, index, builder)
|
||||
|| executeGenericConst (block, arguments, result, index, builder)))
|
||||
throw Exception("Illegal column " + block.safeGetByPosition(arguments[0]).column->getName()
|
||||
+ " of first argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
@ -2354,11 +2355,13 @@ bool FunctionRange::executeInternal(Block & block, const IColumn * const arg, co
|
||||
auto & out_offsets = out->getOffsets();
|
||||
|
||||
IColumn::Offset_t offset{};
|
||||
for (const auto i : ext::range(0, in->size()))
|
||||
for (size_t row_idx = 0, rows = in->size(); row_idx < rows; ++row_idx)
|
||||
{
|
||||
std::copy(ext::make_range_iterator(T{}), ext::make_range_iterator(in_data[i]), &out_data[offset]);
|
||||
offset += in_data[i];
|
||||
out_offsets[i] = offset;
|
||||
for (size_t elem_idx = 0, elems = in_data[row_idx]; elem_idx < elems; ++elem_idx)
|
||||
out_data[offset + elem_idx] = elem_idx;
|
||||
|
||||
offset += in_data[row_idx];
|
||||
out_offsets[row_idx] = offset;
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -2369,16 +2372,14 @@ bool FunctionRange::executeInternal(Block & block, const IColumn * const arg, co
|
||||
if ((in_data != 0) && (in->size() > (std::numeric_limits<std::size_t>::max() / in_data)))
|
||||
throw Exception{
|
||||
"A call to function " + getName() + " overflows, investigate the values of arguments you are passing",
|
||||
ErrorCodes::ARGUMENT_OUT_OF_BOUND
|
||||
};
|
||||
ErrorCodes::ARGUMENT_OUT_OF_BOUND};
|
||||
|
||||
const std::size_t total_values = in->size() * in_data;
|
||||
if (total_values > max_elements)
|
||||
throw Exception{
|
||||
"A call to function " + getName() + " would produce " + std::to_string(total_values) +
|
||||
" array elements, which is greater than the allowed maximum of " + std::to_string(max_elements),
|
||||
ErrorCodes::ARGUMENT_OUT_OF_BOUND
|
||||
};
|
||||
ErrorCodes::ARGUMENT_OUT_OF_BOUND};
|
||||
|
||||
const auto data_col = std::make_shared<ColumnVector<T>>(total_values);
|
||||
const auto out = std::make_shared<ColumnArray>(
|
||||
@ -2390,11 +2391,13 @@ bool FunctionRange::executeInternal(Block & block, const IColumn * const arg, co
|
||||
auto & out_offsets = out->getOffsets();
|
||||
|
||||
IColumn::Offset_t offset{};
|
||||
for (const auto i : ext::range(0, in->size()))
|
||||
for (size_t row_idx = 0, rows = in->size(); row_idx < rows; ++row_idx)
|
||||
{
|
||||
std::copy(ext::make_range_iterator(T{}), ext::make_range_iterator(in_data), &out_data[offset]);
|
||||
for (size_t elem_idx = 0, elems = in_data; elem_idx < elems; ++elem_idx)
|
||||
out_data[offset + elem_idx] = elem_idx;
|
||||
|
||||
offset += in_data;
|
||||
out_offsets[i] = offset;
|
||||
out_offsets[row_idx] = offset;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -14,7 +14,7 @@
|
||||
#include <Columns/ColumnNullable.h>
|
||||
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/DataTypeTraits.h>
|
||||
#include <DataTypes/DataTypeTraits.h>
|
||||
#include <Functions/ObjectPool.h>
|
||||
#include <Common/StringUtils.h>
|
||||
|
||||
|
@ -22,6 +22,7 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int CANNOT_CREATE_CHARSET_CONVERTER;
|
||||
extern const int CANNOT_CONVERT_CHARSET;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/hex.h>
|
||||
#include <Common/formatIPv6.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
@ -17,6 +18,7 @@
|
||||
#include <Functions/IFunction.h>
|
||||
|
||||
#include <arpa/inet.h>
|
||||
|
||||
#include <ext/range.h>
|
||||
#include <array>
|
||||
|
||||
@ -24,6 +26,12 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_LESS_ARGUMENTS_FOR_FUNCTION;
|
||||
}
|
||||
|
||||
|
||||
/** Encoding functions:
|
||||
*
|
||||
* IPv4NumToString (num) - See below.
|
||||
@ -40,133 +48,10 @@ namespace DB
|
||||
*/
|
||||
|
||||
|
||||
const auto ipv4_bytes_length = 4;
|
||||
const auto ipv6_bytes_length = 16;
|
||||
const auto uuid_bytes_length = 16;
|
||||
const auto uuid_text_length = 36;
|
||||
|
||||
class IPv6Format
|
||||
{
|
||||
private:
|
||||
/// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base)
|
||||
static constexpr uint32_t int_log(const uint32_t value, const uint32_t base, const bool carry = false)
|
||||
{
|
||||
return value >= base ? 1 + int_log(value / base, base, value % base || carry) : value % base > 1 || carry;
|
||||
}
|
||||
|
||||
/// print integer in desired base, faster than sprintf
|
||||
template <uint32_t base, typename T, uint32_t buffer_size = sizeof(T) * int_log(256, base, false)>
|
||||
static void print_integer(char *& out, T value)
|
||||
{
|
||||
if (value == 0)
|
||||
*out++ = '0';
|
||||
else
|
||||
{
|
||||
char buf[buffer_size];
|
||||
auto ptr = buf;
|
||||
|
||||
while (value > 0)
|
||||
{
|
||||
*ptr++ = hexLowercase(value % base);
|
||||
value /= base;
|
||||
}
|
||||
|
||||
while (ptr != buf)
|
||||
*out++ = *--ptr;
|
||||
}
|
||||
}
|
||||
|
||||
/// print IPv4 address as %u.%u.%u.%u
|
||||
static void ipv4_format(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count)
|
||||
{
|
||||
const auto limit = ipv4_bytes_length - zeroed_tail_bytes_count;
|
||||
|
||||
for (const auto i : ext::range(0, ipv4_bytes_length))
|
||||
{
|
||||
UInt8 byte = (i < limit) ? src[i] : 0;
|
||||
print_integer<10, UInt8>(dst, byte);
|
||||
|
||||
if (i != ipv4_bytes_length - 1)
|
||||
*dst++ = '.';
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
/** rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
|
||||
* performs significantly faster than the reference implementation due to the absence of sprintf calls,
|
||||
* bounds checking, unnecessary string copying and length calculation
|
||||
*/
|
||||
static const void apply(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count = 0)
|
||||
{
|
||||
struct { int base, len; } best{-1}, cur{-1};
|
||||
std::array<uint16_t, ipv6_bytes_length / sizeof(uint16_t)> words{};
|
||||
|
||||
/** Preprocess:
|
||||
* Copy the input (bytewise) array into a wordwise array.
|
||||
* Find the longest run of 0x00's in src[] for :: shorthanding. */
|
||||
for (const auto i : ext::range(0, ipv6_bytes_length - zeroed_tail_bytes_count))
|
||||
words[i / 2] |= src[i] << ((1 - (i % 2)) << 3);
|
||||
|
||||
for (const auto i : ext::range(0, words.size()))
|
||||
{
|
||||
if (words[i] == 0) {
|
||||
if (cur.base == -1)
|
||||
cur.base = i, cur.len = 1;
|
||||
else
|
||||
cur.len++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (cur.base != -1)
|
||||
{
|
||||
if (best.base == -1 || cur.len > best.len)
|
||||
best = cur;
|
||||
cur.base = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (cur.base != -1)
|
||||
{
|
||||
if (best.base == -1 || cur.len > best.len)
|
||||
best = cur;
|
||||
}
|
||||
|
||||
if (best.base != -1 && best.len < 2)
|
||||
best.base = -1;
|
||||
|
||||
/// Format the result.
|
||||
for (const int i : ext::range(0, words.size()))
|
||||
{
|
||||
/// Are we inside the best run of 0x00's?
|
||||
if (best.base != -1 && i >= best.base && i < (best.base + best.len))
|
||||
{
|
||||
if (i == best.base)
|
||||
*dst++ = ':';
|
||||
continue;
|
||||
}
|
||||
|
||||
/// Are we following an initial run of 0x00s or any real hex?
|
||||
if (i != 0)
|
||||
*dst++ = ':';
|
||||
|
||||
/// Is this address an encapsulated IPv4?
|
||||
if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu)))
|
||||
{
|
||||
ipv4_format(src + 12, dst, std::min(zeroed_tail_bytes_count, static_cast<UInt8>(ipv4_bytes_length)));
|
||||
break;
|
||||
}
|
||||
|
||||
print_integer<16>(dst, words[i]);
|
||||
}
|
||||
|
||||
/// Was it a trailing run of 0x00's?
|
||||
if (best.base != -1 && (best.base + best.len) == words.size())
|
||||
*dst++ = ':';
|
||||
|
||||
*dst++ = '\0';
|
||||
}
|
||||
};
|
||||
constexpr auto ipv4_bytes_length = 4;
|
||||
constexpr auto ipv6_bytes_length = 16;
|
||||
constexpr auto uuid_bytes_length = 16;
|
||||
constexpr auto uuid_text_length = 36;
|
||||
|
||||
|
||||
class FunctionIPv6NumToString : public IFunction
|
||||
@ -214,7 +99,7 @@ public:
|
||||
|
||||
ColumnString::Chars_t & vec_res = col_res->getChars();
|
||||
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
|
||||
vec_res.resize(size * INET6_ADDRSTRLEN);
|
||||
vec_res.resize(size * (IPV6_MAX_TEXT_LENGTH + 1));
|
||||
offsets_res.resize(size);
|
||||
|
||||
auto begin = reinterpret_cast<char *>(&vec_res[0]);
|
||||
@ -222,7 +107,7 @@ public:
|
||||
|
||||
for (size_t offset = 0, i = 0; offset < vec_in.size(); offset += ipv6_bytes_length, ++i)
|
||||
{
|
||||
IPv6Format::apply(&vec_in[offset], pos);
|
||||
formatIPv6(&vec_in[offset], pos);
|
||||
offsets_res[i] = pos - begin;
|
||||
}
|
||||
|
||||
@ -240,9 +125,9 @@ public:
|
||||
|
||||
const auto & data_in = col_in->getData();
|
||||
|
||||
char buf[INET6_ADDRSTRLEN];
|
||||
char buf[IPV6_MAX_TEXT_LENGTH + 1];
|
||||
char * dst = buf;
|
||||
IPv6Format::apply(reinterpret_cast<const unsigned char *>(data_in.data()), dst);
|
||||
formatIPv6(reinterpret_cast<const unsigned char *>(data_in.data()), dst);
|
||||
|
||||
block.safeGetByPosition(result).column = std::make_shared<ColumnConstString>(col_in->size(), buf);
|
||||
}
|
||||
@ -337,7 +222,7 @@ public:
|
||||
|
||||
ColumnString::Chars_t & vec_res = col_res->getChars();
|
||||
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
|
||||
vec_res.resize(size * INET6_ADDRSTRLEN);
|
||||
vec_res.resize(size * (IPV6_MAX_TEXT_LENGTH + 1));
|
||||
offsets_res.resize(size);
|
||||
|
||||
auto begin = reinterpret_cast<char *>(&vec_res[0]);
|
||||
@ -389,7 +274,7 @@ public:
|
||||
|
||||
const auto & data_in = col_in->getData();
|
||||
|
||||
char buf[INET6_ADDRSTRLEN];
|
||||
char buf[IPV6_MAX_TEXT_LENGTH + 1];
|
||||
char * dst = buf;
|
||||
|
||||
const auto address = reinterpret_cast<const unsigned char *>(data_in.data());
|
||||
@ -413,7 +298,7 @@ private:
|
||||
|
||||
void cutAddress(const unsigned char * address, char *& dst, UInt8 zeroed_tail_bytes_count)
|
||||
{
|
||||
IPv6Format::apply(address, dst, zeroed_tail_bytes_count);
|
||||
formatIPv6(address, dst, zeroed_tail_bytes_count);
|
||||
}
|
||||
};
|
||||
|
||||
@ -703,7 +588,7 @@ public:
|
||||
ColumnString::Chars_t & vec_res = col_res->getChars();
|
||||
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
|
||||
|
||||
vec_res.resize(vec_in.size() * INET_ADDRSTRLEN); /// the longest value is: 255.255.255.255\0
|
||||
vec_res.resize(vec_in.size() * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0
|
||||
offsets_res.resize(vec_in.size());
|
||||
char * begin = reinterpret_cast<char *>(&vec_res[0]);
|
||||
char * pos = begin;
|
||||
@ -886,7 +771,7 @@ public:
|
||||
ColumnString::Chars_t & vec_res = col_res->getChars();
|
||||
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
|
||||
|
||||
vec_res.resize(vec_in.size() * INET_ADDRSTRLEN); /// the longest value is: 255.255.255.255\0
|
||||
vec_res.resize(vec_in.size() * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0
|
||||
offsets_res.resize(vec_in.size());
|
||||
char * begin = reinterpret_cast<char *>(&vec_res[0]);
|
||||
char * pos = begin;
|
||||
|
@ -13,8 +13,8 @@
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/NumberTraits.h>
|
||||
#include <Functions/DataTypeTraits.h>
|
||||
#include <DataTypes/NumberTraits.h>
|
||||
#include <DataTypes/DataTypeTraits.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -23,12 +23,17 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
/** Functions for working with date and time.
|
||||
*
|
||||
* toYear, toMonth, toDayOfMonth, toDayOfWeek, toHour, toMinute, toSecond,
|
||||
* toMonday, toStartOfMonth, toStartOfYear, toStartOfMinute, toStartOfFiveMinute
|
||||
* toStartOfHour, toTime,
|
||||
* now
|
||||
* now, today, yesterday
|
||||
* TODO: makeDate, makeDateTime
|
||||
*
|
||||
* (toDate - located in FunctionConversion.h file)
|
||||
|
@ -31,6 +31,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int DICTIONARIES_WAS_NOT_LOADED;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
/** Functions using Yandex.Metrica dictionaries
|
||||
|
@ -36,6 +36,7 @@ namespace ErrorCodes
|
||||
extern const int DICTIONARIES_WAS_NOT_LOADED;
|
||||
extern const int UNSUPPORTED_METHOD;
|
||||
extern const int UNKNOWN_TYPE;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
/** Functions that use plug-ins (external) dictionaries.
|
||||
|
@ -13,6 +13,12 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
|
||||
/** Function for an unusual conversion to a string:
|
||||
*
|
||||
* bitmaskToList - takes an integer - a bitmask, returns a string of degrees of 2 separated by a comma.
|
||||
@ -63,8 +69,8 @@ public:
|
||||
|| executeType<Int32>(block, arguments, result)
|
||||
|| executeType<Int64>(block, arguments, result)))
|
||||
throw Exception("Illegal column " + block.safeGetByPosition(arguments[0]).column->getName()
|
||||
+ " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
+ " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -16,6 +16,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
const Float64 EARTH_RADIUS_IN_METERS = 6372797.560856;
|
||||
|
@ -31,6 +31,13 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
/** Hashing functions.
|
||||
*
|
||||
* Half MD5:
|
||||
@ -716,15 +723,13 @@ public:
|
||||
throw Exception{
|
||||
"Number of arguments for function " + getName() + " doesn't match: passed " +
|
||||
toString(arg_count) + ", should be 1 or 2.",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH
|
||||
};
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
|
||||
|
||||
const auto first_arg = arguments.front().get();
|
||||
if (!typeid_cast<const DataTypeString *>(first_arg))
|
||||
throw Exception{
|
||||
"Illegal type " + first_arg->getName() + " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT
|
||||
};
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
if (arg_count == 2)
|
||||
{
|
||||
@ -739,8 +744,7 @@ public:
|
||||
!typeid_cast<const DataTypeInt64 *>(second_arg))
|
||||
throw Exception{
|
||||
"Illegal type " + second_arg->getName() + " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT
|
||||
};
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
}
|
||||
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
@ -755,7 +759,7 @@ public:
|
||||
else if (arg_count == 2)
|
||||
executeTwoArgs(block, arguments, result);
|
||||
else
|
||||
throw std::logic_error{"got into IFunction::execute with unexpected number of arguments"};
|
||||
throw Exception{"got into IFunction::execute with unexpected number of arguments", ErrorCodes::LOGICAL_ERROR};
|
||||
}
|
||||
|
||||
private:
|
||||
@ -797,8 +801,7 @@ private:
|
||||
if (!level_col->isConst())
|
||||
throw Exception{
|
||||
"Second argument of function " + getName() + " must be an integral constant",
|
||||
ErrorCodes::ILLEGAL_COLUMN
|
||||
};
|
||||
ErrorCodes::ILLEGAL_COLUMN};
|
||||
|
||||
const auto level = level_col->get64(0);
|
||||
|
||||
@ -833,10 +836,10 @@ private:
|
||||
};
|
||||
|
||||
|
||||
struct NameHalfMD5 { static constexpr auto name = "halfMD5"; };
|
||||
struct NameSipHash64 { static constexpr auto name = "sipHash64"; };
|
||||
struct NameIntHash32 { static constexpr auto name = "intHash32"; };
|
||||
struct NameIntHash64 { static constexpr auto name = "intHash64"; };
|
||||
struct NameHalfMD5 { static constexpr auto name = "halfMD5"; };
|
||||
struct NameSipHash64 { static constexpr auto name = "sipHash64"; };
|
||||
struct NameIntHash32 { static constexpr auto name = "intHash32"; };
|
||||
struct NameIntHash64 { static constexpr auto name = "intHash64"; };
|
||||
|
||||
struct ImplCityHash64
|
||||
{
|
||||
|
@ -11,6 +11,12 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
/** Functions are logical links: and, or, not, xor.
|
||||
* Accept any numeric types, return a UInt8 containing 0 or 1.
|
||||
*/
|
||||
|
@ -7,10 +7,10 @@
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Common/config.h>
|
||||
|
||||
/** More effective implementations of mathematical functions are possible when connecting a separate library
|
||||
* Disabled due licence compatibility limitations
|
||||
/** More efficient implementations of mathematical functions are possible when using a separate library.
|
||||
* Disabled due to licence compatibility limitations.
|
||||
* To enable: download http://www.agner.org/optimize/vectorclass.zip and unpack to contrib/vectorclass
|
||||
* Then rebuild with -DENABLE_VECTORCLASS=1
|
||||
* Then rebuild with -DENABLE_VECTORCLASS=1
|
||||
*/
|
||||
|
||||
#if USE_VECTORCLASS
|
||||
@ -32,6 +32,11 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
template <typename Impl>
|
||||
class FunctionMathNullaryConstFloat64 : public IFunction
|
||||
{
|
||||
|
@ -5,7 +5,7 @@
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/NumberTraits.h>
|
||||
#include <DataTypes/NumberTraits.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
|
||||
|
||||
|
@ -12,6 +12,11 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
/** Pseudo-random number generation functions.
|
||||
* The function can be called without arguments or with one argument.
|
||||
* The argument is ignored and only serves to ensure that several calls to one function are considered different and do not stick together.
|
||||
@ -42,11 +47,11 @@ namespace detail
|
||||
|
||||
struct LinearCongruentialGenerator
|
||||
{
|
||||
/// Constants from man lrand48_r.
|
||||
/// Constants from `man lrand48_r`.
|
||||
static constexpr UInt64 a = 0x5DEECE66D;
|
||||
static constexpr UInt64 c = 0xB;
|
||||
|
||||
/// And this is from `head -c8 /dev/urandom | Xxd -p`
|
||||
/// And this is from `head -c8 /dev/urandom | xxd -p`
|
||||
UInt64 current = 0x09826f4a081cee35ULL;
|
||||
|
||||
LinearCongruentialGenerator() {}
|
||||
@ -236,11 +241,11 @@ public:
|
||||
|
||||
|
||||
struct NameRand { static constexpr auto name = "rand"; };
|
||||
struct NameRand64 { static constexpr auto name = "rand64"; };
|
||||
struct NameRand64 { static constexpr auto name = "rand64"; };
|
||||
struct NameRandConstant { static constexpr auto name = "randConstant"; };
|
||||
|
||||
using FunctionRand = FunctionRandom<RandImpl, NameRand> ;
|
||||
using FunctionRand64 = FunctionRandom<Rand64Impl, NameRand64>;
|
||||
using FunctionRand = FunctionRandom<RandImpl, NameRand> ;
|
||||
using FunctionRand64 = FunctionRandom<Rand64Impl, NameRand64>;
|
||||
using FunctionRandConstant = FunctionRandomConstant<RandImpl, NameRandConstant>;
|
||||
|
||||
|
||||
|
@ -15,8 +15,14 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
|
||||
/** Functions for transforming numbers and dates to strings that contain the same set of bytes in the machine representation, and vice versa.
|
||||
*/
|
||||
*/
|
||||
|
||||
|
||||
template<typename Name>
|
||||
@ -104,8 +110,8 @@ public:
|
||||
|| executeType<Float32>(block, arguments, result)
|
||||
|| executeType<Float64>(block, arguments, result)))
|
||||
throw Exception("Illegal column " + block.safeGetByPosition(arguments[0]).column->getName()
|
||||
+ " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
+ " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
};
|
||||
|
||||
@ -196,32 +202,32 @@ public:
|
||||
};
|
||||
|
||||
|
||||
struct NameReinterpretAsUInt8 { static constexpr auto name = "reinterpretAsUInt8"; };
|
||||
struct NameReinterpretAsUInt16 { static constexpr auto name = "reinterpretAsUInt16"; };
|
||||
struct NameReinterpretAsUInt32 { static constexpr auto name = "reinterpretAsUInt32"; };
|
||||
struct NameReinterpretAsUInt64 { static constexpr auto name = "reinterpretAsUInt64"; };
|
||||
struct NameReinterpretAsInt8 { static constexpr auto name = "reinterpretAsInt8"; };
|
||||
struct NameReinterpretAsInt16 { static constexpr auto name = "reinterpretAsInt16"; };
|
||||
struct NameReinterpretAsInt32 { static constexpr auto name = "reinterpretAsInt32"; };
|
||||
struct NameReinterpretAsInt64 { static constexpr auto name = "reinterpretAsInt64"; };
|
||||
struct NameReinterpretAsFloat32 { static constexpr auto name = "reinterpretAsFloat32"; };
|
||||
struct NameReinterpretAsFloat64 { static constexpr auto name = "reinterpretAsFloat64"; };
|
||||
struct NameReinterpretAsUInt8 { static constexpr auto name = "reinterpretAsUInt8"; };
|
||||
struct NameReinterpretAsUInt16 { static constexpr auto name = "reinterpretAsUInt16"; };
|
||||
struct NameReinterpretAsUInt32 { static constexpr auto name = "reinterpretAsUInt32"; };
|
||||
struct NameReinterpretAsUInt64 { static constexpr auto name = "reinterpretAsUInt64"; };
|
||||
struct NameReinterpretAsInt8 { static constexpr auto name = "reinterpretAsInt8"; };
|
||||
struct NameReinterpretAsInt16 { static constexpr auto name = "reinterpretAsInt16"; };
|
||||
struct NameReinterpretAsInt32 { static constexpr auto name = "reinterpretAsInt32"; };
|
||||
struct NameReinterpretAsInt64 { static constexpr auto name = "reinterpretAsInt64"; };
|
||||
struct NameReinterpretAsFloat32 { static constexpr auto name = "reinterpretAsFloat32"; };
|
||||
struct NameReinterpretAsFloat64 { static constexpr auto name = "reinterpretAsFloat64"; };
|
||||
struct NameReinterpretAsDate { static constexpr auto name = "reinterpretAsDate"; };
|
||||
struct NameReinterpretAsDateTime { static constexpr auto name = "reinterpretAsDateTime"; };
|
||||
struct NameReinterpretAsString { static constexpr auto name = "reinterpretAsString"; };
|
||||
struct NameReinterpretAsString { static constexpr auto name = "reinterpretAsString"; };
|
||||
|
||||
using FunctionReinterpretAsUInt8 = FunctionReinterpretStringAs<DataTypeUInt8, NameReinterpretAsUInt8> ;
|
||||
using FunctionReinterpretAsUInt16 = FunctionReinterpretStringAs<DataTypeUInt16, NameReinterpretAsUInt16>;
|
||||
using FunctionReinterpretAsUInt32 = FunctionReinterpretStringAs<DataTypeUInt32, NameReinterpretAsUInt32>;
|
||||
using FunctionReinterpretAsUInt64 = FunctionReinterpretStringAs<DataTypeUInt64, NameReinterpretAsUInt64>;
|
||||
using FunctionReinterpretAsInt8 = FunctionReinterpretStringAs<DataTypeInt8, NameReinterpretAsInt8> ;
|
||||
using FunctionReinterpretAsInt16 = FunctionReinterpretStringAs<DataTypeInt16, NameReinterpretAsInt16> ;
|
||||
using FunctionReinterpretAsInt32 = FunctionReinterpretStringAs<DataTypeInt32, NameReinterpretAsInt32> ;
|
||||
using FunctionReinterpretAsInt64 = FunctionReinterpretStringAs<DataTypeInt64, NameReinterpretAsInt64> ;
|
||||
using FunctionReinterpretAsFloat32 = FunctionReinterpretStringAs<DataTypeFloat32, NameReinterpretAsFloat32>;
|
||||
using FunctionReinterpretAsFloat64 = FunctionReinterpretStringAs<DataTypeFloat64, NameReinterpretAsFloat64>;
|
||||
using FunctionReinterpretAsDate = FunctionReinterpretStringAs<DataTypeDate, NameReinterpretAsDate> ;
|
||||
using FunctionReinterpretAsDateTime = FunctionReinterpretStringAs<DataTypeDateTime, NameReinterpretAsDateTime>;
|
||||
using FunctionReinterpretAsUInt8 = FunctionReinterpretStringAs<DataTypeUInt8, NameReinterpretAsUInt8>;
|
||||
using FunctionReinterpretAsUInt16 = FunctionReinterpretStringAs<DataTypeUInt16, NameReinterpretAsUInt16>;
|
||||
using FunctionReinterpretAsUInt32 = FunctionReinterpretStringAs<DataTypeUInt32, NameReinterpretAsUInt32>;
|
||||
using FunctionReinterpretAsUInt64 = FunctionReinterpretStringAs<DataTypeUInt64, NameReinterpretAsUInt64>;
|
||||
using FunctionReinterpretAsInt8 = FunctionReinterpretStringAs<DataTypeInt8, NameReinterpretAsInt8>;
|
||||
using FunctionReinterpretAsInt16 = FunctionReinterpretStringAs<DataTypeInt16, NameReinterpretAsInt16>;
|
||||
using FunctionReinterpretAsInt32 = FunctionReinterpretStringAs<DataTypeInt32, NameReinterpretAsInt32>;
|
||||
using FunctionReinterpretAsInt64 = FunctionReinterpretStringAs<DataTypeInt64, NameReinterpretAsInt64>;
|
||||
using FunctionReinterpretAsFloat32 = FunctionReinterpretStringAs<DataTypeFloat32, NameReinterpretAsFloat32>;
|
||||
using FunctionReinterpretAsFloat64 = FunctionReinterpretStringAs<DataTypeFloat64, NameReinterpretAsFloat64>;
|
||||
using FunctionReinterpretAsDate = FunctionReinterpretStringAs<DataTypeDate, NameReinterpretAsDate>;
|
||||
using FunctionReinterpretAsDateTime = FunctionReinterpretStringAs<DataTypeDateTime, NameReinterpretAsDateTime>;
|
||||
|
||||
using FunctionReinterpretAsString = FunctionReinterpretAsStringImpl<NameReinterpretAsString>;
|
||||
|
||||
|
@ -15,6 +15,12 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
/** Rounding Functions:
|
||||
* roundToExp2 - down to the nearest power of two;
|
||||
* roundDuration - down to the nearest of: 0, 1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000;
|
||||
|
@ -15,6 +15,14 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
template <bool negative = false>
|
||||
struct EmptyImpl
|
||||
{
|
||||
@ -894,7 +902,7 @@ public:
|
||||
{
|
||||
if (arguments.size() < 2)
|
||||
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
|
||||
+ ", should be at least 2.",
|
||||
+ ", should be at least 2.",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
for (const auto arg_idx : ext::range(0, arguments.size()))
|
||||
|
@ -13,6 +13,12 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
/** String functions
|
||||
*
|
||||
* length, empty, notEmpty,
|
||||
|
@ -15,6 +15,12 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
/** Functions that split strings into an array of strings or vice versa.
|
||||
*
|
||||
* splitByChar(sep, s)
|
||||
@ -546,9 +552,9 @@ public:
|
||||
};
|
||||
|
||||
|
||||
using FunctionAlphaTokens = FunctionTokens<AlphaTokensImpl> ;
|
||||
using FunctionSplitByChar = FunctionTokens<SplitByCharImpl> ;
|
||||
using FunctionAlphaTokens = FunctionTokens<AlphaTokensImpl>;
|
||||
using FunctionSplitByChar = FunctionTokens<SplitByCharImpl>;
|
||||
using FunctionSplitByString = FunctionTokens<SplitByStringImpl>;
|
||||
using FunctionExtractAll = FunctionTokens<ExtractAllImpl> ;
|
||||
using FunctionExtractAll = FunctionTokens<ExtractAllImpl>;
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionsTransform.h>
|
||||
#include <Functions/DataTypeTraits.h>
|
||||
#include <DataTypes/DataTypeTraits.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -22,6 +22,8 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
|
||||
|
@ -8,6 +8,11 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
|
@ -3,9 +3,9 @@
|
||||
#include <memory>
|
||||
|
||||
#include <Core/Names.h>
|
||||
#include <Core/Field.h>
|
||||
#include <Core/Block.h>
|
||||
#include <Core/ColumnNumbers.h>
|
||||
#include <Core/ColumnsWithTypeAndName.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
|
||||
|
||||
@ -15,10 +15,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int FUNCTION_CANNOT_HAVE_PARAMETERS;
|
||||
extern const int TOO_LESS_ARGUMENTS_FOR_FUNCTION;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
struct ExpressionAction;
|
||||
|
@ -9,6 +9,13 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
template<typename B>
|
||||
struct AndImpl
|
||||
{
|
||||
|
@ -1,6 +1,6 @@
|
||||
#include <iostream>
|
||||
|
||||
#include <Functions/NumberTraits.h>
|
||||
#include <DataTypes/NumberTraits.h>
|
||||
|
||||
|
||||
void printType(DB::UInt8 x) { std::cout << "UInt8"; }
|
||||
|
@ -3,11 +3,6 @@
|
||||
#include <vector>
|
||||
|
||||
#include <city.h>
|
||||
|
||||
#ifdef USE_QUICKLZ
|
||||
#include <quicklz/quicklz_level1.h>
|
||||
#endif
|
||||
|
||||
#include <lz4.h>
|
||||
#include <zstd.h>
|
||||
|
||||
@ -57,16 +52,7 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
|
||||
|
||||
size_t & size_compressed = size_compressed_without_checksum;
|
||||
|
||||
if (method < 0x80)
|
||||
{
|
||||
#ifdef USE_QUICKLZ
|
||||
size_compressed = qlz_size_compressed(&own_compressed_buffer[0]);
|
||||
size_decompressed = qlz_size_decompressed(&own_compressed_buffer[0]);
|
||||
#else
|
||||
throw Exception("QuickLZ compression method is disabled", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
|
||||
#endif
|
||||
}
|
||||
else if (method == static_cast<UInt8>(CompressionMethodByte::LZ4) || method == static_cast<UInt8>(CompressionMethodByte::ZSTD))
|
||||
if (method == static_cast<UInt8>(CompressionMethodByte::LZ4) || method == static_cast<UInt8>(CompressionMethodByte::ZSTD))
|
||||
{
|
||||
size_compressed = unalignedLoad<UInt32>(&own_compressed_buffer[1]);
|
||||
size_decompressed = unalignedLoad<UInt32>(&own_compressed_buffer[5]);
|
||||
@ -108,18 +94,7 @@ void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, s
|
||||
|
||||
UInt8 method = compressed_buffer[0]; /// See CompressedWriteBuffer.h
|
||||
|
||||
if (method < 0x80)
|
||||
{
|
||||
#ifdef USE_QUICKLZ
|
||||
if (!qlz_state)
|
||||
qlz_state = std::make_unique<qlz_state_decompress>();
|
||||
|
||||
qlz_decompress(&compressed_buffer[0], to, qlz_state.get());
|
||||
#else
|
||||
throw Exception("QuickLZ compression method is disabled", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
|
||||
#endif
|
||||
}
|
||||
else if (method == static_cast<UInt8>(CompressionMethodByte::LZ4))
|
||||
if (method == static_cast<UInt8>(CompressionMethodByte::LZ4))
|
||||
{
|
||||
if (LZ4_decompress_fast(&compressed_buffer[COMPRESSED_BLOCK_HEADER_SIZE], to, size_decompressed) < 0)
|
||||
throw Exception("Cannot LZ4_decompress_fast", ErrorCodes::CANNOT_DECOMPRESS);
|
||||
|
@ -1,9 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef USE_QUICKLZ
|
||||
struct qlz_state_decompress;
|
||||
#endif
|
||||
|
||||
#include <Common/PODArray.h>
|
||||
|
||||
|
||||
@ -25,12 +21,6 @@ protected:
|
||||
/// Points to memory, holding compressed block.
|
||||
char * compressed_buffer = nullptr;
|
||||
|
||||
#ifdef USE_QUICKLZ
|
||||
std::unique_ptr<qlz_state_decompress> qlz_state;
|
||||
#else
|
||||
void * fixed_size_padding = nullptr; /// ABI compatibility for USE_QUICKLZ
|
||||
#endif
|
||||
|
||||
/// Don't checksum on decompressing.
|
||||
bool disable_checksum = false;
|
||||
|
||||
|
@ -6,7 +6,6 @@
|
||||
|
||||
#define DBMS_MAX_COMPRESSED_SIZE 0x40000000ULL /// 1GB
|
||||
|
||||
#define QUICKLZ_ADDITIONAL_SPACE 400
|
||||
#define COMPRESSED_BLOCK_HEADER_SIZE 9
|
||||
|
||||
|
||||
@ -16,7 +15,6 @@ namespace DB
|
||||
/** Compression method */
|
||||
enum class CompressionMethod
|
||||
{
|
||||
QuickLZ = 0,
|
||||
LZ4 = 1,
|
||||
LZ4HC = 2, /// The format is the same as for LZ4. The difference is only in compression.
|
||||
ZSTD = 3, /// Experimental algorithm: https://github.com/Cyan4973/zstd
|
||||
@ -29,14 +27,6 @@ enum class CompressionMethod
|
||||
*
|
||||
* The next byte specifies the compression algorithm. Then everything depends on the algorithm.
|
||||
*
|
||||
* The first 4 options are compatible with QuickLZ level 1.
|
||||
* That is, if the value of the first byte is < 4, it is enough to use qlz_level1_decompress function to decompress.
|
||||
*
|
||||
* 0x00 - uncompressed data, small block. Next, one byte - compressed data size, including header; one byte - uncompressed data size.
|
||||
* 0x01 - compressed data, QuickLZ level 1, small block. Then two bytes are similar.
|
||||
* 0x02 - uncompressed data, large block. Then 4 bytes - compressed data size, including header; 4 bytes uncompressed data size.
|
||||
* 0x03 - compressed data, QuickLZ level 1, large block. Then 8 bytes are similar.
|
||||
*
|
||||
* 0x82 - LZ4 or LZ4HC (they have the same format).
|
||||
* Next 4 bytes - the size of the compressed data, taking into account the header; 4 bytes is the size of the uncompressed data.
|
||||
*
|
||||
@ -53,8 +43,8 @@ enum class CompressionMethod
|
||||
|
||||
enum class CompressionMethodByte : uint8_t
|
||||
{
|
||||
LZ4 = 0x82,
|
||||
ZSTD = 0x90,
|
||||
LZ4 = 0x82,
|
||||
ZSTD = 0x90,
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,10 +1,5 @@
|
||||
#include <memory>
|
||||
#include <city.h>
|
||||
|
||||
#ifdef USE_QUICKLZ
|
||||
#include <quicklz/quicklz_level1.h>
|
||||
#endif
|
||||
|
||||
#include <lz4.h>
|
||||
#include <lz4hc.h>
|
||||
#include <zstd.h>
|
||||
@ -35,28 +30,10 @@ void CompressedWriteBuffer::nextImpl()
|
||||
char * compressed_buffer_ptr = nullptr;
|
||||
|
||||
/** The format of compressed block - see CompressedStream.h
|
||||
*/
|
||||
*/
|
||||
|
||||
switch (method)
|
||||
{
|
||||
case CompressionMethod::QuickLZ:
|
||||
{
|
||||
#ifdef USE_QUICKLZ
|
||||
compressed_buffer.resize(uncompressed_size + QUICKLZ_ADDITIONAL_SPACE);
|
||||
|
||||
compressed_size = qlz_compress(
|
||||
working_buffer.begin(),
|
||||
&compressed_buffer[0],
|
||||
uncompressed_size,
|
||||
qlz_state.get());
|
||||
|
||||
compressed_buffer[0] &= 3;
|
||||
compressed_buffer_ptr = &compressed_buffer[0];
|
||||
break;
|
||||
#else
|
||||
throw Exception("QuickLZ compression method is disabled", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
|
||||
#endif
|
||||
}
|
||||
case CompressionMethod::LZ4:
|
||||
case CompressionMethod::LZ4HC:
|
||||
{
|
||||
@ -137,9 +114,6 @@ CompressedWriteBuffer::CompressedWriteBuffer(
|
||||
CompressionMethod method_,
|
||||
size_t buf_size)
|
||||
: BufferWithOwnMemory<WriteBuffer>(buf_size), out(out_), method(method_)
|
||||
#ifdef USE_QUICKLZ
|
||||
, qlz_state(std::make_unique<qlz_state_compress>())
|
||||
#endif
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -2,10 +2,6 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
#ifdef USE_QUICKLZ
|
||||
struct qlz_state_compress;
|
||||
#endif
|
||||
|
||||
#include <Common/PODArray.h>
|
||||
|
||||
#include <IO/WriteBuffer.h>
|
||||
@ -24,15 +20,6 @@ private:
|
||||
|
||||
PODArray<char> compressed_buffer;
|
||||
|
||||
#ifdef USE_QUICKLZ
|
||||
std::unique_ptr<qlz_state_compress> qlz_state;
|
||||
#else
|
||||
/// ABI compatibility for USE_QUICKLZ
|
||||
void * fixed_size_padding = nullptr;
|
||||
/// Undoes warning unused-private-field.
|
||||
void * fixed_size_padding_used() const { return fixed_size_padding; }
|
||||
#endif
|
||||
|
||||
void nextImpl() override;
|
||||
|
||||
public:
|
||||
|
@ -1,109 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <mysqlxx/Row.h>
|
||||
#include <mysqlxx/Null.h>
|
||||
#include <mysqlxx/Manip.h>
|
||||
#include <common/MetrikaTypes.h>
|
||||
#include <Core/Field.h>
|
||||
#include <Core/FieldVisitors.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
/// This is for Yandex.Metrica code.
|
||||
|
||||
namespace mysqlxx
|
||||
{
|
||||
inline std::ostream & operator<< (mysqlxx::EscapeManipResult res, const DB::Array & value)
|
||||
{
|
||||
return res.ostr << DB::applyVisitor(DB::FieldVisitorToString(), DB::Field(value));
|
||||
}
|
||||
|
||||
inline std::ostream & operator<< (mysqlxx::QuoteManipResult res, const DB::Array & value)
|
||||
{
|
||||
throw Poco::Exception("Cannot quote Array with mysqlxx::quote.");
|
||||
}
|
||||
|
||||
inline std::istream & operator>> (mysqlxx::UnEscapeManipResult res, DB::Array & value)
|
||||
{
|
||||
throw Poco::Exception("Cannot unescape Array with mysqlxx::unescape.");
|
||||
}
|
||||
|
||||
inline std::istream & operator>> (mysqlxx::UnQuoteManipResult res, DB::Array & value)
|
||||
{
|
||||
throw Poco::Exception("Cannot unquote Array with mysqlxx::unquote.");
|
||||
}
|
||||
|
||||
|
||||
inline std::ostream & operator<< (mysqlxx::EscapeManipResult res, const DB::Tuple & value)
|
||||
{
|
||||
return res.ostr << DB::applyVisitor(DB::FieldVisitorToString(), DB::Field(value));
|
||||
}
|
||||
|
||||
inline std::ostream & operator<< (mysqlxx::QuoteManipResult res, const DB::Tuple & value)
|
||||
{
|
||||
throw Poco::Exception("Cannot quote Tuple with mysqlxx::quote.");
|
||||
}
|
||||
|
||||
inline std::istream & operator>> (mysqlxx::UnEscapeManipResult res, DB::Tuple & value)
|
||||
{
|
||||
throw Poco::Exception("Cannot unescape Tuple with mysqlxx::unescape.");
|
||||
}
|
||||
|
||||
inline std::istream & operator>> (mysqlxx::UnQuoteManipResult res, DB::Tuple & value)
|
||||
{
|
||||
throw Poco::Exception("Cannot unquote Tuple with mysqlxx::unquote.");
|
||||
}
|
||||
|
||||
template <> inline VisitID_t Value::get<VisitID_t>() const { return VisitID_t(getUInt()); }
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Output mysqlxx::Row in tab-separated form
|
||||
inline void writeEscapedRow(const mysqlxx::Row & row, WriteBuffer & buf)
|
||||
{
|
||||
for (size_t i = 0; i < row.size(); ++i)
|
||||
{
|
||||
if (i != 0)
|
||||
buf.write('\t');
|
||||
|
||||
if (unlikely(row[i].isNull()))
|
||||
{
|
||||
buf.write("\\N", 2);
|
||||
continue;
|
||||
}
|
||||
|
||||
writeAnyEscapedString<'\''>(row[i].data(), row[i].data() + row[i].length(), buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
inline void writeText(const mysqlxx::Null<T> & x, WriteBuffer & buf)
|
||||
{
|
||||
if (x.isNull())
|
||||
writeCString("\\N", buf);
|
||||
else
|
||||
writeText(static_cast<const T &>(x), buf);
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
inline void writeQuoted(const mysqlxx::Null<T> & x, WriteBuffer & buf)
|
||||
{
|
||||
if (x.isNull())
|
||||
writeCString("NULL", buf);
|
||||
else
|
||||
writeText(static_cast<const T &>(x), buf);
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
inline Field toField(const mysqlxx::Null<T> & x)
|
||||
{
|
||||
return x.isNull() ? Field(Null()) : toField(static_cast<const T &>(x));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <Common/SimpleCache.h>
|
||||
#include <Common/StringUtils.h>
|
||||
#include <IO/HexWriteBuffer.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <Poco/Util/Application.h>
|
||||
#include <openssl/sha.h>
|
||||
@ -40,15 +41,6 @@ inline bool isLocal(const Cluster::Address & address)
|
||||
return address.default_database.empty() && isLocalAddress(address.resolved_address);
|
||||
}
|
||||
|
||||
inline std::string addressToDirName(const Cluster::Address & address)
|
||||
{
|
||||
return
|
||||
escapeForFileName(address.user) +
|
||||
(address.password.empty() ? "" : (':' + escapeForFileName(address.password))) + '@' +
|
||||
escapeForFileName(address.resolved_address.host().toString()) + ':' +
|
||||
std::to_string(address.resolved_address.port()) +
|
||||
(address.default_database.empty() ? "" : ('#' + escapeForFileName(address.default_database)));
|
||||
}
|
||||
|
||||
/// To cache DNS requests.
|
||||
Poco::Net::SocketAddress resolveSocketAddressImpl1(const String & host, UInt16 port)
|
||||
@ -108,6 +100,29 @@ Cluster::Address::Address(const String & host_port_, const String & user_, const
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
String Cluster::Address::toString() const
|
||||
{
|
||||
return toString(host_name, port);
|
||||
}
|
||||
|
||||
String Cluster::Address::toString(const String & host_name, UInt16 port)
|
||||
{
|
||||
return escapeForFileName(host_name) + ':' + DB::toString(port);
|
||||
}
|
||||
|
||||
|
||||
String Cluster::Address::toStringFull() const
|
||||
{
|
||||
return
|
||||
escapeForFileName(user) +
|
||||
(password.empty() ? "" : (':' + escapeForFileName(password))) + '@' +
|
||||
escapeForFileName(resolved_address.host().toString()) + ':' +
|
||||
std::to_string(resolved_address.port()) +
|
||||
(default_database.empty() ? "" : ('#' + escapeForFileName(default_database)));
|
||||
}
|
||||
|
||||
|
||||
/// Implementation of Clusters class
|
||||
|
||||
Clusters::Clusters(Poco::Util::AbstractConfiguration & config, const Settings & settings, const String & config_name)
|
||||
@ -195,7 +210,7 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
|
||||
info.local_addresses.push_back(address);
|
||||
else
|
||||
{
|
||||
info.dir_names.push_back(addressToDirName(address));
|
||||
info.dir_names.push_back(address.toStringFull());
|
||||
ConnectionPoolPtrs pools;
|
||||
pools.push_back(std::make_shared<ConnectionPool>(
|
||||
settings.distributed_connections_pool_size,
|
||||
@ -229,7 +244,7 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
|
||||
if (weight == 0)
|
||||
continue;
|
||||
|
||||
const auto internal_replication = config.getBool(partial_prefix + ".internal_replication", false);
|
||||
bool internal_replication = config.getBool(partial_prefix + ".internal_replication", false);
|
||||
|
||||
/** in case of internal_replication we will be appending names to
|
||||
* the first element of vector; otherwise we will just .emplace_back
|
||||
@ -252,14 +267,14 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
|
||||
{
|
||||
if (internal_replication)
|
||||
{
|
||||
auto dir_name = addressToDirName(replica_addresses.back());
|
||||
auto dir_name = replica_addresses.back().toStringFull();
|
||||
if (first)
|
||||
dir_names.emplace_back(std::move(dir_name));
|
||||
else
|
||||
dir_names.front() += "," + dir_name;
|
||||
}
|
||||
else
|
||||
dir_names.emplace_back(addressToDirName(replica_addresses.back()));
|
||||
dir_names.emplace_back(replica_addresses.back().toStringFull());
|
||||
|
||||
if (first) first = false;
|
||||
}
|
||||
@ -296,7 +311,7 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
|
||||
std::move(replicas), settings.load_balancing, settings.connections_with_failover_max_tries);
|
||||
|
||||
slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size());
|
||||
shards_info.push_back({std::move(dir_names), current_shard_num, weight, shard_local_addresses, shard_pool});
|
||||
shards_info.push_back({std::move(dir_names), current_shard_num, weight, shard_local_addresses, shard_pool, internal_replication});
|
||||
}
|
||||
else
|
||||
throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
|
||||
|
@ -58,6 +58,14 @@ public:
|
||||
|
||||
Address(Poco::Util::AbstractConfiguration & config, const String & config_prefix);
|
||||
Address(const String & host_port_, const String & user_, const String & password_);
|
||||
|
||||
/// Returns escaped 'host_name:port'
|
||||
String toString() const;
|
||||
|
||||
static String toString(const String & host_name, UInt16 port);
|
||||
|
||||
/// Retrurns escaped user:password@resolved_host_address:resolved_host_port#default_database
|
||||
String toStringFull() const;
|
||||
};
|
||||
|
||||
using Addresses = std::vector<Address>;
|
||||
@ -69,14 +77,17 @@ public:
|
||||
bool isLocal() const { return !local_addresses.empty(); }
|
||||
bool hasRemoteConnections() const { return pool != nullptr; }
|
||||
size_t getLocalNodeCount() const { return local_addresses.size(); }
|
||||
bool hasInternalReplication() const { return has_internal_replication; }
|
||||
|
||||
public:
|
||||
/// contains names of directories for asynchronous write to StorageDistributed
|
||||
/// Contains names of directories for asynchronous write to StorageDistributed
|
||||
std::vector<std::string> dir_names;
|
||||
UInt32 shard_num; /// Shard number, starting with 1.
|
||||
/// Number of the shard, the indexation begins with 1
|
||||
UInt32 shard_num;
|
||||
int weight;
|
||||
Addresses local_addresses;
|
||||
ConnectionPoolWithFailoverPtr pool;
|
||||
bool has_internal_replication;
|
||||
};
|
||||
|
||||
using ShardsInfo = std::vector<ShardInfo>;
|
||||
|
@ -120,8 +120,9 @@ struct ContextShared
|
||||
InterserverIOHandler interserver_io_handler; /// Handler for interserver communication.
|
||||
BackgroundProcessingPoolPtr background_pool; /// The thread pool for the background work performed by the tables.
|
||||
ReshardingWorkerPtr resharding_worker;
|
||||
Macros macros; /// Substitutions from config. Can be used for parameters of ReplicatedMergeTree.
|
||||
Macros macros; /// Substitutions extracted from config.
|
||||
std::unique_ptr<Compiler> compiler; /// Used for dynamic compilation of queries' parts if it necessary.
|
||||
std::shared_ptr<DDLWorker> ddl_worker; /// Process ddl commands from zk.
|
||||
/// Rules for selecting the compression method, depending on the size of the part.
|
||||
mutable std::unique_ptr<CompressionMethodSelector> compression_method_selector;
|
||||
std::unique_ptr<MergeTreeSettings> merge_tree_settings; /// Settings of MergeTree* engines.
|
||||
@ -1099,6 +1100,22 @@ ReshardingWorker & Context::getReshardingWorker()
|
||||
return *shared->resharding_worker;
|
||||
}
|
||||
|
||||
void Context::setDDLWorker(std::shared_ptr<DDLWorker> ddl_worker)
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (shared->ddl_worker)
|
||||
throw Exception("DDL background thread has already been initialized.", ErrorCodes::LOGICAL_ERROR);
|
||||
shared->ddl_worker = ddl_worker;
|
||||
}
|
||||
|
||||
DDLWorker & Context::getDDLWorker()
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (!shared->ddl_worker)
|
||||
throw Exception("DDL background thread not initialized.", ErrorCodes::LOGICAL_ERROR);
|
||||
return *shared->ddl_worker;
|
||||
}
|
||||
|
||||
void Context::resetCaches() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
|
@ -53,6 +53,7 @@ class PartLog;
|
||||
struct MergeTreeSettings;
|
||||
class IDatabase;
|
||||
class DDLGuard;
|
||||
class DDLWorker;
|
||||
class IStorage;
|
||||
using StoragePtr = std::shared_ptr<IStorage>;
|
||||
using Tables = std::map<String, StoragePtr>;
|
||||
@ -279,6 +280,9 @@ public:
|
||||
void setReshardingWorker(std::shared_ptr<ReshardingWorker> resharding_worker);
|
||||
ReshardingWorker & getReshardingWorker();
|
||||
|
||||
void setDDLWorker(std::shared_ptr<DDLWorker> ddl_worker);
|
||||
DDLWorker & getDDLWorker();
|
||||
|
||||
/** Clear the caches of the uncompressed blocks and marks.
|
||||
* This is usually done when renaming tables, changing the type of columns, deleting a table.
|
||||
* - since caches are linked to file names, and become incorrect.
|
||||
|
726
dbms/src/Interpreters/DDLWorker.cpp
Normal file
726
dbms/src/Interpreters/DDLWorker.cpp
Normal file
@ -0,0 +1,726 @@
|
||||
#include <Interpreters/DDLWorker.h>
|
||||
|
||||
#include <Parsers/ASTAlterQuery.h>
|
||||
#include <Parsers/ASTQueryWithOnCluster.h>
|
||||
#include <Parsers/ParserQuery.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
|
||||
#include <Storages/IStorage.h>
|
||||
#include <DataStreams/OneBlockInputStream.h>
|
||||
|
||||
#include <Interpreters/executeQuery.h>
|
||||
#include <Interpreters/Cluster.h>
|
||||
|
||||
#include <Common/getFQDNOrHostName.h>
|
||||
#include <Common/setThreadName.h>
|
||||
#include <Common/Stopwatch.h>
|
||||
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
|
||||
#include <zkutil/ZooKeeper.h>
|
||||
#include <zkutil/Lock.h>
|
||||
#include <Poco/Timestamp.h>
|
||||
|
||||
#include <experimental/optional>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
|
||||
extern const int INVALID_CONFIG_PARAMETER;
|
||||
extern const int UNKNOWN_FORMAT_VERSION;
|
||||
extern const int INCONSISTENT_TABLE_ACCROSS_SHARDS;
|
||||
extern const int INCONSISTENT_CLUSTER_DEFINITION;
|
||||
extern const int TIMEOUT_EXCEEDED;
|
||||
extern const int UNFINISHED;
|
||||
}
|
||||
|
||||
|
||||
const size_t DDLWorker::node_max_lifetime_seconds = 7 * 24 * 60 * 60; // week
|
||||
const size_t DDLWorker::cleanup_min_period_seconds = 60; // minute
|
||||
|
||||
|
||||
struct DDLLogEntry
|
||||
{
|
||||
String query;
|
||||
Strings hosts;
|
||||
String initiator; // optional
|
||||
|
||||
static constexpr int CURRENT_VERSION = 1;
|
||||
|
||||
String toString()
|
||||
{
|
||||
String res;
|
||||
{
|
||||
WriteBufferFromString wb(res);
|
||||
|
||||
auto version = CURRENT_VERSION;
|
||||
wb << "version: " << version << "\n";
|
||||
wb << "query: " << escape << query << "\n";
|
||||
wb << "hosts: " << hosts << "\n";
|
||||
wb << "initiator: " << initiator << "\n";
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
void parse(const String & data)
|
||||
{
|
||||
ReadBufferFromString rb(data);
|
||||
|
||||
int version;
|
||||
rb >> "version: " >> version >> "\n";
|
||||
|
||||
if (version != CURRENT_VERSION)
|
||||
throw Exception("Unknown DDLLogEntry format version: " + DB::toString(version), ErrorCodes::UNKNOWN_FORMAT_VERSION);
|
||||
|
||||
rb >> "query: " >> escape >> query >> "\n";
|
||||
rb >> "hosts: " >> hosts >> "\n";
|
||||
|
||||
if (!rb.eof())
|
||||
rb >> "initiator: " >> initiator >> "\n";
|
||||
else
|
||||
initiator.clear();
|
||||
|
||||
assertEOF(rb);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
using ShardAndHostNum = std::experimental::optional<std::pair<size_t, size_t>>;
|
||||
static ShardAndHostNum tryGetShardAndHostNum(const Cluster::AddressesWithFailover & cluster, const String & host_name, UInt16 port)
|
||||
{
|
||||
for (size_t shard_num = 0; shard_num < cluster.size(); ++shard_num)
|
||||
{
|
||||
for (size_t host_num = 0; host_num < cluster[shard_num].size(); ++host_num)
|
||||
{
|
||||
const Cluster::Address & address = cluster[shard_num][host_num];
|
||||
if (address.host_name == host_name && address.port == port)
|
||||
return std::make_pair(shard_num, host_num);
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
|
||||
static bool isSupportedAlterType(int type)
|
||||
{
|
||||
static const std::unordered_set<int> supported_alter_types{
|
||||
ASTAlterQuery::ADD_COLUMN,
|
||||
ASTAlterQuery::DROP_COLUMN,
|
||||
ASTAlterQuery::MODIFY_COLUMN,
|
||||
ASTAlterQuery::MODIFY_PRIMARY_KEY,
|
||||
ASTAlterQuery::DROP_PARTITION
|
||||
};
|
||||
|
||||
return supported_alter_types.count(type);
|
||||
}
|
||||
|
||||
|
||||
DDLWorker::DDLWorker(const std::string & zk_root_dir, Context & context_)
|
||||
: context(context_)
|
||||
{
|
||||
queue_dir = zk_root_dir;
|
||||
if (queue_dir.back() == '/')
|
||||
queue_dir.resize(queue_dir.size() - 1);
|
||||
|
||||
host_name = getFQDNOrHostName();
|
||||
port = context.getTCPPort();
|
||||
host_id = Cluster::Address::toString(host_name, port);
|
||||
|
||||
event_queue_updated = std::make_shared<Poco::Event>();
|
||||
|
||||
thread = std::thread(&DDLWorker::run, this);
|
||||
}
|
||||
|
||||
|
||||
DDLWorker::~DDLWorker()
|
||||
{
|
||||
stop_flag = true;
|
||||
event_queue_updated->set();
|
||||
thread.join();
|
||||
}
|
||||
|
||||
|
||||
void DDLWorker::processTasks()
|
||||
{
|
||||
LOG_DEBUG(log, "Processing tasks");
|
||||
|
||||
Strings queue_nodes = zookeeper->getChildren(queue_dir, nullptr, event_queue_updated);
|
||||
if (queue_nodes.empty())
|
||||
return;
|
||||
|
||||
bool server_startup = last_processed_node_name.empty();
|
||||
|
||||
std::sort(queue_nodes.begin(), queue_nodes.end());
|
||||
auto begin_node = server_startup
|
||||
? queue_nodes.begin()
|
||||
: std::upper_bound(queue_nodes.begin(), queue_nodes.end(), last_processed_node_name);
|
||||
|
||||
for (auto it = begin_node; it != queue_nodes.end(); ++it)
|
||||
{
|
||||
const String & node_name = *it;
|
||||
String node_path = queue_dir + "/" + node_name;
|
||||
String node_data;
|
||||
|
||||
if (!zookeeper->tryGet(node_path, node_data))
|
||||
{
|
||||
/// It is Ok that node could be deleted just now. It means that there are no current host in node's host list.
|
||||
continue;
|
||||
}
|
||||
|
||||
DDLLogEntry node;
|
||||
node.parse(node_data);
|
||||
|
||||
bool host_in_hostlist = std::find(node.hosts.cbegin(), node.hosts.cend(), host_id) != node.hosts.cend();
|
||||
bool already_processed = zookeeper->exists(node_path + "/finished/" + host_id);
|
||||
|
||||
if (!server_startup && already_processed)
|
||||
{
|
||||
throw Exception(
|
||||
"Server expects that DDL node " + node_name + " should be processed, but it was already processed according to ZK",
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
if (host_in_hostlist && !already_processed)
|
||||
{
|
||||
try
|
||||
{
|
||||
processTask(node, node_name);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log, "An error occurred while processing node " + node_name + " (" + node.query + ")");
|
||||
throw;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_DEBUG(log, "Node " << node_name << " (" << node.query << ") will not be processed");
|
||||
}
|
||||
|
||||
last_processed_node_name = node_name;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static bool tryExecuteQuery(const String & query, Context & context, ExecutionStatus & status, Logger * log = nullptr)
|
||||
{
|
||||
try
|
||||
{
|
||||
executeQuery(query, context);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
status = ExecutionStatus::fromCurrentException();
|
||||
|
||||
if (log)
|
||||
tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
status = ExecutionStatus(0);
|
||||
if (log)
|
||||
LOG_DEBUG(log, "Executed query: " << query);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void DDLWorker::processTask(const DDLLogEntry & node, const std::string & node_name)
|
||||
{
|
||||
LOG_DEBUG(log, "Processing node " << node_name << " (" << node.query << ")");
|
||||
|
||||
String node_path = queue_dir + "/" + node_name;
|
||||
createStatusDirs(node_path);
|
||||
|
||||
bool should_not_execute = current_node == node_name && current_node_was_executed;
|
||||
|
||||
if (!should_not_execute)
|
||||
{
|
||||
current_node = node_name;
|
||||
current_node_was_executed = false;
|
||||
|
||||
zookeeper->create(node_path + "/active/" + host_id, "", zkutil::CreateMode::Ephemeral);
|
||||
|
||||
try
|
||||
{
|
||||
ASTPtr query_ast;
|
||||
{
|
||||
ParserQuery parser_query;
|
||||
String description;
|
||||
IParser::Pos begin = &node.query.front();
|
||||
query_ast = parseQuery(parser_query, begin, begin + node.query.size(), description);
|
||||
}
|
||||
|
||||
const ASTQueryWithOnCluster * query;
|
||||
if (!query_ast || !(query = dynamic_cast<const ASTQueryWithOnCluster *>(query_ast.get())))
|
||||
throw Exception("Recieved unsupported DDL query", ErrorCodes::NOT_IMPLEMENTED);
|
||||
|
||||
String cluster_name = query->cluster;
|
||||
auto cluster = context.getCluster(cluster_name);
|
||||
|
||||
auto shard_host_num = tryGetShardAndHostNum(cluster->getShardsWithFailoverAddresses(), host_name, port);
|
||||
if (!shard_host_num)
|
||||
{
|
||||
throw Exception("Cannot find own address (" + host_id + ") in cluster " + cluster_name + " configuration",
|
||||
ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
|
||||
}
|
||||
|
||||
size_t shard_num = shard_host_num->first;
|
||||
size_t host_num = shard_host_num->second;
|
||||
|
||||
const auto & host_address = cluster->getShardsWithFailoverAddresses().at(shard_num).at(host_num);
|
||||
ASTPtr rewritten_ast = query->getRewrittenASTWithoutOnCluster(host_address.default_database);
|
||||
String rewritten_query = queryToString(rewritten_ast);
|
||||
|
||||
LOG_DEBUG(log, "Executing query: " << rewritten_query);
|
||||
|
||||
if (auto query_alter = dynamic_cast<const ASTAlterQuery *>(rewritten_ast.get()))
|
||||
{
|
||||
processTaskAlter(query_alter, rewritten_query, cluster, shard_num, node_path);
|
||||
}
|
||||
else
|
||||
{
|
||||
tryExecuteQuery(rewritten_query, context, current_node_execution_status, log);
|
||||
}
|
||||
}
|
||||
catch (const zkutil::KeeperException & e)
|
||||
{
|
||||
throw;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
current_node_execution_status = ExecutionStatus::fromCurrentException("An error occured during query preparation");
|
||||
}
|
||||
|
||||
/// We need to distinguish ZK errors occured before and after query executing
|
||||
current_node_was_executed = true;
|
||||
}
|
||||
|
||||
/// Delete active flag and create finish flag
|
||||
zkutil::Ops ops;
|
||||
ops.emplace_back(std::make_unique<zkutil::Op::Remove>(node_path + "/active/" + host_id, -1));
|
||||
ops.emplace_back(std::make_unique<zkutil::Op::Create>(node_path + "/finished/" + host_id,
|
||||
current_node_execution_status.serializeText(), zookeeper->getDefaultACL(), zkutil::CreateMode::Persistent));
|
||||
|
||||
int code = zookeeper->tryMultiWithRetries(ops);
|
||||
if (code != ZOK && code != ZNONODE)
|
||||
throw zkutil::KeeperException("Cannot commit executed node " + node_name, code);
|
||||
}
|
||||
|
||||
|
||||
void DDLWorker::processTaskAlter(
|
||||
const ASTAlterQuery * query_alter,
|
||||
const String & rewritten_query,
|
||||
const std::shared_ptr<Cluster> & cluster,
|
||||
ssize_t shard_num,
|
||||
const String & node_path)
|
||||
{
|
||||
String database = query_alter->database.empty() ? context.getCurrentDatabase() : query_alter->database;
|
||||
StoragePtr storage = context.getTable(database, query_alter->table);
|
||||
|
||||
bool execute_once_on_replica = storage->supportsReplication();
|
||||
bool execute_on_leader_replica = false;
|
||||
|
||||
for (const auto & param : query_alter->parameters)
|
||||
{
|
||||
if (!isSupportedAlterType(param.type))
|
||||
throw Exception("Unsupported type of ALTER query", ErrorCodes::NOT_IMPLEMENTED);
|
||||
|
||||
if (execute_once_on_replica)
|
||||
execute_on_leader_replica |= param.type == ASTAlterQuery::DROP_PARTITION;
|
||||
}
|
||||
|
||||
const auto & shard_info = cluster->getShardsInfo().at(shard_num);
|
||||
bool config_is_replicated_shard = shard_info.hasInternalReplication();
|
||||
|
||||
if (execute_once_on_replica && !config_is_replicated_shard)
|
||||
{
|
||||
throw Exception("Table " + query_alter->table + " is replicated, but shard #" + toString(shard_num + 1) +
|
||||
" isn't replicated according to its cluster definition", ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
|
||||
}
|
||||
else if (!execute_once_on_replica && config_is_replicated_shard)
|
||||
{
|
||||
throw Exception("Table " + query_alter->table + " isn't replicated, but shard #" + toString(shard_num + 1) +
|
||||
" replicated according to its cluster definition", ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
|
||||
}
|
||||
|
||||
if (execute_once_on_replica)
|
||||
{
|
||||
/// The following code may perform ALTER twice if
|
||||
/// current secver aquires lock, executes replicated alter,
|
||||
/// losts zookeeper connection and doesn't have time to create /executed node, second server executes replicated alter again
|
||||
/// To avoid this problem alter() method of replicated tables should be changed and takes into account ddl query id tag.
|
||||
if (!context.getSettingsRef().distributed_ddl_allow_replicated_alter)
|
||||
throw Exception("Distributed DDL alters don't work properly yet", ErrorCodes::NOT_IMPLEMENTED);
|
||||
|
||||
Strings replica_names;
|
||||
for (const auto & address : cluster->getShardsWithFailoverAddresses().at(shard_num))
|
||||
replica_names.emplace_back(address.toString());
|
||||
std::sort(replica_names.begin(), replica_names.end());
|
||||
|
||||
String shard_dir_name;
|
||||
for (auto it = replica_names.begin(); it != replica_names.end(); ++it)
|
||||
shard_dir_name += *it + (std::next(it) != replica_names.end() ? "," : "");
|
||||
|
||||
String shard_path = node_path + "/shards/" + shard_dir_name;
|
||||
String is_executed_path = shard_path + "/executed";
|
||||
zookeeper->createAncestors(shard_path + "/");
|
||||
|
||||
bool alter_executed_by_replica = false;
|
||||
{
|
||||
auto zookeeper_holder = std::make_shared<zkutil::ZooKeeperHolder>();
|
||||
zookeeper_holder->initFromInstance(zookeeper);
|
||||
|
||||
zkutil::Lock lock(zookeeper_holder, shard_path, "lock", host_id);
|
||||
std::mt19937 rng(std::hash<String>{}(host_id) + reinterpret_cast<intptr_t>(&rng));
|
||||
|
||||
for (int num_tries = 0; num_tries < 10; ++num_tries)
|
||||
{
|
||||
if (zookeeper->exists(is_executed_path))
|
||||
{
|
||||
alter_executed_by_replica = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (lock.tryLock())
|
||||
{
|
||||
tryExecuteQuery(rewritten_query, context, current_node_execution_status, log);
|
||||
|
||||
if (execute_on_leader_replica && current_node_execution_status.code == ErrorCodes::NOT_IMPLEMENTED)
|
||||
{
|
||||
/// TODO: it is ok to recieve exception "host is not leader"
|
||||
}
|
||||
|
||||
zookeeper->create(is_executed_path, host_id, zkutil::CreateMode::Persistent);
|
||||
lock.unlock();
|
||||
alter_executed_by_replica = true;
|
||||
break;
|
||||
}
|
||||
|
||||
std::this_thread::sleep_for(std::chrono::duration<double>(std::uniform_real_distribution<double>(0, 1)(rng)));
|
||||
}
|
||||
}
|
||||
|
||||
if (!alter_executed_by_replica)
|
||||
current_node_execution_status = ExecutionStatus(ErrorCodes::NOT_IMPLEMENTED, "Cannot enqueue replicated DDL query");
|
||||
}
|
||||
else
|
||||
{
|
||||
tryExecuteQuery(rewritten_query, context, current_node_execution_status, log);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void DDLWorker::cleanupQueue(const Strings * node_names_to_check)
|
||||
{
|
||||
/// Both ZK and Poco use Unix epoch
|
||||
size_t current_time_seconds = Poco::Timestamp().epochTime();
|
||||
constexpr size_t zookeeper_time_resolution = 1000;
|
||||
|
||||
// Too early to check
|
||||
if (last_cleanup_time_seconds && current_time_seconds < last_cleanup_time_seconds + cleanup_min_period_seconds)
|
||||
return;
|
||||
|
||||
last_cleanup_time_seconds = current_time_seconds;
|
||||
|
||||
LOG_DEBUG(log, "Cleaning queue");
|
||||
|
||||
String data;
|
||||
zkutil::Stat stat;
|
||||
DDLLogEntry node;
|
||||
|
||||
Strings node_names_fetched = node_names_to_check ? Strings{} : zookeeper->getChildren(queue_dir);
|
||||
const Strings & node_names = (node_names_to_check) ? *node_names_to_check : node_names_fetched;
|
||||
|
||||
for (const String & node_name : node_names)
|
||||
{
|
||||
try
|
||||
{
|
||||
String node_path = queue_dir + "/" + node_name;
|
||||
if (!zookeeper->tryGet(node_path, data, &stat))
|
||||
continue;
|
||||
|
||||
/// TODO: Add shared lock to avoid rare race counditions.
|
||||
|
||||
size_t zookeeper_time_seconds = stat.mtime / zookeeper_time_resolution;
|
||||
if (zookeeper_time_seconds + node_max_lifetime_seconds < current_time_seconds)
|
||||
{
|
||||
size_t lifetime_seconds = current_time_seconds - zookeeper_time_seconds;
|
||||
LOG_INFO(log, "Lifetime of node " << node_name << " (" << lifetime_seconds << " sec.) is expired, deleting it");
|
||||
zookeeper->removeRecursive(node_path);
|
||||
continue;
|
||||
}
|
||||
|
||||
Strings finished_nodes = zookeeper->getChildren(node_path + "/finished");
|
||||
node.parse(data);
|
||||
|
||||
if (finished_nodes.size() >= node.hosts.size())
|
||||
{
|
||||
LOG_INFO(log, "Node " << node_name << " had been executed by each host, deleting it");
|
||||
zookeeper->removeRecursive(node_path);
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log, "An error occured while checking and cleaning node " + node_name + " from queue");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Try to create unexisting "status" dirs for a node
|
||||
void DDLWorker::createStatusDirs(const std::string & node_path)
|
||||
{
|
||||
zkutil::Ops ops;
|
||||
auto acl = zookeeper->getDefaultACL();
|
||||
ops.emplace_back(std::make_unique<zkutil::Op::Create>(node_path + "/active", "", acl, zkutil::CreateMode::Persistent));
|
||||
ops.emplace_back(std::make_unique<zkutil::Op::Create>(node_path + "/finished", "", acl, zkutil::CreateMode::Persistent));
|
||||
|
||||
int code = zookeeper->tryMulti(ops);
|
||||
if (code != ZOK && code != ZNODEEXISTS)
|
||||
throw zkutil::KeeperException(code);
|
||||
}
|
||||
|
||||
|
||||
String DDLWorker::enqueueQuery(DDLLogEntry & entry)
|
||||
{
|
||||
if (entry.hosts.empty())
|
||||
return {};
|
||||
|
||||
String query_path_prefix = queue_dir + "/query-";
|
||||
zookeeper->createAncestors(query_path_prefix);
|
||||
|
||||
String node_path = zookeeper->create(query_path_prefix, entry.toString(), zkutil::CreateMode::PersistentSequential);
|
||||
createStatusDirs(node_path);
|
||||
|
||||
return node_path;
|
||||
}
|
||||
|
||||
|
||||
void DDLWorker::run()
|
||||
{
|
||||
setThreadName("DDLWorker");
|
||||
LOG_DEBUG(log, "Started DDLWorker thread");
|
||||
|
||||
zookeeper = context.getZooKeeper();
|
||||
zookeeper->createAncestors(queue_dir + "/");
|
||||
|
||||
while (!stop_flag)
|
||||
{
|
||||
try
|
||||
{
|
||||
processTasks();
|
||||
|
||||
LOG_DEBUG(log, "Waiting watch");
|
||||
event_queue_updated->wait();
|
||||
|
||||
if (stop_flag)
|
||||
break;
|
||||
|
||||
cleanupQueue();
|
||||
}
|
||||
catch (zkutil::KeeperException &)
|
||||
{
|
||||
LOG_DEBUG(log, "Recovering ZooKeeper session");
|
||||
zookeeper = context.getZooKeeper();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log);
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class DDLQueryStatusInputSream : public IProfilingBlockInputStream
|
||||
{
|
||||
public:
|
||||
|
||||
DDLQueryStatusInputSream(const String & zk_node_path, Context & context, size_t num_hosts)
|
||||
: node_path(zk_node_path), context(context), watch(CLOCK_MONOTONIC_COARSE)
|
||||
{
|
||||
sample = Block{
|
||||
{std::make_shared<DataTypeString>(), "host"},
|
||||
{std::make_shared<DataTypeUInt64>(), "status"},
|
||||
{std::make_shared<DataTypeString>(), "error"},
|
||||
{std::make_shared<DataTypeUInt64>(), "num_hosts_remaining"},
|
||||
{std::make_shared<DataTypeUInt64>(), "num_hosts_active"},
|
||||
};
|
||||
|
||||
setTotalRowsApprox(num_hosts);
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return "DDLQueryStatusInputSream";
|
||||
}
|
||||
|
||||
String getID() const override
|
||||
{
|
||||
return "DDLQueryStatusInputSream(" + node_path + ")";
|
||||
}
|
||||
|
||||
static constexpr size_t timeout_seconds = 120;
|
||||
|
||||
Block readImpl() override
|
||||
{
|
||||
Block res;
|
||||
if (num_hosts_finished >= total_rows_approx)
|
||||
return res;
|
||||
|
||||
auto zookeeper = context.getZooKeeper();
|
||||
size_t try_number = 0;
|
||||
|
||||
while(res.rows() == 0)
|
||||
{
|
||||
if (is_cancelled)
|
||||
return res;
|
||||
|
||||
auto elapsed_seconds = watch.elapsedSeconds();
|
||||
if (elapsed_seconds > timeout_seconds)
|
||||
throw Exception("Watching query is executing too long (" + toString(std::round(elapsed_seconds)) + " sec.)", ErrorCodes::TIMEOUT_EXCEEDED);
|
||||
|
||||
if (num_hosts_finished != 0 || try_number != 0)
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(50 * std::min(20LU, try_number + 1)));
|
||||
|
||||
/// TODO: add shared lock
|
||||
if (!zookeeper->exists(node_path))
|
||||
{
|
||||
throw Exception("Cannot provide query execution status. The query's node " + node_path
|
||||
+ " had been deleted by cleaner since it was finished (or its lifetime is expired)",
|
||||
ErrorCodes::UNFINISHED);
|
||||
}
|
||||
|
||||
Strings new_hosts = getNewAndUpdate(finished_hosts_set, getChildrenAllowNoNode(zookeeper, node_path + "/finished"));
|
||||
++try_number;
|
||||
if (new_hosts.empty())
|
||||
continue;
|
||||
|
||||
Strings cur_active_hosts = getChildrenAllowNoNode(zookeeper, node_path + "/active");
|
||||
|
||||
res = sample.cloneEmpty();
|
||||
for (const String & host : new_hosts)
|
||||
{
|
||||
ExecutionStatus status(1, "Cannot obtain error message");
|
||||
{
|
||||
String status_data;
|
||||
if (zookeeper->tryGet(node_path + "/finished/" + host, status_data))
|
||||
status.deserializeText(status_data);
|
||||
}
|
||||
|
||||
res.getByName("host").column->insert(host);
|
||||
res.getByName("status").column->insert(static_cast<UInt64>(status.code));
|
||||
res.getByName("error").column->insert(status.message);
|
||||
res.getByName("num_hosts_remaining").column->insert(total_rows_approx - (++num_hosts_finished));
|
||||
res.getByName("num_hosts_active").column->insert(cur_active_hosts.size());
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static Strings getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path)
|
||||
{
|
||||
Strings res;
|
||||
int code = zookeeper->tryGetChildren(node_path, res);
|
||||
if (code != ZOK && code != ZNONODE)
|
||||
throw zkutil::KeeperException(code, node_path);
|
||||
return res;
|
||||
}
|
||||
|
||||
static Strings getNewAndUpdate(NameSet & prev, const Strings & cur_list)
|
||||
{
|
||||
Strings diff;
|
||||
for (const String & elem : cur_list)
|
||||
{
|
||||
if (!prev.count(elem))
|
||||
{
|
||||
diff.emplace_back(elem);
|
||||
prev.emplace(elem);
|
||||
}
|
||||
}
|
||||
|
||||
return diff;
|
||||
}
|
||||
|
||||
~DDLQueryStatusInputSream() override = default;
|
||||
|
||||
Block sample;
|
||||
|
||||
private:
|
||||
String node_path;
|
||||
Context & context;
|
||||
|
||||
Stopwatch watch;
|
||||
|
||||
NameSet finished_hosts_set;
|
||||
size_t num_hosts_finished = 0;
|
||||
};
|
||||
|
||||
|
||||
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, Context & context)
|
||||
{
|
||||
const auto query = dynamic_cast<const ASTQueryWithOnCluster *>(query_ptr.get());
|
||||
if (!query)
|
||||
{
|
||||
throw Exception("Distributed execution is not supported for such DDL queries",
|
||||
ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
auto query_alter = dynamic_cast<const ASTAlterQuery *>(query_ptr.get());
|
||||
if (query_alter)
|
||||
{
|
||||
for (const auto & param : query_alter->parameters)
|
||||
{
|
||||
if (!isSupportedAlterType(param.type))
|
||||
throw Exception("Unsupported type of ALTER query", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
}
|
||||
|
||||
ClusterPtr cluster = context.getCluster(query->cluster);
|
||||
DDLWorker & ddl_worker = context.getDDLWorker();
|
||||
|
||||
DDLLogEntry entry;
|
||||
entry.query = queryToString(query_ptr);
|
||||
entry.initiator = ddl_worker.getHostName();
|
||||
|
||||
Cluster::AddressesWithFailover shards = cluster->getShardsWithFailoverAddresses();
|
||||
for (const auto & shard : shards)
|
||||
{
|
||||
for (const auto & addr : shard)
|
||||
entry.hosts.emplace_back(addr.toString());
|
||||
}
|
||||
|
||||
String node_path = ddl_worker.enqueueQuery(entry);
|
||||
|
||||
BlockIO io;
|
||||
if (node_path.empty())
|
||||
return io;
|
||||
|
||||
auto stream = std::make_shared<DDLQueryStatusInputSream>(node_path, context, entry.hosts.size());
|
||||
io.in_sample = stream->sample.cloneEmpty();
|
||||
io.in = std::move(stream);
|
||||
return io;
|
||||
}
|
||||
|
||||
|
||||
}
|
93
dbms/src/Interpreters/DDLWorker.h
Normal file
93
dbms/src/Interpreters/DDLWorker.h
Normal file
@ -0,0 +1,93 @@
|
||||
#pragma once
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/Cluster.h>
|
||||
#include <DataStreams/BlockIO.h>
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ASTAlterQuery;
|
||||
struct DDLLogEntry;
|
||||
|
||||
|
||||
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, Context & context);
|
||||
|
||||
|
||||
class DDLWorker
|
||||
{
|
||||
public:
|
||||
DDLWorker(const std::string & zk_root_dir, Context & context_);
|
||||
~DDLWorker();
|
||||
|
||||
/// Pushes query into DDL queue, returns path to created node
|
||||
String enqueueQuery(DDLLogEntry & entry);
|
||||
|
||||
std::string getHostName() const
|
||||
{
|
||||
return host_id;
|
||||
}
|
||||
|
||||
private:
|
||||
void processTasks();
|
||||
|
||||
void processTask(const DDLLogEntry & node, const std::string & node_path);
|
||||
|
||||
void processTaskAlter(
|
||||
const ASTAlterQuery * query_alter,
|
||||
const String & rewritten_query,
|
||||
const std::shared_ptr<Cluster> & cluster,
|
||||
ssize_t shard_num,
|
||||
const String & node_path);
|
||||
|
||||
/// Checks and cleanups queue's nodes
|
||||
void cleanupQueue(const Strings * node_names_to_check = nullptr);
|
||||
|
||||
void createStatusDirs(const std::string & node_name);
|
||||
ASTPtr getRewrittenQuery(const DDLLogEntry & node);
|
||||
|
||||
void run();
|
||||
|
||||
private:
|
||||
Context & context;
|
||||
Logger * log = &Logger::get("DDLWorker");
|
||||
|
||||
std::string host_id; /// host_name:port
|
||||
std::string host_name;
|
||||
UInt16 port;
|
||||
|
||||
std::string queue_dir; /// dir with queue of queries
|
||||
std::string master_dir; /// dir with queries was initiated by the server
|
||||
|
||||
/// Used to omit already processed nodes. Maybe usage of set is more obvious.
|
||||
std::string last_processed_node_name;
|
||||
|
||||
std::shared_ptr<zkutil::ZooKeeper> zookeeper;
|
||||
|
||||
/// Save state of executed task to avoid duplicate execution on ZK error
|
||||
std::string current_node = {};
|
||||
bool current_node_was_executed = false;
|
||||
ExecutionStatus current_node_execution_status;
|
||||
|
||||
std::shared_ptr<Poco::Event> event_queue_updated;
|
||||
std::atomic<bool> stop_flag{false};
|
||||
std::thread thread;
|
||||
|
||||
size_t last_cleanup_time_seconds = 0;
|
||||
|
||||
/// Delete node if its age is greater than that
|
||||
static const size_t node_max_lifetime_seconds;
|
||||
/// Cleaning starts after new node event is received if the last cleaning wasn't made sooner than N seconds ago
|
||||
static const size_t cleanup_min_period_seconds;
|
||||
|
||||
friend class DDLQueryStatusInputSream;
|
||||
};
|
||||
|
||||
|
||||
}
|
@ -209,9 +209,6 @@ void InJoinSubqueriesPreprocessor::process(ASTSelectQuery * query) const
|
||||
|
||||
bool InJoinSubqueriesPreprocessor::hasAtLeastTwoShards(const IStorage & table) const
|
||||
{
|
||||
if (!table.isRemote())
|
||||
return false;
|
||||
|
||||
const StorageDistributed * distributed = typeid_cast<const StorageDistributed *>(&table);
|
||||
if (!distributed)
|
||||
return false;
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <Interpreters/InterpreterAlterQuery.h>
|
||||
#include <Interpreters/InterpreterCreateQuery.h>
|
||||
#include <Interpreters/DDLWorker.h>
|
||||
#include <Parsers/ASTAlterQuery.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
@ -39,6 +40,10 @@ InterpreterAlterQuery::InterpreterAlterQuery(const ASTPtr & query_ptr_, const Co
|
||||
BlockIO InterpreterAlterQuery::execute()
|
||||
{
|
||||
auto & alter = typeid_cast<ASTAlterQuery &>(*query_ptr);
|
||||
|
||||
if (!alter.cluster.empty())
|
||||
return executeDDLQueryOnCluster(query_ptr, context);
|
||||
|
||||
const String & table_name = alter.table;
|
||||
String database_name = alter.database.empty() ? context.getCurrentDatabase() : alter.database;
|
||||
StoragePtr table = context.getTable(database_name, table_name);
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
|
||||
#include <Storages/StorageFactory.h>
|
||||
#include <Storages/StorageLog.h>
|
||||
@ -31,6 +32,7 @@
|
||||
#include <Interpreters/InterpreterSelectQuery.h>
|
||||
#include <Interpreters/InterpreterCreateQuery.h>
|
||||
#include <Interpreters/ExpressionAnalyzer.h>
|
||||
#include <Interpreters/DDLWorker.h>
|
||||
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeNested.h>
|
||||
@ -39,6 +41,7 @@
|
||||
#include <Databases/DatabaseFactory.h>
|
||||
#include <Databases/IDatabase.h>
|
||||
|
||||
#include <zkutil/ZooKeeper.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -63,12 +66,15 @@ InterpreterCreateQuery::InterpreterCreateQuery(const ASTPtr & query_ptr_, Contex
|
||||
}
|
||||
|
||||
|
||||
void InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
|
||||
BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
|
||||
{
|
||||
if (!create.cluster.empty())
|
||||
return executeDDLQueryOnCluster(query_ptr, context);
|
||||
|
||||
String database_name = create.database;
|
||||
|
||||
if (create.if_not_exists && context.isDatabaseExist(database_name))
|
||||
return;
|
||||
return {};
|
||||
|
||||
String database_engine_name;
|
||||
if (!create.storage)
|
||||
@ -147,6 +153,8 @@ void InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
|
||||
|
||||
throw;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
|
||||
@ -460,6 +468,9 @@ String InterpreterCreateQuery::setEngine(
|
||||
|
||||
BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
|
||||
{
|
||||
if (!create.cluster.empty())
|
||||
return executeDDLQueryOnCluster(query_ptr, context);
|
||||
|
||||
String path = context.getPath();
|
||||
String current_database = context.getCurrentDatabase();
|
||||
|
||||
@ -572,8 +583,7 @@ BlockIO InterpreterCreateQuery::execute()
|
||||
/// CREATE|ATTACH DATABASE
|
||||
if (!create.database.empty() && create.table.empty())
|
||||
{
|
||||
createDatabase(create);
|
||||
return {};
|
||||
return createDatabase(create);
|
||||
}
|
||||
else
|
||||
return createTable(create);
|
||||
|
@ -55,7 +55,7 @@ public:
|
||||
static ColumnsInfo getColumnsInfo(const ASTPtr & columns, const Context & context);
|
||||
|
||||
private:
|
||||
void createDatabase(ASTCreateQuery & create);
|
||||
BlockIO createDatabase(ASTCreateQuery & create);
|
||||
BlockIO createTable(ASTCreateQuery & create);
|
||||
|
||||
/// Calculate list of columns of table and return it.
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Interpreters/InterpreterDropQuery.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Databases/IDatabase.h>
|
||||
#include <Interpreters/DDLWorker.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -27,11 +28,14 @@ InterpreterDropQuery::InterpreterDropQuery(const ASTPtr & query_ptr_, Context &
|
||||
|
||||
BlockIO InterpreterDropQuery::execute()
|
||||
{
|
||||
ASTDropQuery & drop = typeid_cast<ASTDropQuery &>(*query_ptr);
|
||||
|
||||
if (!drop.cluster.empty())
|
||||
return executeDDLQueryOnCluster(query_ptr, context);
|
||||
|
||||
String path = context.getPath();
|
||||
String current_database = context.getCurrentDatabase();
|
||||
|
||||
ASTDropQuery & drop = typeid_cast<ASTDropQuery &>(*query_ptr);
|
||||
|
||||
bool drop_database = drop.table.empty() && !drop.database.empty();
|
||||
|
||||
if (drop_database && drop.detach)
|
||||
@ -85,6 +89,13 @@ BlockIO InterpreterDropQuery::execute()
|
||||
|
||||
for (auto & table : tables_to_drop)
|
||||
{
|
||||
if (!drop.detach)
|
||||
{
|
||||
if (!table.first->checkTableCanBeDropped())
|
||||
throw Exception("Table " + database_name + "." + table.first->getTableName() + " couldn't be dropped due to failed pre-drop check",
|
||||
ErrorCodes::TABLE_WAS_NOT_DROPPED);
|
||||
}
|
||||
|
||||
table.first->shutdown();
|
||||
|
||||
/// If table was already dropped by anyone, an exception will be thrown
|
||||
@ -99,10 +110,6 @@ BlockIO InterpreterDropQuery::execute()
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!table.first->checkTableCanBeDropped())
|
||||
throw Exception("Table " + database_name + "." + current_table_name + " couldn't be dropped due to failed pre-drop check",
|
||||
ErrorCodes::TABLE_WAS_NOT_DROPPED);
|
||||
|
||||
/// Delete table metdata and table itself from memory
|
||||
database->removeTable(current_table_name);
|
||||
/// Delete table data
|
||||
|
@ -18,7 +18,7 @@ class InterpreterDropQuery : public IInterpreter
|
||||
public:
|
||||
InterpreterDropQuery(const ASTPtr & query_ptr_, Context & context_);
|
||||
|
||||
/// Drop table.
|
||||
/// Drop table or database.
|
||||
BlockIO execute() override;
|
||||
|
||||
private:
|
||||
|
@ -3,7 +3,7 @@
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/InterpreterRenameQuery.h>
|
||||
#include <Storages/IStorage.h>
|
||||
|
||||
#include <Interpreters/DDLWorker.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -35,11 +35,14 @@ struct RenameDescription
|
||||
|
||||
BlockIO InterpreterRenameQuery::execute()
|
||||
{
|
||||
ASTRenameQuery & rename = typeid_cast<ASTRenameQuery &>(*query_ptr);
|
||||
|
||||
if (!rename.cluster.empty())
|
||||
return executeDDLQueryOnCluster(query_ptr, context);
|
||||
|
||||
String path = context.getPath();
|
||||
String current_database = context.getCurrentDatabase();
|
||||
|
||||
ASTRenameQuery & rename = typeid_cast<ASTRenameQuery &>(*query_ptr);
|
||||
|
||||
/** In case of error while renaming, it is possible that only part of tables was renamed
|
||||
* or we will be in inconsistent state. (It is worth to be fixed.)
|
||||
*/
|
||||
|
@ -838,24 +838,13 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns()
|
||||
if (max_streams > 1 && !is_remote)
|
||||
max_streams *= settings.max_streams_to_max_threads_ratio;
|
||||
|
||||
ASTPtr actual_query_ptr;
|
||||
if (storage->isRemote())
|
||||
{
|
||||
/// In case of a remote query, we send only SELECT, which will be executed.
|
||||
actual_query_ptr = query.cloneFirstSelect();
|
||||
}
|
||||
else
|
||||
actual_query_ptr = query_ptr;
|
||||
|
||||
/// PREWHERE optimization
|
||||
{
|
||||
auto optimize_prewhere = [&](auto & merge_tree)
|
||||
{
|
||||
const ASTSelectQuery & actual_select = typeid_cast<const ASTSelectQuery &>(*actual_query_ptr);
|
||||
|
||||
/// Try transferring some condition from WHERE to PREWHERE if enabled and viable
|
||||
if (settings.optimize_move_to_prewhere && actual_select.where_expression && !actual_select.prewhere_expression && !actual_select.final())
|
||||
MergeTreeWhereOptimizer{actual_query_ptr, context, merge_tree.getData(), required_columns, log};
|
||||
if (settings.optimize_move_to_prewhere && query.where_expression && !query.prewhere_expression && !query.final())
|
||||
MergeTreeWhereOptimizer{query_ptr, context, merge_tree.getData(), required_columns, log};
|
||||
};
|
||||
|
||||
if (const StorageMergeTree * merge_tree = typeid_cast<const StorageMergeTree *>(storage.get()))
|
||||
@ -864,8 +853,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns()
|
||||
optimize_prewhere(*merge_tree);
|
||||
}
|
||||
|
||||
streams = storage->read(required_columns, actual_query_ptr,
|
||||
context, from_stage, max_block_size, max_streams);
|
||||
streams = storage->read(required_columns, query_ptr, context, from_stage, max_block_size, max_streams);
|
||||
|
||||
if (alias_actions)
|
||||
{
|
||||
@ -1316,11 +1304,6 @@ void InterpreterSelectQuery::executeLimit()
|
||||
|
||||
void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(SubqueriesForSets & subqueries_for_sets)
|
||||
{
|
||||
/// If the query is not distributed, then remove the creation of temporary tables from subqueries (intended for sending to remote servers).
|
||||
if (!(storage && storage->isRemote()))
|
||||
for (auto & elem : subqueries_for_sets)
|
||||
elem.second.table.reset();
|
||||
|
||||
const Settings & settings = context.getSettingsRef();
|
||||
|
||||
executeUnion();
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include <DataStreams/OneBlockInputStream.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <Functions/DataTypeTraits.h>
|
||||
#include <DataTypes/DataTypeTraits.h>
|
||||
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
|
@ -275,7 +275,9 @@ struct Settings
|
||||
/** Suppose max_replica_delay_for_distributed_queries is set and all replicas for the queried table are stale. \
|
||||
* If this setting is enabled, the query will be performed anyway, otherwise the error will be reported. \
|
||||
*/ \
|
||||
M(SettingBool, fallback_to_stale_replicas_for_distributed_queries, 1)
|
||||
M(SettingBool, fallback_to_stale_replicas_for_distributed_queries, 1) \
|
||||
/** For development and testing purposes only still */ \
|
||||
M(SettingBool, distributed_ddl_allow_replicated_alter, 0)
|
||||
|
||||
|
||||
/// Possible limits for query execution.
|
||||
|
@ -556,14 +556,6 @@ struct SettingCompressionMethod
|
||||
|
||||
static CompressionMethod getCompressionMethod(const String & s)
|
||||
{
|
||||
if (s == "quicklz")
|
||||
{
|
||||
#ifdef USE_QUICKLZ
|
||||
return CompressionMethod::QuickLZ;
|
||||
#else
|
||||
throw Exception("QuickLZ compression method is disabled", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
|
||||
#endif
|
||||
}
|
||||
if (s == "lz4")
|
||||
return CompressionMethod::LZ4;
|
||||
if (s == "lz4hc")
|
||||
@ -571,14 +563,14 @@ struct SettingCompressionMethod
|
||||
if (s == "zstd")
|
||||
return CompressionMethod::ZSTD;
|
||||
|
||||
throw Exception("Unknown compression method: '" + s + "', must be one of 'quicklz', 'lz4', 'lz4hc', 'zstd'", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
|
||||
throw Exception("Unknown compression method: '" + s + "', must be one of 'lz4', 'lz4hc', 'zstd'", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
|
||||
}
|
||||
|
||||
String toString() const
|
||||
{
|
||||
const char * strings[] = { "quicklz", "lz4", "lz4hc", "zstd" };
|
||||
const char * strings[] = { nullptr, "lz4", "lz4hc", "zstd" };
|
||||
|
||||
if (value < CompressionMethod::QuickLZ || value > CompressionMethod::ZSTD)
|
||||
if (value < CompressionMethod::LZ4 || value > CompressionMethod::ZSTD)
|
||||
throw Exception("Unknown compression method", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
|
||||
|
||||
return strings[static_cast<size_t>(value)];
|
||||
|
@ -11,7 +11,7 @@
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeEnum.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <Functions/DataTypeTraits.h>
|
||||
#include <DataTypes/DataTypeTraits.h>
|
||||
|
||||
#include <Core/AccurateComparison.h>
|
||||
#include <Core/FieldVisitors.h>
|
||||
|
@ -15,13 +15,13 @@ ASTAlterQuery::Parameters::Parameters() : type(NO_TYPE) {}
|
||||
void ASTAlterQuery::Parameters::clone(Parameters & p) const
|
||||
{
|
||||
p = *this;
|
||||
if (col_decl) p.col_decl = col_decl->clone();
|
||||
if (column) p.column = column->clone();
|
||||
if (partition) p.partition = partition->clone();
|
||||
if (last_partition) p.last_partition = last_partition->clone();
|
||||
if (col_decl) p.col_decl = col_decl->clone();
|
||||
if (column) p.column = column->clone();
|
||||
if (partition) p.partition = partition->clone();
|
||||
if (last_partition) p.last_partition = last_partition->clone();
|
||||
if (weighted_zookeeper_paths) p.weighted_zookeeper_paths = weighted_zookeeper_paths->clone();
|
||||
if (sharding_key_expr) p.sharding_key_expr = sharding_key_expr->clone();
|
||||
if (coordinator) p.coordinator = coordinator->clone();
|
||||
if (sharding_key_expr) p.sharding_key_expr = sharding_key_expr->clone();
|
||||
if (coordinator) p.coordinator = coordinator->clone();
|
||||
}
|
||||
|
||||
void ASTAlterQuery::addParameters(const Parameters & params)
|
||||
@ -63,6 +63,18 @@ ASTPtr ASTAlterQuery::clone() const
|
||||
return res;
|
||||
}
|
||||
|
||||
ASTPtr ASTAlterQuery::getRewrittenASTWithoutOnCluster(const std::string & new_database) const
|
||||
{
|
||||
auto query_ptr = clone();
|
||||
ASTAlterQuery & query = static_cast<ASTAlterQuery &>(*query_ptr);
|
||||
|
||||
query.cluster.clear();
|
||||
if (query.database.empty())
|
||||
query.database = new_database;
|
||||
|
||||
return query_ptr;
|
||||
}
|
||||
|
||||
void ASTAlterQuery::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
|
||||
{
|
||||
frame.need_parens = false;
|
||||
@ -80,6 +92,7 @@ void ASTAlterQuery::formatImpl(const FormatSettings & settings, FormatState & st
|
||||
}
|
||||
settings.ostr << indent_str << backQuoteIfNeed(table);
|
||||
}
|
||||
formatOnCluster(settings);
|
||||
settings.ostr << settings.nl_or_ws;
|
||||
|
||||
for (size_t i = 0; i < parameters.size(); ++i)
|
||||
|
@ -1,24 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTQueryWithOnCluster.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** ALTER query
|
||||
/** ALTER query:
|
||||
* ALTER TABLE [db.]name_type
|
||||
* ADD COLUMN col_name type [AFTER col_after],
|
||||
* DROP COLUMN col_drop [FROM PARTITION partition],
|
||||
* MODIFY COLUMN col_name type,
|
||||
* DROP PARTITION partition
|
||||
* RESHARD [COPY] PARTITION partition
|
||||
* TO '/path/to/zookeeper/table' [WEIGHT w], ...
|
||||
* USING expression
|
||||
* [COORDINATE WITH 'coordinator_id']
|
||||
* DROP COLUMN col_drop [FROM PARTITION partition],
|
||||
* MODIFY COLUMN col_name type,
|
||||
* DROP PARTITION partition,
|
||||
* RESHARD [COPY] PARTITION partition
|
||||
* TO '/path/to/zookeeper/table' [WEIGHT w], ...
|
||||
* USING expression
|
||||
* [COORDINATE WITH 'coordinator_id']
|
||||
*/
|
||||
|
||||
class ASTAlterQuery : public IAST
|
||||
class ASTAlterQuery : public IAST, public ASTQueryWithOnCluster
|
||||
{
|
||||
public:
|
||||
enum ParameterType
|
||||
@ -101,6 +102,8 @@ public:
|
||||
|
||||
ASTPtr clone() const override;
|
||||
|
||||
ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database = {}) const override;
|
||||
|
||||
protected:
|
||||
void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
|
||||
};
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user