Merge master

This commit is contained in:
Ivan Blinkov 2017-06-16 17:12:16 +03:00
commit 35ab56ac95
260 changed files with 6179 additions and 1034 deletions

4
.gitignore vendored
View File

@ -33,6 +33,10 @@ CTestTestfile.cmake
*.a
*.o
# Python cache
*.pyc
__pycache__
# ignore generated files
*-metrika-yandex

View File

@ -0,0 +1,150 @@
// (C) Copyright John Maddock 2000.
// Use, modification and distribution are subject to the Boost Software License,
// Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt).
//
// See http://www.boost.org/libs/type_traits for most recent version including documentation.
// See boost/type_traits/*.hpp for full copyright notices.
#ifndef BOOST_TYPE_TRAITS_HPP
#define BOOST_TYPE_TRAITS_HPP
#include <boost/type_traits/add_const.hpp>
#include <boost/type_traits/add_cv.hpp>
#include <boost/type_traits/add_lvalue_reference.hpp>
#include <boost/type_traits/add_pointer.hpp>
#include <boost/type_traits/add_reference.hpp>
#include <boost/type_traits/add_rvalue_reference.hpp>
#include <boost/type_traits/add_volatile.hpp>
#include <boost/type_traits/aligned_storage.hpp>
#include <boost/type_traits/alignment_of.hpp>
#include <boost/type_traits/common_type.hpp>
#include <boost/type_traits/conditional.hpp>
#include <boost/type_traits/copy_cv.hpp>
#include <boost/type_traits/decay.hpp>
#include <boost/type_traits/declval.hpp>
#include <boost/type_traits/extent.hpp>
#include <boost/type_traits/floating_point_promotion.hpp>
#include <boost/type_traits/function_traits.hpp>
#include <boost/type_traits/has_bit_and.hpp>
#include <boost/type_traits/has_bit_and_assign.hpp>
#include <boost/type_traits/has_bit_or.hpp>
#include <boost/type_traits/has_bit_or_assign.hpp>
#include <boost/type_traits/has_bit_xor.hpp>
#include <boost/type_traits/has_bit_xor_assign.hpp>
#include <boost/type_traits/has_complement.hpp>
#include <boost/type_traits/has_dereference.hpp>
#include <boost/type_traits/has_divides.hpp>
#include <boost/type_traits/has_divides_assign.hpp>
#include <boost/type_traits/has_equal_to.hpp>
#include <boost/type_traits/has_greater.hpp>
#include <boost/type_traits/has_greater_equal.hpp>
#include <boost/type_traits/has_left_shift.hpp>
#include <boost/type_traits/has_left_shift_assign.hpp>
#include <boost/type_traits/has_less.hpp>
#include <boost/type_traits/has_less_equal.hpp>
#include <boost/type_traits/has_logical_and.hpp>
#include <boost/type_traits/has_logical_not.hpp>
#include <boost/type_traits/has_logical_or.hpp>
#include <boost/type_traits/has_minus.hpp>
#include <boost/type_traits/has_minus_assign.hpp>
#include <boost/type_traits/has_modulus.hpp>
#include <boost/type_traits/has_modulus_assign.hpp>
#include <boost/type_traits/has_multiplies.hpp>
#include <boost/type_traits/has_multiplies_assign.hpp>
#include <boost/type_traits/has_negate.hpp>
#if !defined(__BORLANDC__) && !defined(__CUDACC__)
#include <boost/type_traits/has_new_operator.hpp>
#endif
#include <boost/type_traits/has_not_equal_to.hpp>
#include <boost/type_traits/has_nothrow_assign.hpp>
#include <boost/type_traits/has_nothrow_constructor.hpp>
#include <boost/type_traits/has_nothrow_copy.hpp>
#include <boost/type_traits/has_nothrow_destructor.hpp>
#include <boost/type_traits/has_plus.hpp>
#include <boost/type_traits/has_plus_assign.hpp>
#include <boost/type_traits/has_post_decrement.hpp>
#include <boost/type_traits/has_post_increment.hpp>
#include <boost/type_traits/has_pre_decrement.hpp>
#include <boost/type_traits/has_pre_increment.hpp>
#include <boost/type_traits/has_right_shift.hpp>
#include <boost/type_traits/has_right_shift_assign.hpp>
#include <boost/type_traits/has_trivial_assign.hpp>
#include <boost/type_traits/has_trivial_constructor.hpp>
#include <boost/type_traits/has_trivial_copy.hpp>
#include <boost/type_traits/has_trivial_destructor.hpp>
#include <boost/type_traits/has_trivial_move_assign.hpp>
#include <boost/type_traits/has_trivial_move_constructor.hpp>
#include <boost/type_traits/has_unary_minus.hpp>
#include <boost/type_traits/has_unary_plus.hpp>
#include <boost/type_traits/has_virtual_destructor.hpp>
#include <boost/type_traits/integral_constant.hpp>
#include <boost/type_traits/is_abstract.hpp>
#include <boost/type_traits/is_arithmetic.hpp>
#include <boost/type_traits/is_array.hpp>
#include <boost/type_traits/is_assignable.hpp>
#include <boost/type_traits/is_base_and_derived.hpp>
#include <boost/type_traits/is_base_of.hpp>
#include <boost/type_traits/is_class.hpp>
#include <boost/type_traits/is_complex.hpp>
#include <boost/type_traits/is_compound.hpp>
#include <boost/type_traits/is_const.hpp>
#include <boost/type_traits/is_constructible.hpp>
#include <boost/type_traits/is_convertible.hpp>
#include <boost/type_traits/is_copy_assignable.hpp>
#include <boost/type_traits/is_copy_constructible.hpp>
#include <boost/type_traits/is_default_constructible.hpp>
#include <boost/type_traits/is_destructible.hpp>
#include <boost/type_traits/is_empty.hpp>
#include <boost/type_traits/is_enum.hpp>
#include <boost/type_traits/is_final.hpp>
#include <boost/type_traits/is_float.hpp>
#include <boost/type_traits/is_floating_point.hpp>
#include <boost/type_traits/is_function.hpp>
#include <boost/type_traits/is_fundamental.hpp>
#include <boost/type_traits/is_integral.hpp>
#include <boost/type_traits/is_lvalue_reference.hpp>
#include <boost/type_traits/is_member_function_pointer.hpp>
#include <boost/type_traits/is_member_object_pointer.hpp>
#include <boost/type_traits/is_member_pointer.hpp>
#include <boost/type_traits/is_nothrow_move_assignable.hpp>
#include <boost/type_traits/is_nothrow_move_constructible.hpp>
#include <boost/type_traits/is_object.hpp>
#include <boost/type_traits/is_pod.hpp>
#include <boost/type_traits/is_pointer.hpp>
#include <boost/type_traits/is_polymorphic.hpp>
#include <boost/type_traits/is_reference.hpp>
#include <boost/type_traits/is_rvalue_reference.hpp>
#include <boost/type_traits/is_same.hpp>
#include <boost/type_traits/is_scalar.hpp>
#include <boost/type_traits/is_signed.hpp>
#include <boost/type_traits/is_stateless.hpp>
#include <boost/type_traits/is_union.hpp>
#include <boost/type_traits/is_unsigned.hpp>
#include <boost/type_traits/is_virtual_base_of.hpp>
#include <boost/type_traits/is_void.hpp>
#include <boost/type_traits/is_volatile.hpp>
#include <boost/type_traits/make_signed.hpp>
#include <boost/type_traits/make_unsigned.hpp>
#include <boost/type_traits/rank.hpp>
#include <boost/type_traits/remove_all_extents.hpp>
#include <boost/type_traits/remove_bounds.hpp>
#include <boost/type_traits/remove_const.hpp>
#include <boost/type_traits/remove_cv.hpp>
#include <boost/type_traits/remove_extent.hpp>
#include <boost/type_traits/remove_pointer.hpp>
#include <boost/type_traits/remove_reference.hpp>
#include <boost/type_traits/remove_volatile.hpp>
#include <boost/type_traits/type_identity.hpp>
#include <boost/type_traits/type_with_alignment.hpp>
#if !(defined(__sgi) && defined(__EDG_VERSION__) && (__EDG_VERSION__ == 238))
#include <boost/type_traits/integral_promotion.hpp>
#include <boost/type_traits/promote.hpp>
#endif
#endif // BOOST_TYPE_TRAITS_HPP

View File

@ -67,8 +67,8 @@ add_headers_only(dbms src/Server)
list (APPEND dbms_sources ${CONFIG_BUILD})
list (APPEND dbms_headers ${CONFIG_VERSION} ${CONFIG_COMMON})
list (APPEND dbms_sources src/Functions/IFunction.cpp src/Functions/FunctionFactory.cpp src/Functions/DataTypeTraits.cpp)
list (APPEND dbms_headers src/Functions/IFunction.h src/Functions/FunctionFactory.h src/Functions/DataTypeTraits.h)
list (APPEND dbms_sources src/Functions/IFunction.cpp src/Functions/FunctionFactory.cpp)
list (APPEND dbms_headers src/Functions/IFunction.h src/Functions/FunctionFactory.h)
list (APPEND dbms_sources
src/AggregateFunctions/AggregateFunctionFactory.cpp
@ -98,6 +98,7 @@ list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctions
list(REMOVE_ITEM dbms_sources
src/Client/Client.cpp
src/Client/Benchmark.cpp
src/Client/PerformanceTest.cpp
src/Storages/StorageCloud.cpp
src/Databases/DatabaseCloud.cpp
src/Common/StringUtils.cpp)

View File

@ -5,6 +5,9 @@ install (FILES config.xml DESTINATION ${CLICKHOUSE_ETC_DIR}/clickhouse-client CO
add_library (clickhouse-benchmark Benchmark.cpp)
target_link_libraries (clickhouse-benchmark dbms ${Boost_PROGRAM_OPTIONS_LIBRARY})
add_library (clickhouse-performance-test PerformanceTest.cpp)
target_link_libraries (clickhouse-performance-test dbms ${Boost_PROGRAM_OPTIONS_LIBRARY})
if (ENABLE_TESTS)
add_subdirectory (tests)
endif ()

View File

@ -28,8 +28,9 @@ public:
public:
virtual ~IConnectionPool() {}
/** Selects the connection to work. */
virtual Entry get(const Settings * settings = nullptr) = 0;
/// Selects the connection to work.
/// If force_connected is false, the client must manually ensure that returned connection is good.
virtual Entry get(const Settings * settings = nullptr, bool force_connected = true) = 0;
};
using ConnectionPoolPtr = std::shared_ptr<IConnectionPool>;
@ -77,12 +78,18 @@ public:
{
}
Entry get(const Settings * settings = nullptr) override
Entry get(const Settings * settings = nullptr, bool force_connected = true) override
{
Entry entry;
if (settings)
return Base::get(settings->queue_max_wait_ms.totalMilliseconds());
entry = Base::get(settings->queue_max_wait_ms.totalMilliseconds());
else
return Base::get(-1);
entry = Base::get(-1);
if (force_connected)
entry->forceConnected();
return entry;
}
const std::string & getHost() const

View File

@ -42,7 +42,7 @@ ConnectionPoolWithFailover::ConnectionPoolWithFailover(
}
}
IConnectionPool::Entry ConnectionPoolWithFailover::get(const Settings * settings)
IConnectionPool::Entry ConnectionPoolWithFailover::get(const Settings * settings, bool force_connected)
{
TryGetEntryFunc try_get_entry = [&](NestedPool & pool, std::string & fail_message)
{
@ -131,7 +131,7 @@ ConnectionPoolWithFailover::tryGetEntry(
TryResult result;
try
{
result.entry = pool.get(settings);
result.entry = pool.get(settings, /* force_connected = */ false);
String server_name;
UInt64 server_version_major;

View File

@ -47,7 +47,7 @@ public:
using Entry = IConnectionPool::Entry;
/** Allocates connection to work. */
Entry get(const Settings * settings = nullptr) override; /// From IConnectionPool
Entry get(const Settings * settings = nullptr, bool force_connected = true) override; /// From IConnectionPool
/** Allocates up to the specified number of connections to work.
* Connections provide access to different replicas of one shard.

File diff suppressed because it is too large Load Diff

View File

@ -7,6 +7,8 @@
#include <common/logger_useful.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
#include <IO/ReadBufferFromString.h>
#include <Common/Exception.h>
@ -83,28 +85,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
}
catch (const Exception & e)
{
try
{
std::string text = e.displayText();
bool has_embedded_stack_trace = false;
if (check_embedded_stacktrace)
{
auto embedded_stack_trace_pos = text.find("Stack trace");
has_embedded_stack_trace = embedded_stack_trace_pos != std::string::npos;
if (!with_stacktrace && has_embedded_stack_trace)
{
text.resize(embedded_stack_trace_pos);
Poco::trimRightInPlace(text);
}
}
stream << "Code: " << e.code() << ", e.displayText() = " << text << ", e.what() = " << e.what();
if (with_stacktrace && !has_embedded_stack_trace)
stream << ", Stack trace:\n\n" << e.getStackTrace().toString();
}
catch (...) {}
stream << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace);
}
catch (const Poco::Exception & e)
{
@ -230,6 +211,36 @@ void tryLogException(std::exception_ptr e, Poco::Logger * logger, const std::str
}
}
/// Formats a DB::Exception as "Code: ..., e.displayText() = ..., e.what() = ...".
/// with_stacktrace - append the exception's stack trace unless the text already contains one.
/// check_embedded_stacktrace - look for "Stack trace" inside the text; when found and
///  with_stacktrace is false, the text is cut at that position and right-trimmed.
/// Anything thrown while formatting is swallowed; whatever was written so far is returned.
std::string getExceptionMessage(const Exception & e, bool with_stacktrace, bool check_embedded_stacktrace)
{
    std::stringstream out;

    try
    {
        std::string description = e.displayText();

        /// Position of a stack trace already present in the text; npos when absent or not looked for.
        const auto embedded_pos = check_embedded_stacktrace ? description.find("Stack trace") : std::string::npos;
        const bool embedded_found = embedded_pos != std::string::npos;

        if (embedded_found && !with_stacktrace)
        {
            description.resize(embedded_pos);
            Poco::trimRightInPlace(description);
        }

        out << "Code: " << e.code() << ", e.displayText() = " << description << ", e.what() = " << e.what();

        if (with_stacktrace && !embedded_found)
            out << ", Stack trace:\n\n" << e.getStackTrace().toString();
    }
    catch (...) {}

    return out.str();
}
std::string getExceptionMessage(std::exception_ptr e, bool with_stacktrace)
{
try
@ -243,4 +254,26 @@ std::string getExceptionMessage(std::exception_ptr e, bool with_stacktrace)
}
std::string ExecutionStatus::serializeText() const
{
std::string res;
{
WriteBufferFromString wb(res);
wb << code << "\n" << escape << message;
}
return res;
}
void ExecutionStatus::deserializeText(const std::string & data)
{
ReadBufferFromString rb(data);
rb >> code >> "\n" >> escape >> message;
}
/// Builds a status from the exception that is currently being handled.
/// The code comes from getCurrentExceptionCode(); the message is getCurrentExceptionMessage(false, true),
/// prefixed with "<start_of_message>: " when start_of_message is not empty.
ExecutionStatus ExecutionStatus::fromCurrentException(const std::string & start_of_message)
{
    std::string full_message = getCurrentExceptionMessage(false, true);

    /// The separator is only meaningful after a prefix: start_of_message defaults to "",
    /// and unconditionally prepending ": " would make every such message start with a stray ": ".
    if (!start_of_message.empty())
        full_message = start_of_message + ": " + full_message;

    return ExecutionStatus(getCurrentExceptionCode(), full_message);
}
}

View File

@ -79,6 +79,7 @@ void throwFromErrno(const std::string & s, int code = 0, int the_errno = errno);
void tryLogCurrentException(const char * log_name, const std::string & start_of_message = "");
void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message = "");
/** Prints current exception in canonical format.
* with_stacktrace - prints stack trace for DB::Exception.
* check_embedded_stacktrace - if DB::Exception has embedded stacktrace then
@ -89,9 +90,30 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
/// Returns error code from ErrorCodes
int getCurrentExceptionCode();
/// Outcome of running some piece of code: a return code plus an optional error message.
struct ExecutionStatus
{
    /// Return code; 0 unless set otherwise.
    int code = 0;
    /// Error text; empty unless set otherwise.
    std::string message;

    ExecutionStatus() = default;

    explicit ExecutionStatus(int return_code, const std::string & exception_message = "")
        : code{return_code}
        , message{exception_message}
    {
    }

    /// Creates a status describing the exception that is currently being handled.
    static ExecutionStatus fromCurrentException(const std::string & start_of_message = "");

    /// Text form of the status.
    std::string serializeText() const;

    /// Restores `code` and `message` from text.
    void deserializeText(const std::string & data);
};
void tryLogException(std::exception_ptr e, const char * log_name, const std::string & start_of_message = "");
void tryLogException(std::exception_ptr e, Poco::Logger * logger, const std::string & start_of_message = "");
std::string getExceptionMessage(const Exception & e, bool with_stacktrace, bool check_embedded_stacktrace = false);
std::string getExceptionMessage(std::exception_ptr e, bool with_stacktrace);

View File

@ -4,6 +4,7 @@
#include <cstdlib>
#include <climits>
#include <random>
#include <functional>
#include <common/Types.h>
#include <ext/scope_guard.h>
#include <Core/Types.h>

View File

@ -0,0 +1,125 @@
#include <Common/formatIPv6.h>
#include <Common/hex.h>
#include <ext/range.h>
#include <array>
namespace DB
{
/// Integer logarithm: returns ceil(log(value, base)), i.e. the smallest integer greater than or equal to log(value, base).
/// `carry` records that an earlier division step left a non-zero remainder, which forces rounding up.
static constexpr uint32_t int_log(const uint32_t value, const uint32_t base, const bool carry = false)
{
    /// Kept as a single return expression so the function stays usable in constant expressions.
    return value < base
        ? static_cast<uint32_t>(carry || value % base > 1)
        : 1 + int_log(value / base, base, carry || value % base != 0);
}
/// Prints an integer in the desired base, faster than sprintf.
/// Writes a lone '0' for zero, otherwise the digits without leading zeros; `out` is advanced past what was written.
template <uint32_t base, typename T, uint32_t buffer_size = sizeof(T) * int_log(256, base, false)>
static void print_integer(char *& out, T value)
{
    if (value == 0)
    {
        *out++ = '0';
        return;
    }

    /// Digits come out least significant first, so collect them in a scratch buffer and emit it backwards.
    char digits[buffer_size];
    char * digits_end = digits;

    for (; value > 0; value /= base)
        *digits_end++ = hexLowercase(value % base);

    while (digits_end != digits)
        *out++ = *--digits_end;
}
/// print IPv4 address as %u.%u.%u.%u
static void formatIPv4(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count)
{
const auto limit = IPV4_BINARY_LENGTH - zeroed_tail_bytes_count;
for (const auto i : ext::range(0, IPV4_BINARY_LENGTH))
{
UInt8 byte = (i < limit) ? src[i] : 0;
print_integer<10, UInt8>(dst, byte);
if (i != IPV4_BINARY_LENGTH - 1)
*dst++ = '.';
}
}
void formatIPv6(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count)
{
struct { int base, len; } best{-1}, cur{-1};
std::array<uint16_t, IPV6_BINARY_LENGTH / sizeof(uint16_t)> words{};
/** Preprocess:
* Copy the input (bytewise) array into a wordwise array.
* Find the longest run of 0x00's in src[] for :: shorthanding. */
for (const auto i : ext::range(0, IPV6_BINARY_LENGTH - zeroed_tail_bytes_count))
words[i / 2] |= src[i] << ((1 - (i % 2)) << 3);
for (const auto i : ext::range(0, words.size()))
{
if (words[i] == 0) {
if (cur.base == -1)
cur.base = i, cur.len = 1;
else
cur.len++;
}
else
{
if (cur.base != -1)
{
if (best.base == -1 || cur.len > best.len)
best = cur;
cur.base = -1;
}
}
}
if (cur.base != -1)
{
if (best.base == -1 || cur.len > best.len)
best = cur;
}
if (best.base != -1 && best.len < 2)
best.base = -1;
/// Format the result.
for (const int i : ext::range(0, words.size()))
{
/// Are we inside the best run of 0x00's?
if (best.base != -1 && i >= best.base && i < (best.base + best.len))
{
if (i == best.base)
*dst++ = ':';
continue;
}
/// Are we following an initial run of 0x00s or any real hex?
if (i != 0)
*dst++ = ':';
/// Is this address an encapsulated IPv4?
if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu)))
{
formatIPv4(src + 12, dst, std::min(zeroed_tail_bytes_count, static_cast<UInt8>(IPV4_BINARY_LENGTH)));
break;
}
print_integer<16>(dst, words[i]);
}
/// Was it a trailing run of 0x00's?
if (best.base != -1 && (best.base + best.len) == words.size())
*dst++ = ':';
*dst++ = '\0';
}
}

View File

@ -0,0 +1,21 @@
#pragma once
#include <common/Types.h>
#define IPV4_BINARY_LENGTH 4
#define IPV6_BINARY_LENGTH 16
#define IPV4_MAX_TEXT_LENGTH 15 /// Does not count tail zero byte.
#define IPV6_MAX_TEXT_LENGTH 39
namespace DB
{
/** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
* performs significantly faster than the reference implementation due to the absence of sprintf calls,
* bounds checking, unnecessary string copying and length calculation.
*
* src - binary IPv6 address (IPV6_BINARY_LENGTH bytes).
* dst - output position; it is advanced past the written text and the terminating zero byte.
*  No bounds checking is done, so the caller must provide enough room
*  (presumably IPV6_MAX_TEXT_LENGTH + 1 bytes - TODO confirm).
* zeroed_tail_bytes_count - number of trailing bytes of the address that are printed as if they were zero.
*/
void formatIPv6(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count = 0);
}

View File

@ -22,6 +22,10 @@ struct ColumnWithTypeAndName
ColumnWithTypeAndName(const ColumnPtr & column_, const DataTypePtr & type_, const String name_)
: column(column_), type(type_), name(name_) {}
/// Uses type->createColumn() to create column
ColumnWithTypeAndName(const DataTypePtr & type_, const String name_)
: column(type_->createColumn()), type(type_), name(name_) {}
ColumnWithTypeAndName cloneEmpty() const;
bool operator==(const ColumnWithTypeAndName & other) const;
String prettyPrint() const;

View File

@ -373,9 +373,10 @@ namespace ErrorCodes
extern const int BAD_CAST = 368;
extern const int ALL_REPLICAS_ARE_STALE = 369;
extern const int DATA_TYPE_CANNOT_BE_USED_IN_TABLES = 370;
extern const int SESSION_NOT_FOUND = 371;
extern const int SESSION_IS_LOCKED = 372;
extern const int INVALID_SESSION_TIMEOUT = 373;
extern const int INCONSISTENT_CLUSTER_DEFINITION = 371;
extern const int SESSION_NOT_FOUND = 372;
extern const int SESSION_IS_LOCKED = 373;
extern const int INVALID_SESSION_TIMEOUT = 374;
extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000;

View File

@ -1,6 +1,5 @@
#pragma once
#include <common/MetrikaTypes.h>
#include <common/LocalDate.h>
#include <common/LocalDateTime.h>
#include <Core/Field.h>

View File

@ -25,7 +25,7 @@ struct BlockIO
Block out_sample; /// Example of a block to be written to `out`.
/// Callbacks for query logging could be set here.
std::function<void(IBlockInputStream *, IBlockOutputStream *)> finish_callback;
std::function<void(IBlockInputStream *, IBlockOutputStream *)> finish_callback;
std::function<void()> exception_callback;
/// Call these functions if you want to log the request.
@ -44,18 +44,18 @@ struct BlockIO
BlockIO & operator= (const BlockIO & rhs)
{
/// We provide the correct order of destruction.
out = nullptr;
in = nullptr;
process_list_entry = nullptr;
out = nullptr;
in = nullptr;
process_list_entry = nullptr;
process_list_entry = rhs.process_list_entry;
in = rhs.in;
out = rhs.out;
in_sample = rhs.in_sample;
out_sample = rhs.out_sample;
process_list_entry = rhs.process_list_entry;
in = rhs.in;
out = rhs.out;
in_sample = rhs.in_sample;
out_sample = rhs.out_sample;
finish_callback = rhs.finish_callback;
exception_callback = rhs.exception_callback;
finish_callback = rhs.finish_callback;
exception_callback = rhs.exception_callback;
return *this;
}

View File

@ -15,6 +15,12 @@
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
PrettyBlockOutputStream::PrettyBlockOutputStream(WriteBuffer & ostr_, bool no_escapes_, size_t max_rows_, const Context & context_)
: ostr(ostr_), max_rows(max_rows_), no_escapes(no_escapes_), context(context_)
{

View File

@ -101,7 +101,7 @@ void DataTypeNullable::deserializeTextEscaped(IColumn & column, ReadBuffer & ist
if (*istr.position() != '\\')
{
safeDeserialize(column,
[&istr] { return false; },
[] { return false; },
[this, &istr] (IColumn & nested) { nested_data_type->deserializeTextEscaped(nested, istr); } );
}
else

View File

@ -1,4 +1,4 @@
#include <Functions/DataTypeTraits.h>
#include <DataTypes/DataTypeTraits.h>
namespace DB { namespace DataTypeTraits {

View File

@ -1,6 +1,6 @@
#pragma once
#include <Functions/NumberTraits.h>
#include <DataTypes/NumberTraits.h>
#include <DataTypes/EnrichedDataTypePtr.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNullable.h>

View File

@ -1,6 +1,9 @@
#include <functional>
#include <sstream>
#include <memory>
#include <Columns/ColumnsNumber.h>
#include <Dictionaries/CacheDictionary.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnString.h>
#include <Common/BitHelpers.h>
#include <Common/randomSeed.h>
#include <Common/HashTable/Hash.h>
@ -8,6 +11,9 @@
#include <Common/ProfilingScopedRWLock.h>
#include <Common/ProfileEvents.h>
#include <Common/CurrentMetrics.h>
#include <DataTypes/DataTypesNumber.h>
#include <Dictionaries/CacheDictionary.h>
#include <Dictionaries/DictionaryBlockInputStream.h>
#include <ext/size.h>
#include <ext/range.h>
#include <ext/map.h>
@ -418,11 +424,11 @@ void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8>
void CacheDictionary::createAttributes()
{
const auto size = dict_struct.attributes.size();
attributes.reserve(size);
const auto attributes_size = dict_struct.attributes.size();
attributes.reserve(attributes_size);
bytes_allocated += size * sizeof(CellMetadata);
bytes_allocated += size * sizeof(attributes.front());
bytes_allocated += attributes_size * sizeof(attributes.front());
for (const auto & attribute : dict_struct.attributes)
{
@ -957,4 +963,33 @@ CacheDictionary::Attribute & CacheDictionary::getAttribute(const std::string & a
return attributes[it->second];
}
bool CacheDictionary::isEmptyCell(const UInt64 idx) const
{
return (idx != zero_cell_idx && cells[idx].id == 0) || (cells[idx].data
== ext::safe_bit_cast<CellMetadata::time_point_urep_t>(CellMetadata::time_point_t()));
}
/// Collects the ids of all cells that are neither empty nor marked as default.
/// `read_lock` on `rw_lock` is held for the whole scan.
PaddedPODArray<CacheDictionary::Key> CacheDictionary::getCachedIds() const
{
    const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

    PaddedPODArray<Key> ids;
    for (size_t idx = 0; idx < cells.size(); ++idx)
    {
        if (isEmptyCell(idx) || cells[idx].isDefault())
            continue;

        ids.push_back(cells[idx].id);
    }

    return ids;
}
/// Creates a DictionaryBlockInputStream over the ids returned by getCachedIds().
BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
    return std::make_shared<DictionaryBlockInputStream<CacheDictionary, Key>>(
        shared_from_this(), max_block_size, getCachedIds(), column_names);
}
}

View File

@ -137,6 +137,8 @@ public:
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
template <typename Value> using ContainerType = Value[];
template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
@ -208,6 +210,10 @@ private:
const std::vector<Key> & requested_ids, PresentIdHandler && on_cell_updated,
AbsentIdHandler && on_id_not_found) const;
PaddedPODArray<Key> getCachedIds() const;
bool isEmptyCell(const UInt64 idx) const;
UInt64 getCellIdx(const Key id) const;
void setDefaultAttributeValue(Attribute & attribute, const Key idx) const;

View File

@ -1,4 +1,5 @@
#include <Dictionaries/ComplexKeyCacheDictionary.h>
#include <Dictionaries/DictionaryBlockInputStream.h>
#include <Common/BitHelpers.h>
#include <Common/randomSeed.h>
#include <Common/Stopwatch.h>
@ -265,7 +266,7 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
/// fetch up-to-date values, decide which ones require update
for (const auto row : ext::range(0, rows_num))
{
const StringRef key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool);
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
keys_array[row] = key;
const auto find_result = findCellIdx(key, now);
const auto & cell_idx = find_result.cell_idx;
@ -320,11 +321,11 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
void ComplexKeyCacheDictionary::createAttributes()
{
const auto size = dict_struct.attributes.size();
attributes.reserve(size);
const auto attributes_size = dict_struct.attributes.size();
attributes.reserve(attributes_size);
bytes_allocated += size * sizeof(CellMetadata);
bytes_allocated += size * sizeof(attributes.front());
bytes_allocated += attributes_size * sizeof(attributes.front());
for (const auto & attribute : dict_struct.attributes)
{
@ -457,7 +458,7 @@ void ComplexKeyCacheDictionary::getItemsNumberImpl(
/// fetch up-to-date values, decide which ones require update
for (const auto row : ext::range(0, rows_num))
{
const StringRef key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool);
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
keys_array[row] = key;
const auto find_result = findCellIdx(key, now);
@ -536,7 +537,7 @@ void ComplexKeyCacheDictionary::getItemsString(
/// fetch up-to-date values, discard on fail
for (const auto row : ext::range(0, rows_num))
{
const StringRef key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool);
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
SCOPE_EXIT(temporary_keys_pool.rollback(key.size));
const auto find_result = findCellIdx(key, now);
@ -581,7 +582,7 @@ void ComplexKeyCacheDictionary::getItemsString(
const auto now = std::chrono::system_clock::now();
for (const auto row : ext::range(0, rows_num))
{
const StringRef key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool);
const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool);
keys_array[row] = key;
const auto find_result = findCellIdx(key, now);
@ -899,7 +900,7 @@ StringRef ComplexKeyCacheDictionary::allocKey(const size_t row, const Columns &
if (key_size_is_fixed)
return placeKeysInFixedSizePool(row, key_columns);
return placeKeysInPool(row, key_columns, keys, *keys_pool);
return placeKeysInPool(row, key_columns, keys, *dict_struct.key, *keys_pool);
}
void ComplexKeyCacheDictionary::freeKey(const StringRef key) const
@ -910,28 +911,49 @@ void ComplexKeyCacheDictionary::freeKey(const StringRef key) const
keys_pool->free(const_cast<char *>(key.data), key.size);
}
template <typename Arena>
template <typename Pool>
StringRef ComplexKeyCacheDictionary::placeKeysInPool(
const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool)
const size_t row, const Columns & key_columns, StringRefs & keys,
const std::vector<DictionaryAttribute> & key_attributes, Pool & pool)
{
const auto keys_size = key_columns.size();
size_t sum_keys_size{};
for (const auto i : ext::range(0, keys_size))
{
keys[i] = key_columns[i]->getDataAtWithTerminatingZero(row);
sum_keys_size += keys[i].size;
}
const auto res = pool.alloc(sum_keys_size);
auto place = res;
for (size_t j = 0; j < keys_size; ++j)
{
memcpy(place, keys[j].data, keys[j].size);
place += keys[j].size;
keys[j] = key_columns[j]->getDataAt(row);
sum_keys_size += keys[j].size;
if (key_attributes[j].underlying_type == AttributeUnderlyingType::String)
sum_keys_size += sizeof(size_t) + 1;
}
return { res, sum_keys_size };
auto place = pool.alloc(sum_keys_size);
auto key_start = place;
for (size_t j = 0; j < keys_size; ++j)
{
if (key_attributes[j].underlying_type == AttributeUnderlyingType::String)
{
auto start = key_start;
auto key_size = keys[j].size + 1;
memcpy(key_start, &key_size, sizeof(size_t));
key_start += sizeof(size_t);
memcpy(key_start, keys[j].data, keys[j].size);
key_start += keys[j].size;
*key_start = '\0';
++key_start;
keys[j].data = start;
keys[j].size += sizeof(size_t) + 1;
}
else
{
memcpy(key_start, keys[j].data, keys[j].size);
keys[j].data = key_start;
key_start += keys[j].size;
}
}
return { place, sum_keys_size };
}
StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool(
@ -965,4 +987,26 @@ StringRef ComplexKeyCacheDictionary::copyKey(const StringRef key) const
return { res, key.size };
}
bool ComplexKeyCacheDictionary::isEmptyCell(const UInt64 idx) const
{
return (cells[idx].key == StringRef{} && (idx != zero_cell_idx
|| cells[idx].data == ext::safe_bit_cast<CellMetadata::time_point_urep_t>(CellMetadata::time_point_t())));
}
/// Creates a DictionaryBlockInputStream over the keys of all cells that are neither empty nor marked as default.
BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
    std::vector<StringRef> cached_keys;

    {
        /// `read_lock` is scoped so that only the scan of `cells` runs while it exists.
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        for (auto idx : ext::range(0, cells.size()))
        {
            if (isEmptyCell(idx) || cells[idx].isDefault())
                continue;

            cached_keys.push_back(cells[idx].key);
        }
    }

    return std::make_shared<DictionaryBlockInputStream<ComplexKeyCacheDictionary, UInt64>>(
        shared_from_this(), max_block_size, cached_keys, column_names);
}
}

View File

@ -147,6 +147,8 @@ public:
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
template <typename Value> using MapType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
template <typename Value> using ContainerType = Value[];
@ -233,7 +235,8 @@ private:
template <typename Arena>
static StringRef placeKeysInPool(
const std::size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool);
const std::size_t row, const Columns & key_columns, StringRefs & keys,
const std::vector<DictionaryAttribute> & key_attributes, Arena & pool);
StringRef placeKeysInFixedSizePool(
const std::size_t row, const Columns & key_columns) const;
@ -255,6 +258,8 @@ private:
return findCellIdx(key, now, hash);
};
bool isEmptyCell(const UInt64 idx) const;
const std::string name;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;

View File

@ -1,6 +1,7 @@
#include <ext/map.h>
#include <ext/range.h>
#include <Dictionaries/ComplexKeyHashedDictionary.h>
#include <Dictionaries/DictionaryBlockInputStream.h>
namespace DB
@ -460,22 +461,22 @@ StringRef ComplexKeyHashedDictionary::placeKeysInPool(
{
const auto keys_size = key_columns.size();
size_t sum_keys_size{};
for (const auto i : ext::range(0, keys_size))
{
keys[i] = key_columns[i]->getDataAtWithTerminatingZero(row);
sum_keys_size += keys[i].size;
}
const auto res = pool.alloc(sum_keys_size);
auto place = res;
const char * block_start = nullptr;
for (size_t j = 0; j < keys_size; ++j)
{
memcpy(place, keys[j].data, keys[j].size);
place += keys[j].size;
keys[j] = key_columns[j]->serializeValueIntoArena(row, pool, block_start);
sum_keys_size += keys[j].size;
}
return { res, sum_keys_size };
auto key_start = block_start;
for (size_t j = 0; j < keys_size; ++j)
{
keys[j].data = key_start;
key_start += keys[j].size;
}
return { block_start, sum_keys_size };
}
template <typename T>
@ -502,4 +503,44 @@ void ComplexKeyHashedDictionary::has(const Attribute & attribute, const Columns
query_count.fetch_add(rows, std::memory_order_relaxed);
}
std::vector<StringRef> ComplexKeyHashedDictionary::getKeys() const
{
const Attribute & attribute = attributes.front();
switch (attribute.type)
{
case AttributeUnderlyingType::UInt8: return getKeys<UInt8>(attribute); break;
case AttributeUnderlyingType::UInt16: return getKeys<UInt16>(attribute); break;
case AttributeUnderlyingType::UInt32: return getKeys<UInt32>(attribute); break;
case AttributeUnderlyingType::UInt64: return getKeys<UInt64>(attribute); break;
case AttributeUnderlyingType::Int8: return getKeys<Int8>(attribute); break;
case AttributeUnderlyingType::Int16: return getKeys<Int16>(attribute); break;
case AttributeUnderlyingType::Int32: return getKeys<Int32>(attribute); break;
case AttributeUnderlyingType::Int64: return getKeys<Int64>(attribute); break;
case AttributeUnderlyingType::Float32: return getKeys<Float32>(attribute); break;
case AttributeUnderlyingType::Float64: return getKeys<Float64>(attribute); break;
case AttributeUnderlyingType::String: return getKeys<StringRef>(attribute); break;
}
return {};
}
template <typename T>
std::vector<StringRef> ComplexKeyHashedDictionary::getKeys(const Attribute & attribute) const
{
const ContainerType<T> & attr = *std::get<ContainerPtrType<T>>(attribute.maps);
std::vector<StringRef> keys;
keys.reserve(attr.size());
for (const auto & key : attr)
keys.push_back(key.first);
return keys;
}
/// Creates a stream that reads the requested columns of this dictionary,
/// driven by the list of keys currently stored in it.
BlockInputStreamPtr ComplexKeyHashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
    using Stream = DictionaryBlockInputStream<ComplexKeyHashedDictionary, UInt64>;

    return std::make_shared<Stream>(
        shared_from_this(),
        max_block_size,
        getKeys(),
        column_names);
}
}

View File

@ -16,6 +16,7 @@
namespace DB
{
class ComplexKeyHashedDictionary final : public IDictionaryBase
{
public:
@ -125,6 +126,8 @@ public:
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
template <typename Value> using ContainerType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
@ -188,6 +191,11 @@ private:
template <typename T>
void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;
std::vector<StringRef> getKeys() const;
template <typename T>
std::vector<StringRef> getKeys(const Attribute & attribute) const;
const std::string name;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;

View File

@ -0,0 +1,417 @@
#pragma once
#include <Columns/ColumnVector.h>
#include <Columns/ColumnString.h>
#include <Columns/IColumn.h>
#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataTypes/DataTypesNumber.h>
#include <Dictionaries/DictionaryBlockInputStreamBase.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Dictionaries/IDictionary.h>
#include <ext/range.h>
#include <common/logger_useful.h>
#include <Core/Names.h>
#include <memory>
namespace DB
{
/*
* BlockInputStream implementation for external dictionaries.
* Exposes the in-memory contents of a dictionary as blocks. The base class
* (DictionaryBlockInputStreamBase) requests slices of at most max_block_size rows
* through getBlock().
*/
template <class DictionaryType, class Key>
class DictionaryBlockInputStream : public DictionaryBlockInputStreamBase
{
public:
using DictionatyPtr = std::shared_ptr<DictionaryType const>;
// Stream over a dictionary addressed by plain ids; takes ownership of `ids`.
DictionaryBlockInputStream(std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size,
PaddedPODArray<Key> && ids, const Names & column_names);
// Stream over a complex-key dictionary; `keys` are deserialized into key columns
// by fillKeyColumns() in the constructor.
DictionaryBlockInputStream(std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size,
const std::vector<StringRef> & keys, const Names & column_names);
using GetColumnsFunction =
std::function<ColumnsWithTypeAndName(const Columns &, const std::vector<DictionaryAttribute>& attributes)>;
// Used to separate the key column format used for storage from the one used for viewing.
// Calls get_key_columns_function to get the key columns for the dictionary get function call
// and get_view_columns_function to get the key representation.
// Currently used in the trie dictionary, where columns are stored as ip and mask and are shown as a string.
DictionaryBlockInputStream(std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size,
const Columns & data_columns, const Names & column_names,
GetColumnsFunction && get_key_columns_function,
GetColumnsFunction && get_view_columns_function);
String getName() const override {
return "DictionaryBlockInputStream";
}
protected:
// Builds the block for rows [start, start + size) of the stored ids / key columns / data columns.
Block getBlock(size_t start, size_t size) const override;
private:
// pointer types to getXXX functions
// for single key dictionaries
template <class Type>
using DictionaryGetter = void (DictionaryType::*)(
const std::string &, const PaddedPODArray<Key> &, PaddedPODArray<Type> &) const;
using DictionaryStringGetter = void (DictionaryType::*)(
const std::string &, const PaddedPODArray<Key> &, ColumnString *) const;
// for complex key dictionaries
template <class Type>
using GetterByKey = void (DictionaryType::*)(
const std::string &, const Columns &, const DataTypes &, PaddedPODArray<Type> & out) const;
using StringGetterByKey = void (DictionaryType::*)(
const std::string &, const Columns &, const DataTypes &, ColumnString * out) const;
// call getXXX
// All overloads share one parameter list so fillBlock() can call them uniformly;
// each overload forwards only the arguments its getter kind needs.
// for single key dictionaries
template <class Type, class Container>
void callGetter(DictionaryGetter<Type> getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
template <class Container>
void callGetter(DictionaryStringGetter getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
// for complex key dictionaries
template <class Type, class Container>
void callGetter(GetterByKey<Type> getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
template <class Container>
void callGetter(StringGetterByKey getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
// Assembles the result block: the requested view columns, the id column and the attribute columns.
// Getter / StringGetter select which family of dictionary getters is used.
template <template <class> class Getter, class StringGetter>
Block fillBlock(const PaddedPODArray<Key> & ids, const Columns & keys,
const DataTypes & types, ColumnsWithTypeAndName && view) const;
// Fetches one numeric attribute into a new ColumnVector<AttributeType>.
template <class AttributeType, class Getter>
ColumnPtr getColumnFromAttribute(Getter getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
const DictionaryAttribute & attribute, const DictionaryType & dictionary) const;
// Fetches one string attribute into a new ColumnString.
template <class Getter>
ColumnPtr getColumnFromStringAttribute(Getter getter, const PaddedPODArray<Key> & ids,
const Columns & keys, const DataTypes & data_types,
const DictionaryAttribute& attribute, const DictionaryType& dictionary) const;
// Converts the ids into a UInt64 column.
ColumnPtr getColumnFromIds(const PaddedPODArray<Key>& ids) const;
// Creates one column per key attribute and fills them by deserializing `keys`.
void fillKeyColumns(const std::vector<StringRef> & keys, size_t start, size_t size,
const DictionaryStructure& dictionary_structure, ColumnsWithTypeAndName & columns) const;
DictionatyPtr dictionary;
Names column_names;
// Set by the ids constructor; empty otherwise.
PaddedPODArray<Key> ids;
// Filled by the keys constructor; empty otherwise.
ColumnsWithTypeAndName key_columns;
Poco::Logger * logger;
// fillBlock instantiation chosen by the constructor according to the getter family.
Block (DictionaryBlockInputStream<DictionaryType, Key>::*fillBlockFunction)(
const PaddedPODArray<Key>& ids, const Columns& keys,
const DataTypes & types, ColumnsWithTypeAndName && view) const;
// Set only by the data-columns constructor, together with the two conversion functions below.
Columns data_columns;
GetColumnsFunction get_key_columns_function;
GetColumnsFunction get_view_columns_function;
};
/// Id-based constructor: the stream owns `ids`, and attribute values are
/// fetched with the single-key getters (DictionaryGetter / DictionaryStringGetter).
template <class DictionaryType, class Key>
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
    std::shared_ptr<const IDictionaryBase> dictionary,
    size_t max_block_size,
    PaddedPODArray<Key> && ids,
    const Names & column_names)
    /// The base class is initialized first, so ids.size() is read before ids is moved from.
    : DictionaryBlockInputStreamBase(ids.size(), max_block_size)
    , dictionary(std::static_pointer_cast<const DictionaryType>(dictionary))
    , column_names(column_names)
    , ids(std::move(ids))
    , logger(&Poco::Logger::get("DictionaryBlockInputStream"))
    , fillBlockFunction(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<DictionaryGetter, DictionaryStringGetter>)
{
}
/// Complex-key constructor: deserializes all `keys` into key_columns up front,
/// and attribute values are fetched with the by-key getters.
template <class DictionaryType, class Key>
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
    std::shared_ptr<const IDictionaryBase> dictionary,
    size_t max_block_size,
    const std::vector<StringRef> & keys,
    const Names & column_names)
    : DictionaryBlockInputStreamBase(keys.size(), max_block_size)
    , dictionary(std::static_pointer_cast<const DictionaryType>(dictionary))
    , column_names(column_names)
    , logger(&Poco::Logger::get("DictionaryBlockInputStream"))
    , fillBlockFunction(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, StringGetterByKey>)
{
    /// `dictionary` here is the constructor parameter, not the typed member.
    fillKeyColumns(keys, 0, keys.size(), dictionary->getStructure(), key_columns);
}
/// Data-columns constructor: the stream keeps `data_columns` as stored by the
/// dictionary and converts each slice with the two callbacks
/// (get_key_columns_function for lookups, get_view_columns_function for output).
/// The row count is taken from the first data column; an empty `data_columns`
/// yields an empty stream instead of dereferencing a non-existent element.
template <class DictionaryType, class Key>
DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
    std::shared_ptr<const IDictionaryBase> dictionary, size_t max_block_size,
    const Columns & data_columns, const Names & column_names,
    GetColumnsFunction && get_key_columns_function,
    GetColumnsFunction && get_view_columns_function)
    : DictionaryBlockInputStreamBase(data_columns.empty() ? 0 : data_columns.front()->size(), max_block_size),
    dictionary(std::static_pointer_cast<const DictionaryType>(dictionary)), column_names(column_names),
    logger(&Poco::Logger::get("DictionaryBlockInputStream")),
    fillBlockFunction(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, StringGetterByKey>),
    data_columns(data_columns),
    /// The parameters are rvalue references but named, so they must be moved explicitly
    /// to avoid copying the std::function objects.
    get_key_columns_function(std::move(get_key_columns_function)),
    get_view_columns_function(std::move(get_view_columns_function))
{
}
/// Builds the block for rows [start, start + length).
/// The source of the rows depends on which constructor filled the stream:
/// key columns, plain ids, or raw data columns converted through the callbacks.
template <class DictionaryType, class Key>
Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, size_t length) const
{
    /// Complex-key mode: slice every key column; the slice is both the lookup key and the view.
    if (!key_columns.empty())
    {
        Columns sliced_keys;
        ColumnsWithTypeAndName sliced_view;
        sliced_keys.reserve(key_columns.size());

        for (const auto & source : key_columns)
        {
            auto piece = source.column->cut(start, length);
            sliced_keys.emplace_back(piece);
            sliced_view.emplace_back(piece, source.type, source.name);
        }

        return (this->*fillBlockFunction)({}, sliced_keys, {}, std::move(sliced_view));
    }

    /// Id mode: copy the requested range of ids.
    if (!ids.empty())
    {
        PaddedPODArray<Key> block_ids(ids.begin() + start, ids.begin() + start + length);
        return (this->*fillBlockFunction)(block_ids, {}, {}, {});
    }

    /// Data-columns mode: slice the stored columns, then derive key and view columns from the slice.
    Columns sliced_data;
    sliced_data.reserve(data_columns.size());
    for (const auto & data_column : data_columns)
        sliced_data.push_back(data_column->cut(start, length));

    const DictionaryStructure & structure = dictionary->getStructure();
    const auto & key_attributes = *structure.key;

    ColumnsWithTypeAndName lookup_with_types = get_key_columns_function(sliced_data, key_attributes);
    ColumnsWithTypeAndName view_with_types = get_view_columns_function(sliced_data, key_attributes);

    Columns lookup_columns;
    DataTypes lookup_types;
    for (const auto & entry : lookup_with_types)
    {
        lookup_columns.push_back(entry.column);
        lookup_types.push_back(entry.type);
    }

    return (this->*fillBlockFunction)({}, lookup_columns, lookup_types, std::move(view_with_types));
}
/// Single-key numeric getter: only `ids` is forwarded; the key columns and
/// their types are accepted for signature uniformity and ignored.
template <class DictionaryType, class Key>
template <class Type, class Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
    DictionaryGetter<Type> getter,
    const PaddedPODArray<Key> & ids,
    const Columns & /*keys*/,
    const DataTypes & /*data_types*/,
    Container & container,
    const DictionaryAttribute & attribute,
    const DictionaryType & dictionary) const
{
    (dictionary.*getter)(attribute.name, ids, container);
}
/// Single-key string getter: only `ids` is forwarded; the key columns and
/// their types are accepted for signature uniformity and ignored.
template <class DictionaryType, class Key>
template <class Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
    DictionaryStringGetter getter,
    const PaddedPODArray<Key> & ids,
    const Columns & /*keys*/,
    const DataTypes & /*data_types*/,
    Container & container,
    const DictionaryAttribute & attribute,
    const DictionaryType & dictionary) const
{
    (dictionary.*getter)(attribute.name, ids, container);
}
/// Complex-key numeric getter: the key columns and their types are forwarded;
/// `ids` is accepted for signature uniformity and ignored.
template <class DictionaryType, class Key>
template <class Type, class Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
    GetterByKey<Type> getter,
    const PaddedPODArray<Key> & /*ids*/,
    const Columns & keys,
    const DataTypes & data_types,
    Container & container,
    const DictionaryAttribute & attribute,
    const DictionaryType & dictionary) const
{
    (dictionary.*getter)(attribute.name, keys, data_types, container);
}
/// Complex-key string getter: the key columns and their types are forwarded;
/// `ids` is accepted for signature uniformity and ignored.
template <class DictionaryType, class Key>
template <class Container>
void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
    StringGetterByKey getter,
    const PaddedPODArray<Key> & /*ids*/,
    const Columns & keys,
    const DataTypes & data_types,
    Container & container,
    const DictionaryAttribute & attribute,
    const DictionaryType & dictionary) const
{
    (dictionary.*getter)(attribute.name, keys, data_types, container);
}
/// Assembles the output block for one slice of the dictionary.
///
/// Only columns whose names appear in `column_names` are emitted, in this order:
/// the matching `view` columns, the id column (if the structure has an id),
/// then the attribute columns in structure order.
///
/// ids   - ids of the slice (used by single-key getters and for the id column).
/// keys  - key columns of the slice (used by complex-key getters).
/// types - key column types; if empty, they are taken from the dictionary structure's key.
/// view  - key columns in their presentation form.
template <class DictionaryType, class Key>
template <template <class> class Getter, class StringGetter>
Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
    const PaddedPODArray<Key>& ids, const Columns& keys, const DataTypes & types, ColumnsWithTypeAndName && view) const
{
    std::unordered_set<std::string> names(column_names.begin(), column_names.end());

    /// Fetched once and reused for both the key types and the attributes.
    const DictionaryStructure & structure = dictionary->getStructure();

    DataTypes data_types = types;
    ColumnsWithTypeAndName block_columns;

    data_types.reserve(keys.size());
    if (data_types.empty() && structure.key)
        for (const auto & key : *structure.key)   /// by reference: avoids copying each DictionaryAttribute
            data_types.push_back(key.type);

    for (const auto & column : view)
        if (names.find(column.name) != names.end())
            block_columns.push_back(column);

    if (structure.id && names.find(structure.id->name) != names.end())
        block_columns.emplace_back(getColumnFromIds(ids), std::make_shared<DataTypeUInt64>(), structure.id->name);

    for (const auto idx : ext::range(0, structure.attributes.size()))
    {
        const DictionaryAttribute & attribute = structure.attributes[idx];
        if (names.find(attribute.name) != names.end())
        {
            ColumnPtr column;
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
    column = getColumnFromAttribute<TYPE, Getter<TYPE>>( \
        &DictionaryType::get##TYPE, ids, keys, data_types, attribute, *dictionary)
            switch (attribute.underlying_type)
            {
                case AttributeUnderlyingType::UInt8:
                    GET_COLUMN_FORM_ATTRIBUTE(UInt8);
                    break;
                case AttributeUnderlyingType::UInt16:
                    GET_COLUMN_FORM_ATTRIBUTE(UInt16);
                    break;
                case AttributeUnderlyingType::UInt32:
                    GET_COLUMN_FORM_ATTRIBUTE(UInt32);
                    break;
                case AttributeUnderlyingType::UInt64:
                    GET_COLUMN_FORM_ATTRIBUTE(UInt64);
                    break;
                case AttributeUnderlyingType::Int8:
                    GET_COLUMN_FORM_ATTRIBUTE(Int8);
                    break;
                case AttributeUnderlyingType::Int16:
                    GET_COLUMN_FORM_ATTRIBUTE(Int16);
                    break;
                case AttributeUnderlyingType::Int32:
                    GET_COLUMN_FORM_ATTRIBUTE(Int32);
                    break;
                case AttributeUnderlyingType::Int64:
                    GET_COLUMN_FORM_ATTRIBUTE(Int64);
                    break;
                case AttributeUnderlyingType::Float32:
                    GET_COLUMN_FORM_ATTRIBUTE(Float32);
                    break;
                case AttributeUnderlyingType::Float64:
                    GET_COLUMN_FORM_ATTRIBUTE(Float64);
                    break;
                case AttributeUnderlyingType::String:
                {
                    column = getColumnFromStringAttribute<StringGetter>(
                        &DictionaryType::getString, ids, keys, data_types, attribute, *dictionary);
                    break;
                }
            }
/// Keep the helper macro local to this function: this is a header, and another
/// header defines a macro with the same name but a different body.
#undef GET_COLUMN_FORM_ATTRIBUTE

            block_columns.emplace_back(column, attribute.type, attribute.name);
        }
    }

    return Block(block_columns);
}
/// Creates a ColumnVector<AttributeType> sized to the slice and lets the
/// dictionary getter fill it with the values of `attribute`.
/// The row count comes from the first key column when key columns are given,
/// otherwise from the number of ids.
template <class DictionaryType, class Key>
template <class AttributeType, class Getter>
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute(
    Getter getter, const PaddedPODArray<Key> & ids,
    const Columns & keys, const DataTypes & data_types,
    const DictionaryAttribute & attribute, const DictionaryType & dictionary) const
{
    const size_t rows = keys.empty() ? ids.size() : keys.front()->size();

    auto result = std::make_unique<ColumnVector<AttributeType>>(rows);
    callGetter(getter, ids, keys, data_types, result->getData(), attribute, dictionary);

    return ColumnPtr(std::move(result));
}
/// Creates an empty ColumnString and lets the dictionary's string getter
/// append the values of `attribute` to it.
template <class DictionaryType, class Key>
template <class Getter>
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromStringAttribute(
    Getter getter, const PaddedPODArray<Key> & ids,
    const Columns & keys, const DataTypes & data_types,
    const DictionaryAttribute & attribute, const DictionaryType & dictionary) const
{
    auto result = std::make_shared<ColumnString>();

    /// callGetter takes the container by non-const reference, so the raw pointer must be a named lvalue.
    ColumnString * out = result.get();
    callGetter(getter, ids, keys, data_types, out, attribute, dictionary);

    return result;
}
/// Builds a UInt64 column holding the given ids in order.
template <class DictionaryType, class Key>
ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(const PaddedPODArray<Key> & ids) const
{
    auto id_column = std::make_shared<ColumnVector<UInt64>>();
    id_column->getData().reserve(ids.size());

    for (UInt64 value : ids)
        id_column->insert(value);

    return id_column;
}
/// Appends one empty column per key attribute of `dictionary_structure` to `columns`,
/// then fills the columns from `size` serialized keys starting at index `start`.
///
/// Each key is walked with a single pointer: every column consumes its part via
/// deserializeAndInsertFromArena() and returns the position where the next part begins.
template <class DictionaryType, class Key>
void DictionaryBlockInputStream<DictionaryType, Key>::fillKeyColumns(
    const std::vector<StringRef> & keys, size_t start, size_t size,
    const DictionaryStructure& dictionary_structure, ColumnsWithTypeAndName & columns) const
{
    for (const DictionaryAttribute & attribute : *dictionary_structure.key)
    {
#define ADD_COLUMN(TYPE) columns.push_back( \
    ColumnWithTypeAndName(std::make_shared<ColumnVector<TYPE>>(), attribute.type, attribute.name))
        switch (attribute.underlying_type)
        {
            case AttributeUnderlyingType::UInt8:
                ADD_COLUMN(UInt8);
                break;
            case AttributeUnderlyingType::UInt16:
                ADD_COLUMN(UInt16);
                break;
            case AttributeUnderlyingType::UInt32:
                ADD_COLUMN(UInt32);
                break;
            case AttributeUnderlyingType::UInt64:
                ADD_COLUMN(UInt64);
                break;
            case AttributeUnderlyingType::Int8:
                ADD_COLUMN(Int8);
                break;
            case AttributeUnderlyingType::Int16:
                ADD_COLUMN(Int16);
                break;
            case AttributeUnderlyingType::Int32:
                ADD_COLUMN(Int32);
                break;
            case AttributeUnderlyingType::Int64:
                ADD_COLUMN(Int64);
                break;
            case AttributeUnderlyingType::Float32:
                ADD_COLUMN(Float32);
                break;
            case AttributeUnderlyingType::Float64:
                ADD_COLUMN(Float64);
                break;
            case AttributeUnderlyingType::String:
            {
                columns.push_back(ColumnWithTypeAndName(std::make_shared<ColumnString>(), attribute.type, attribute.name));
                break;
            }
        }
/// This is a header: do not let the helper macro leak into including translation units.
#undef ADD_COLUMN
    }

    /// `size` is a row count, so the end of the range is start + size (not size).
    for (auto idx : ext::range(start, start + size))
    {
        const auto & key = keys[idx];
        auto ptr = key.data;
        for (const auto & column : columns)
            ptr = column.column->deserializeAndInsertFromArena(ptr);
    }
}
}

View File

@ -0,0 +1,29 @@
#include <Dictionaries/DictionaryBlockInputStreamBase.h>
namespace DB
{
/// Remembers the total number of rows and the block size limit; reading starts at row 0.
DictionaryBlockInputStreamBase::DictionaryBlockInputStreamBase(size_t rows_count, size_t max_block_size)
    : rows_count(rows_count)
    , max_block_size(max_block_size)
    , next_row(0)
{
}
/// Returns the address of this object, formatted as text, as the stream id.
String DictionaryBlockInputStreamBase::getID() const
{
    std::stringstream id_stream;
    const void * self = static_cast<const void *>(this);
    id_stream << self;
    return id_stream.str();
}
/// Returns the next slice of at most max_block_size rows, or an empty Block
/// once all rows_count rows have been handed out.
Block DictionaryBlockInputStreamBase::readImpl()
{
    const size_t rows_left = rows_count - next_row;
    if (rows_left == 0)
        return Block();

    const size_t rows_in_block = std::min<size_t>(max_block_size, rows_left);

    /// Advance the cursor only after getBlock() has returned.
    Block result = getBlock(next_row, rows_in_block);
    next_row += rows_in_block;

    return result;
}
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <DataStreams/IProfilingBlockInputStream.h>
namespace DB
{
/// Common base for streams that expose dictionary contents.
/// It tracks a row cursor and, in readImpl(), asks the derived class for
/// consecutive slices of at most max_block_size rows via getBlock().
class DictionaryBlockInputStreamBase : public IProfilingBlockInputStream
{
protected:
/// NOTE(review): not referenced anywhere in the visible code - confirm whether it is still needed.
Block block;
/// rows_count is the total number of rows the stream will produce.
DictionaryBlockInputStreamBase(size_t rows_count, size_t max_block_size);
String getID() const override;
/// Implemented by derived streams: builds the block for rows [start, start + length).
virtual Block getBlock(size_t start, size_t length) const = 0;
private:
const size_t rows_count;
const size_t max_block_size;
/// Index of the first row that has not been returned yet.
size_t next_row;
Block readImpl() override;
/// Rewinds the cursor so that reading starts from the first row.
void readPrefixImpl() override { next_row = 0; }
};
}

View File

@ -1,5 +1,5 @@
#include <Dictionaries/FlatDictionary.h>
#include <Dictionaries/DictionaryBlockInputStream.h>
namespace DB
{
@ -524,4 +524,26 @@ void FlatDictionary::has(const Attribute & attribute, const PaddedPODArray<Key>
query_count.fetch_add(ids_count, std::memory_order_relaxed);
}
/// Returns every index of loaded_ids whose flag is set, in increasing order.
PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const
{
    PaddedPODArray<Key> result;

    const auto slots = ext::size(loaded_ids);
    for (auto slot : ext::range(0, slots))
    {
        if (!loaded_ids[slot])
            continue;

        result.push_back(slot);
    }

    return result;
}
/// Creates a stream that reads the requested columns of this dictionary,
/// driven by the ids currently loaded in it.
BlockInputStreamPtr FlatDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
    using Stream = DictionaryBlockInputStream<FlatDictionary, Key>;

    return std::make_shared<Stream>(
        shared_from_this(),
        max_block_size,
        getIds(),
        column_names);
}
}

View File

@ -125,6 +125,8 @@ public:
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
template <typename Value> using ContainerType = PaddedPODArray<Value>;
template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
@ -191,6 +193,8 @@ private:
const AncestorType & ancestor_ids,
PaddedPODArray<UInt8> & out) const;
PaddedPODArray<Key> getIds() const;
const std::string name;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;

View File

@ -1,6 +1,6 @@
#include <ext/size.h>
#include <Dictionaries/HashedDictionary.h>
#include <Dictionaries/DictionaryBlockInputStream.h>
namespace DB
{
@ -479,4 +479,44 @@ void HashedDictionary::has(const Attribute & attribute, const PaddedPODArray<Key
query_count.fetch_add(rows, std::memory_order_relaxed);
}
/// Copies the key of every entry in the hash map backing `attribute`
/// (value type T) into an array of ids.
template <typename T>
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds(const Attribute & attribute) const
{
    const HashMap<UInt64, T> & map = *std::get<CollectionPtrType<T>>(attribute.maps);

    PaddedPODArray<Key> result;
    result.reserve(map.size());

    for (const auto & entry : map)
        result.push_back(entry.first);

    return result;
}
/// Collects the ids stored for the first attribute, dispatching on that
/// attribute's underlying type to the typed getIds<T> overload.
/// Returns an empty array if the type matches none of the known cases.
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
{
    const auto & first_attribute = attributes.front();

    switch (first_attribute.type)
    {
        case AttributeUnderlyingType::UInt8:
            return getIds<UInt8>(first_attribute);
        case AttributeUnderlyingType::UInt16:
            return getIds<UInt16>(first_attribute);
        case AttributeUnderlyingType::UInt32:
            return getIds<UInt32>(first_attribute);
        case AttributeUnderlyingType::UInt64:
            return getIds<UInt64>(first_attribute);
        case AttributeUnderlyingType::Int8:
            return getIds<Int8>(first_attribute);
        case AttributeUnderlyingType::Int16:
            return getIds<Int16>(first_attribute);
        case AttributeUnderlyingType::Int32:
            return getIds<Int32>(first_attribute);
        case AttributeUnderlyingType::Int64:
            return getIds<Int64>(first_attribute);
        case AttributeUnderlyingType::Float32:
            return getIds<Float32>(first_attribute);
        case AttributeUnderlyingType::Float64:
            return getIds<Float64>(first_attribute);
        case AttributeUnderlyingType::String:
            return getIds<StringRef>(first_attribute);
    }

    return PaddedPODArray<Key>();
}
/// Creates a stream that reads the requested columns of this dictionary,
/// driven by the ids currently stored in it.
BlockInputStreamPtr HashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
    using Stream = DictionaryBlockInputStream<HashedDictionary, Key>;

    return std::make_shared<Stream>(
        shared_from_this(),
        max_block_size,
        getIds(),
        column_names);
}
}

View File

@ -123,6 +123,8 @@ public:
void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
template <typename Value> using CollectionType = HashMap<UInt64, Value>;
template <typename Value> using CollectionPtrType = std::unique_ptr<CollectionType<Value>>;
@ -181,6 +183,11 @@ private:
template <typename T>
void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
template <typename T>
PaddedPODArray<Key> getIds(const Attribute & attribute) const;
PaddedPODArray<Key> getIds() const;
template <typename ChildType, typename AncestorType>
void isInImpl(
const ChildType & child_ids,

View File

@ -2,6 +2,7 @@
#include <Core/Field.h>
#include <Core/StringRef.h>
#include <Core/Names.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Common/PODArray.h>
#include <memory>
@ -19,8 +20,11 @@ struct DictionaryLifetime;
struct DictionaryStructure;
class ColumnString;
class IBlockInputStream;
using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
struct IDictionaryBase
struct IDictionaryBase : public std::enable_shared_from_this<IDictionaryBase>
{
using Key = UInt64;
@ -53,6 +57,8 @@ struct IDictionaryBase
virtual bool isInjective(const std::string & attribute_name) const = 0;
virtual BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const = 0;
virtual ~IDictionaryBase() = default;
};

View File

@ -0,0 +1,214 @@
#pragma once
#include <Columns/ColumnVector.h>
#include <Columns/ColumnString.h>
#include <Columns/IColumn.h>
#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeDate.h>
#include <Dictionaries/DictionaryBlockInputStreamBase.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Dictionaries/IDictionary.h>
#include <ext/range.h>
namespace DB
{
/*
* BlockInputStream implementation for external range dictionaries.
* Exposes the in-memory contents of a dictionary as blocks; each row is an
* (id, start date, end date) triple plus the requested attributes.
* The base class (DictionaryBlockInputStreamBase) requests slices through getBlock().
*/
template <class DictionaryType, class Key>
class RangeDictionaryBlockInputStream : public DictionaryBlockInputStreamBase
{
public:
using DictionatyPtr = std::shared_ptr<DictionaryType const>;
// Takes ownership of the three arrays; the row count is taken from ids.size().
// NOTE(review): start_dates and end_dates are indexed in parallel with ids - presumably the same length; confirm with callers.
RangeDictionaryBlockInputStream(
DictionatyPtr dictionary, size_t max_block_size, const Names & column_names, PaddedPODArray<Key> && ids,
PaddedPODArray<UInt16> && start_dates, PaddedPODArray<UInt16> && end_dates);
String getName() const override {
return "RangeDictionaryBlockInputStream";
}
protected:
// Builds the block for rows [start, start + length).
Block getBlock(size_t start, size_t length) const override;
private:
// Pointer to a dictionary getXXX member that looks values up by (id, date).
template <class Type>
using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &,
const PaddedPODArray<UInt16> &, PaddedPODArray<Type> &) const;
// Fetches one numeric attribute into a new ColumnVector<AttributeType>.
template <class AttributeType>
ColumnPtr getColumnFromAttribute(DictionaryGetter<AttributeType> getter,
const PaddedPODArray<Key>& ids, const PaddedPODArray<UInt16> & dates,
const DictionaryAttribute& attribute, const DictionaryType& dictionary) const;
// Fetches one string attribute into a new ColumnString.
ColumnPtr getColumnFromAttributeString(const PaddedPODArray<Key>& ids, const PaddedPODArray<UInt16> & dates,
const DictionaryAttribute& attribute, const DictionaryType& dictionary) const;
// Copies a plain array into a ColumnVector<T>.
template <class T>
ColumnPtr getColumnFromPODArray(const PaddedPODArray<T>& array) const;
// Adds the id / range-start / range-end column if its name was requested;
// default_name is used when the structure does not define the attribute.
template <class T>
void addSpecialColumn(
const std::experimental::optional<DictionarySpecialAttribute>& attribute, DataTypePtr type,
const std::string & default_name, const std::unordered_set<std::string> & column_names,
const PaddedPODArray<T> & values, ColumnsWithTypeAndName& columns) const;
// Assembles the result block from the special columns and the requested attribute columns.
Block fillBlock(const PaddedPODArray<Key> & ids,
const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const;
DictionatyPtr dictionary;
Names column_names;
PaddedPODArray<Key> ids;
PaddedPODArray<UInt16> start_dates;
PaddedPODArray<UInt16> end_dates;
};
/// Takes ownership of the id and date arrays; the number of rows is ids.size().
template <class DictionaryType, class Key>
RangeDictionaryBlockInputStream<DictionaryType, Key>::RangeDictionaryBlockInputStream(
    DictionatyPtr dictionary,
    size_t max_column_size,
    const Names & column_names,
    PaddedPODArray<Key> && ids,
    PaddedPODArray<UInt16> && start_dates,
    PaddedPODArray<UInt16> && end_dates)
    /// The base class is initialized first, so ids.size() is read before ids is moved from.
    : DictionaryBlockInputStreamBase(ids.size(), max_column_size)
    , dictionary(dictionary)
    , column_names(column_names)
    , ids(std::move(ids))
    , start_dates(std::move(start_dates))
    , end_dates(std::move(end_dates))
{
}
/// Builds the block for rows [start, start + length): copies the matching
/// slice of ids, start dates and end dates and passes them to fillBlock().
template <class DictionaryType, class Key>
Block RangeDictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, size_t length) const
{
    PaddedPODArray<Key> block_ids;
    PaddedPODArray<UInt16> block_start_dates;
    PaddedPODArray<UInt16> block_end_dates;
    block_ids.reserve(length);
    block_start_dates.reserve(length);
    block_end_dates.reserve(length);

    for (auto idx : ext::range(start, start + length))
    {
        block_ids.push_back(ids[idx]);
        /// Read from the member arrays; indexing the block_* locals here would read
        /// elements of the arrays that are still being filled.
        block_start_dates.push_back(start_dates[idx]);
        block_end_dates.push_back(end_dates[idx]);
    }

    return fillBlock(block_ids, block_start_dates, block_end_dates);
}
/// Creates a ColumnVector<AttributeType> with one row per id and lets the
/// dictionary getter fill it with the values of `attribute` for (id, date).
template <class DictionaryType, class Key>
template <class AttributeType>
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute(
    DictionaryGetter<AttributeType> getter,
    const PaddedPODArray<Key> & ids,
    const PaddedPODArray<UInt16> & dates,
    const DictionaryAttribute & attribute,
    const DictionaryType & dictionary) const
{
    auto result = std::make_unique<ColumnVector<AttributeType>>(ids.size());
    auto & values = result->getData();

    (dictionary.*getter)(attribute.name, ids, dates, values);

    return ColumnPtr(std::move(result));
}
/// Creates an empty ColumnString and lets the dictionary's getString()
/// append the values of `attribute` for each (id, date) pair.
template <class DictionaryType, class Key>
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttributeString(
    const PaddedPODArray<Key> & ids,
    const PaddedPODArray<UInt16> & dates,
    const DictionaryAttribute & attribute,
    const DictionaryType & dictionary) const
{
    auto result = std::make_unique<ColumnString>();
    ColumnString * out = result.get();

    dictionary.getString(attribute.name, ids, dates, out);

    return ColumnPtr(std::move(result));
}
/// Builds a ColumnVector<T> holding the elements of `array` in order.
template <class DictionaryType, class Key>
template <class T>
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, Key>::getColumnFromPODArray(const PaddedPODArray<T> & array) const
{
    auto result = std::make_unique<ColumnVector<T>>();
    result->getData().reserve(array.size());

    for (T element : array)
        result->insert(element);

    return ColumnPtr(std::move(result));
}
/// Appends a column built from `values` to `columns` if its name was requested.
/// The column is named after `attribute` when the structure defines it,
/// and `default_name` otherwise.
template <class DictionaryType, class Key>
template <class T>
void RangeDictionaryBlockInputStream<DictionaryType, Key>::addSpecialColumn(
    const std::experimental::optional<DictionarySpecialAttribute> & attribute, DataTypePtr type,
    const std::string & default_name, const std::unordered_set<std::string> & column_names,
    const PaddedPODArray<T> & values, ColumnsWithTypeAndName & columns) const
{
    std::string name = attribute ? attribute->name : default_name;

    const bool requested = column_names.find(name) != column_names.end();
    if (!requested)
        return;

    columns.emplace_back(getColumnFromPODArray(values), type, name);
}
/// Assembles the output block for one slice of the dictionary.
///
/// Only columns whose names appear in `column_names` are emitted, in this order:
/// id, range start, range end, then the attribute columns in structure order.
/// Attribute values are looked up by (id, start date) of each row.
template <class DictionaryType, class Key>
Block RangeDictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
    const PaddedPODArray<Key>& ids,
    const PaddedPODArray<UInt16> & start_dates, const PaddedPODArray<UInt16> & end_dates) const
{
    ColumnsWithTypeAndName columns;
    const DictionaryStructure& structure = dictionary->getStructure();

    std::unordered_set<std::string> names(column_names.begin(), column_names.end());

    addSpecialColumn(structure.id, std::make_shared<DataTypeUInt64>(), "ID", names, ids, columns);
    addSpecialColumn(structure.range_min, std::make_shared<DataTypeDate>(), "Range Start", names, start_dates, columns);
    addSpecialColumn(structure.range_max, std::make_shared<DataTypeDate>(), "Range End", names, end_dates, columns);

    for (const auto idx : ext::range(0, structure.attributes.size()))
    {
        const DictionaryAttribute& attribute = structure.attributes[idx];
        if (names.find(attribute.name) != names.end())
        {
            ColumnPtr column;
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE)\
    column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids, start_dates, attribute, *dictionary)
            switch (attribute.underlying_type)
            {
                case AttributeUnderlyingType::UInt8:
                    GET_COLUMN_FORM_ATTRIBUTE(UInt8);
                    break;
                case AttributeUnderlyingType::UInt16:
                    GET_COLUMN_FORM_ATTRIBUTE(UInt16);
                    break;
                case AttributeUnderlyingType::UInt32:
                    GET_COLUMN_FORM_ATTRIBUTE(UInt32);
                    break;
                case AttributeUnderlyingType::UInt64:
                    GET_COLUMN_FORM_ATTRIBUTE(UInt64);
                    break;
                case AttributeUnderlyingType::Int8:
                    GET_COLUMN_FORM_ATTRIBUTE(Int8);
                    break;
                case AttributeUnderlyingType::Int16:
                    GET_COLUMN_FORM_ATTRIBUTE(Int16);
                    break;
                case AttributeUnderlyingType::Int32:
                    GET_COLUMN_FORM_ATTRIBUTE(Int32);
                    break;
                case AttributeUnderlyingType::Int64:
                    GET_COLUMN_FORM_ATTRIBUTE(Int64);
                    break;
                case AttributeUnderlyingType::Float32:
                    GET_COLUMN_FORM_ATTRIBUTE(Float32);
                    break;
                case AttributeUnderlyingType::Float64:
                    GET_COLUMN_FORM_ATTRIBUTE(Float64);
                    break;
                case AttributeUnderlyingType::String:
                    column = getColumnFromAttributeString(ids, start_dates, attribute, *dictionary);
                    break;
            }
/// Keep the helper macro local to this function: this is a header, and another
/// header defines a macro with the same name but a different body.
#undef GET_COLUMN_FORM_ATTRIBUTE

            columns.emplace_back(column, attribute.type, attribute.name);
        }
    }

    return Block(columns);
}
}

View File

@ -1,4 +1,5 @@
#include <Dictionaries/RangeHashedDictionary.h>
#include <Dictionaries/RangeDictionaryBlockInputStream.h>
namespace DB
@ -353,4 +354,59 @@ const RangeHashedDictionary::Attribute & RangeHashedDictionary::getAttributeWith
return attribute;
}
/// Fills the output arrays from the storage of the first attribute,
/// dispatching on that attribute's underlying type to the typed overload.
void RangeHashedDictionary::getIdsAndDates(PaddedPODArray<Key> & ids,
    PaddedPODArray<UInt16> & start_dates, PaddedPODArray<UInt16> & end_dates) const
{
    const auto & first_attribute = attributes.front();

    switch (first_attribute.type)
    {
        case AttributeUnderlyingType::UInt8:
            getIdsAndDates<UInt8>(first_attribute, ids, start_dates, end_dates);
            break;
        case AttributeUnderlyingType::UInt16:
            getIdsAndDates<UInt16>(first_attribute, ids, start_dates, end_dates);
            break;
        case AttributeUnderlyingType::UInt32:
            getIdsAndDates<UInt32>(first_attribute, ids, start_dates, end_dates);
            break;
        case AttributeUnderlyingType::UInt64:
            getIdsAndDates<UInt64>(first_attribute, ids, start_dates, end_dates);
            break;
        case AttributeUnderlyingType::Int8:
            getIdsAndDates<Int8>(first_attribute, ids, start_dates, end_dates);
            break;
        case AttributeUnderlyingType::Int16:
            getIdsAndDates<Int16>(first_attribute, ids, start_dates, end_dates);
            break;
        case AttributeUnderlyingType::Int32:
            getIdsAndDates<Int32>(first_attribute, ids, start_dates, end_dates);
            break;
        case AttributeUnderlyingType::Int64:
            getIdsAndDates<Int64>(first_attribute, ids, start_dates, end_dates);
            break;
        case AttributeUnderlyingType::Float32:
            getIdsAndDates<Float32>(first_attribute, ids, start_dates, end_dates);
            break;
        case AttributeUnderlyingType::Float64:
            getIdsAndDates<Float64>(first_attribute, ids, start_dates, end_dates);
            break;
        case AttributeUnderlyingType::String:
            /// String attributes are stored as StringRef values.
            getIdsAndDates<StringRef>(first_attribute, ids, start_dates, end_dates);
            break;
    }
}
/** Appends one (id, range start, range end) triple per stored range of `attribute`
  * to the three output arrays.
  *
  * The arrays are consumed as parallel columns, so they must stay the same length:
  * the id is therefore repeated for every range that belongs to the same key.
  */
template <typename T>
void RangeHashedDictionary::getIdsAndDates(const Attribute& attribute, PaddedPODArray<Key> & ids,
    PaddedPODArray<UInt16> & start_dates, PaddedPODArray<UInt16> & end_dates) const
{
    const HashMap<UInt64, Values<T>> & attr = *std::get<Ptr<T>>(attribute.maps);

    /// The number of keys is only a lower bound for the number of ranges,
    /// but it avoids the first few reallocations.
    ids.reserve(attr.size());
    start_dates.reserve(attr.size());
    end_dates.reserve(attr.size());

    for (const auto & key : attr)
    {
        for (const auto & value : key.second)
        {
            ids.push_back(key.first);
            start_dates.push_back(value.range.first);
            end_dates.push_back(value.range.second);
        }
    }
}
/// Creates a stream over the dictionary contents: the ids and range boundaries are
/// collected here and handed over, together with a shared pointer to this dictionary.
BlockInputStreamPtr RangeHashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
    using Stream = RangeDictionaryBlockInputStream<RangeHashedDictionary, Key>;

    PaddedPODArray<Key> keys;
    PaddedPODArray<UInt16> range_starts;
    PaddedPODArray<UInt16> range_ends;
    getIdsAndDates(keys, range_starts, range_ends);

    auto self = std::static_pointer_cast<const RangeHashedDictionary>(shared_from_this());

    return std::make_shared<Stream>(
        self, max_block_size, column_names, std::move(keys), std::move(range_starts), std::move(range_ends));
}
}

View File

@ -79,6 +79,8 @@ public:
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const PaddedPODArray<UInt16> & dates,
ColumnString * out) const;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
struct Range : std::pair<UInt16, UInt16>
{
@ -166,6 +168,13 @@ private:
const Attribute & getAttributeWithType(const std::string & name, const AttributeUnderlyingType type) const;
/// Fills `ids`, `start_dates` and `end_dates` from the keys and ranges stored for the first attribute.
/// Dispatches on the attribute's underlying type to the templated overload below.
void getIdsAndDates(PaddedPODArray<Key> & ids,
PaddedPODArray<UInt16> & start_dates, PaddedPODArray<UInt16> & end_dates) const;
/// Typed implementation: T is the value type stored in the maps of `attribute`.
template <typename T>
void getIdsAndDates(const Attribute & attribute, PaddedPODArray<Key> & ids,
PaddedPODArray<UInt16> & start_dates, PaddedPODArray<UInt16> & end_dates) const;
const std::string name;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;

View File

@ -1,10 +1,19 @@
#include <stack>
#include <ext/map.h>
#include <ext/range.h>
#include <Poco/Net/IPAddress.h>
#include <Poco/ByteOrder.h>
#include <Dictionaries/TrieDictionary.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnFixedString.h>
#include <Dictionaries/DictionaryBlockInputStream.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeString.h>
#include <IO/WriteIntText.h>
#include <Common/formatIPv6.h>
#include <iostream>
namespace DB
{
@ -20,7 +29,7 @@ TrieDictionary::TrieDictionary(
const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime, bool require_nonempty)
: name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime),
require_nonempty(require_nonempty)
require_nonempty(require_nonempty), logger(&Poco::Logger::get("TrieDictionary"))
{
createAttributes();
trie = btrie_create();
@ -425,7 +434,7 @@ void TrieDictionary::getItemsImpl(
auto addr = first_column->getDataAt(i);
if (addr.size != 16)
throw Exception("Expected key to be FixedString(16)", ErrorCodes::LOGICAL_ERROR);
uintptr_t slot = btrie_find_a6(trie, reinterpret_cast<const UInt8*>(addr.data));
set_value(i, slot != BTRIE_NULL ? vec[slot] : get_default(i));
}
@ -536,12 +545,101 @@ void TrieDictionary::has(const Attribute & attribute, const Columns & key_column
auto addr = first_column->getDataAt(i);
if (unlikely(addr.size != 16))
throw Exception("Expected key to be FixedString(16)", ErrorCodes::LOGICAL_ERROR);
uintptr_t slot = btrie_find_a6(trie, reinterpret_cast<const UInt8*>(addr.data));
out[i] = (slot != BTRIE_NULL);
}
}
query_count.fetch_add(rows, std::memory_order_relaxed);}
query_count.fetch_add(rows, std::memory_order_relaxed);
}
/** Walks the binary trie without recursion (left subtree, then the node, then the right subtree)
  * and calls getter(key, depth) for every node whose value is not BTRIE_NULL.
  *
  * `key` accumulates the path from the root, most significant bit first: the bit for a level
  * is set when the walk descends into a right child and cleared again when that level is left.
  * `depth` is the size of the explicit stack right after the node has been popped.
  */
template <typename Getter, typename KeyType>
void TrieDictionary::trieTraverse(const btrie_t * tree, Getter && getter) const
{
    KeyType key = 0;
    /// Only the most significant bit of KeyType is set.
    const KeyType high_bit = ~((~key) >> 1);

    std::stack<btrie_node_t *> stack;

    /// Start from the leftmost path: the root and the chain of its left descendants.
    btrie_node_t * node = tree->root;
    while (node)
    {
        stack.push(node);
        node = node->left;
    }

    /// Bit of the key that corresponds to a path of the given length (no bit for length 0).
    auto getBit = [&high_bit](size_t size) { return size ? (high_bit >> (size - 1)) : 0; };

    while (!stack.empty())
    {
        node = stack.top();
        stack.pop();

        if (node && node->value != BTRIE_NULL)
            getter(key, stack.size());

        if (node && node->right)
        {
            /// A null placeholder takes the place of the popped node, so the stack size
            /// keeps matching the depth while the right subtree is being processed.
            stack.push(nullptr);
            key |= getBit(stack.size());
            stack.push(node->right);
            while (stack.top()->left)
                stack.push(stack.top()->left);
        }
        else
            key &= ~getBit(stack.size());
    }
}
/** Collects the keys stored in the trie into two columns:
  * a FixedString(IPV6_BINARY_LENGTH) column with the addresses and
  * a UInt8 column with the second value reported by the traversal for each key.
  */
Columns TrieDictionary::getKeyColumns() const
{
    auto addresses = std::make_shared<ColumnFixedString>(IPV6_BINARY_LENGTH);
    auto masks = std::make_shared<ColumnVector<UInt8>>();

    auto collect = [&addresses, &masks](__uint128_t ip, size_t mask)
    {
        /// Convert each 64-bit half with fromNetwork and exchange the halves
        /// (presumably to get the bytes of the address in big-endian order - TODO confirm).
        UInt64 * halves = reinterpret_cast<UInt64 *>(&ip);
        halves[0] = Poco::ByteOrder::fromNetwork(halves[0]);
        halves[1] = Poco::ByteOrder::fromNetwork(halves[1]);
        std::swap(halves[0], halves[1]);

        addresses->insertData(reinterpret_cast<const char *>(halves), IPV6_BINARY_LENGTH);
        masks->insert(static_cast<UInt8>(mask));
    };

    trieTraverse<decltype(collect), __uint128_t>(trie, std::move(collect));

    return {addresses, masks};
}
/** Creates a stream over the dictionary contents.
  * Two callbacks are passed to the stream:
  *  - one wraps the binary address column as the key column (FixedString);
  *  - one renders each key as text in the form "<address>/<mask>" (String).
  * Both columns are named after the first attribute handed to the callback.
  */
BlockInputStreamPtr TrieDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
    using Stream = DictionaryBlockInputStream<TrieDictionary, UInt64>;

    auto make_keys = [](const Columns& columns, const std::vector<DictionaryAttribute>& attributes)
    {
        const auto & first_attribute = attributes.front();
        return ColumnsWithTypeAndName({ColumnWithTypeAndName(columns.front(),
            std::make_shared<DataTypeFixedString>(IPV6_BINARY_LENGTH), first_attribute.name)});
    };

    auto make_view = [](const Columns& columns, const std::vector<DictionaryAttribute>& attributes)
    {
        auto text_column = std::make_shared<ColumnString>();
        auto addresses = std::static_pointer_cast<ColumnFixedString>(columns.front());
        auto masks = std::static_pointer_cast<ColumnVector<UInt8>>(columns.back());

        char buffer[48];
        for (size_t row = 0, rows = addresses->size(); row < rows; ++row)
        {
            UInt8 mask = masks->getElement(row);

            char * cursor = buffer;
            formatIPv6(reinterpret_cast<const unsigned char *>(addresses->getDataAt(row).data), cursor);
            /// Overwrite the last character written by formatIPv6 with the separator.
            cursor[-1] = '/';
            auto digits = detail::writeUIntText(mask, cursor);

            text_column->insertData(buffer, digits + (cursor - buffer));
        }

        return ColumnsWithTypeAndName{ColumnWithTypeAndName(text_column, std::make_shared<DataTypeString>(), attributes.front().name)};
    };

    return std::make_shared<Stream>(shared_from_this(), max_block_size, getKeyColumns(), column_names,
        std::move(make_keys), std::move(make_view));
}
}

View File

@ -12,7 +12,7 @@
#include <atomic>
#include <memory>
#include <tuple>
#include <common/logger_useful.h>
namespace DB
{
@ -128,6 +128,8 @@ public:
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
template <typename Value> using ContainerType = std::vector<Value>;
template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
@ -190,6 +192,11 @@ private:
template <typename T>
void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;
/// Walks the given trie and calls getter(key, depth) for every node that holds a value.
/// KeyType is the unsigned integer type used to accumulate the key bits.
template <typename Getter, typename KeyType>
void trieTraverse(const btrie_t * trie, Getter && getter) const;
/// Returns the keys stored in the trie as two columns: binary addresses and UInt8 masks.
Columns getKeyColumns() const;
const std::string name;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;
@ -210,6 +217,8 @@ private:
std::chrono::time_point<std::chrono::system_clock> creation_time;
std::exception_ptr creation_exception;
Logger * logger;
};

View File

@ -1,7 +1,7 @@
#include <Functions/Conditional/ArgsInfo.h>
#include <Functions/Conditional/CondException.h>
#include <Functions/Conditional/common.h>
#include <Functions/DataTypeTraits.h>
#include <DataTypes/DataTypeTraits.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFixedString.h>

View File

@ -9,7 +9,7 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Common/memcpySmall.h>
#include <Functions/NumberTraits.h>
#include <DataTypes/NumberTraits.h>
namespace DB

View File

@ -4,7 +4,7 @@
#include <Functions/Conditional/common.h>
#include <Functions/Conditional/NullMapBuilder.h>
#include <Functions/Conditional/CondSource.h>
#include <Functions/NumberTraits.h>
#include <DataTypes/NumberTraits.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnConst.h>

View File

@ -4,8 +4,8 @@
#include <Functions/Conditional/ArgsInfo.h>
#include <Functions/Conditional/NumericEvaluator.h>
#include <Functions/Conditional/ArrayEvaluator.h>
#include <Functions/NumberTraits.h>
#include <Functions/DataTypeTraits.h>
#include <DataTypes/NumberTraits.h>
#include <DataTypes/DataTypeTraits.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <Columns/ColumnVector.h>

View File

@ -1,7 +1,7 @@
#include <Functions/Conditional/getArrayType.h>
#include <Functions/Conditional/CondException.h>
#include <Functions/Conditional/common.h>
#include <Functions/DataTypeTraits.h>
#include <DataTypes/DataTypeTraits.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>

View File

@ -36,14 +36,14 @@ public:
FunctionFactory();
FunctionPtr get(const std::string & name, const Context & context) const; /// Throws an exception if not found.
FunctionPtr tryGet(const std::string & name, const Context & context) const; /// Returns nullptr if not found.
FunctionPtr tryGet(const std::string & name, const Context & context) const; /// Returns nullptr if not found.
/// No locking, you must register all functions before usage of get, tryGet.
template <typename Function> void registerFunction()
{
static_assert(std::is_same<decltype(&Function::create), Creator>::value, "Function::create has incorrect type");
if (!functions.emplace(Function::name, &Function::create).second)
if (!functions.emplace(std::string(Function::name), &Function::create).second)
throw Exception("FunctionFactory: the function name '" + std::string(Function::name) + "' is not unique",
ErrorCodes::LOGICAL_ERROR);
}

View File

@ -6,7 +6,7 @@
#include <Columns/ColumnVector.h>
#include <Columns/ColumnConst.h>
#include <Functions/IFunction.h>
#include <Functions/NumberTraits.h>
#include <DataTypes/NumberTraits.h>
#include <Core/AccurateComparison.h>
#include <Core/FieldVisitors.h>
@ -17,6 +17,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_DIVISION;
extern const int ILLEGAL_COLUMN;
}

View File

@ -384,13 +384,13 @@ public:
void update(size_t from)
{
if (index >= size)
throw Exception{"Logical error: index passes to NullMapBuilder is out of range of column.", ErrorCodes::LOGICAL_ERROR};
throw Exception{"Logical error: index passed to NullMapBuilder is out of range of column.", ErrorCodes::LOGICAL_ERROR};
bool is_null;
if (src_nullable_col != nullptr)
is_null = src_nullable_col->isNullAt(from);
else
is_null = (*src_array)[from].isNull();
is_null = from < src_array->size() ? (*src_array)[from].isNull() : true;
auto & null_map_data = static_cast<ColumnUInt8 &>(*sink_null_map).getData();
null_map_data[index] = is_null ? 1 : 0;
@ -401,7 +401,7 @@ public:
void update()
{
if (index >= size)
throw Exception{"Logical error: index passes to NullMapBuilder is out of range of column.", ErrorCodes::LOGICAL_ERROR};
throw Exception{"Logical error: index passed to NullMapBuilder is out of range of column.", ErrorCodes::LOGICAL_ERROR};
auto & null_map_data = static_cast<ColumnUInt8 &>(*sink_null_map).getData();
null_map_data[index] = 0;
@ -906,7 +906,8 @@ bool FunctionArrayElement::executeConstConst(Block & block, const ColumnNumbers
Field value;
if (real_index < array_size)
value = array.at(real_index);
else
if (value.isNull())
value = block.getByPosition(result).type->getDefault();
block.getByPosition(result).column = block.getByPosition(result).type->createConstColumn(
@ -1161,14 +1162,14 @@ void FunctionArrayElement::perform(Block & block, const ColumnNumbers & argument
}
else if (!block.safeGetByPosition(arguments[1]).column->isConst())
{
if (!( executeArgument<UInt8> (block, arguments, result, builder)
|| executeArgument<UInt16> (block, arguments, result, builder)
|| executeArgument<UInt32> (block, arguments, result, builder)
|| executeArgument<UInt64> (block, arguments, result, builder)
if (!( executeArgument<UInt8> (block, arguments, result, builder)
|| executeArgument<UInt16> (block, arguments, result, builder)
|| executeArgument<UInt32> (block, arguments, result, builder)
|| executeArgument<UInt64> (block, arguments, result, builder)
|| executeArgument<Int8> (block, arguments, result, builder)
|| executeArgument<Int16> (block, arguments, result, builder)
|| executeArgument<Int32> (block, arguments, result, builder)
|| executeArgument<Int64> (block, arguments, result, builder)))
|| executeArgument<Int16> (block, arguments, result, builder)
|| executeArgument<Int32> (block, arguments, result, builder)
|| executeArgument<Int64> (block, arguments, result, builder)))
throw Exception("Second argument for function " + getName() + " must must have UInt or Int type.",
ErrorCodes::ILLEGAL_COLUMN);
}
@ -1182,19 +1183,19 @@ void FunctionArrayElement::perform(Block & block, const ColumnNumbers & argument
if (index == UInt64(0))
throw Exception("Array indices is 1-based", ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX);
if (!( executeNumberConst<UInt8> (block, arguments, result, index, builder)
|| executeNumberConst<UInt16> (block, arguments, result, index, builder)
|| executeNumberConst<UInt32> (block, arguments, result, index, builder)
|| executeNumberConst<UInt64> (block, arguments, result, index, builder)
if (!( executeNumberConst<UInt8> (block, arguments, result, index, builder)
|| executeNumberConst<UInt16> (block, arguments, result, index, builder)
|| executeNumberConst<UInt32> (block, arguments, result, index, builder)
|| executeNumberConst<UInt64> (block, arguments, result, index, builder)
|| executeNumberConst<Int8> (block, arguments, result, index, builder)
|| executeNumberConst<Int16> (block, arguments, result, index, builder)
|| executeNumberConst<Int32> (block, arguments, result, index, builder)
|| executeNumberConst<Int64> (block, arguments, result, index, builder)
|| executeNumberConst<Float32> (block, arguments, result, index, builder)
|| executeNumberConst<Float64> (block, arguments, result, index, builder)
|| executeConstConst (block, arguments, result, index, builder)
|| executeStringConst (block, arguments, result, index, builder)
|| executeGenericConst (block, arguments, result, index, builder)))
|| executeNumberConst<Int16> (block, arguments, result, index, builder)
|| executeNumberConst<Int32> (block, arguments, result, index, builder)
|| executeNumberConst<Int64> (block, arguments, result, index, builder)
|| executeNumberConst<Float32> (block, arguments, result, index, builder)
|| executeNumberConst<Float64> (block, arguments, result, index, builder)
|| executeConstConst (block, arguments, result, index, builder)
|| executeStringConst (block, arguments, result, index, builder)
|| executeGenericConst (block, arguments, result, index, builder)))
throw Exception("Illegal column " + block.safeGetByPosition(arguments[0]).column->getName()
+ " of first argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
@ -2354,11 +2355,13 @@ bool FunctionRange::executeInternal(Block & block, const IColumn * const arg, co
auto & out_offsets = out->getOffsets();
IColumn::Offset_t offset{};
for (const auto i : ext::range(0, in->size()))
for (size_t row_idx = 0, rows = in->size(); row_idx < rows; ++row_idx)
{
std::copy(ext::make_range_iterator(T{}), ext::make_range_iterator(in_data[i]), &out_data[offset]);
offset += in_data[i];
out_offsets[i] = offset;
for (size_t elem_idx = 0, elems = in_data[row_idx]; elem_idx < elems; ++elem_idx)
out_data[offset + elem_idx] = elem_idx;
offset += in_data[row_idx];
out_offsets[row_idx] = offset;
}
return true;
@ -2369,16 +2372,14 @@ bool FunctionRange::executeInternal(Block & block, const IColumn * const arg, co
if ((in_data != 0) && (in->size() > (std::numeric_limits<std::size_t>::max() / in_data)))
throw Exception{
"A call to function " + getName() + " overflows, investigate the values of arguments you are passing",
ErrorCodes::ARGUMENT_OUT_OF_BOUND
};
ErrorCodes::ARGUMENT_OUT_OF_BOUND};
const std::size_t total_values = in->size() * in_data;
if (total_values > max_elements)
throw Exception{
"A call to function " + getName() + " would produce " + std::to_string(total_values) +
" array elements, which is greater than the allowed maximum of " + std::to_string(max_elements),
ErrorCodes::ARGUMENT_OUT_OF_BOUND
};
ErrorCodes::ARGUMENT_OUT_OF_BOUND};
const auto data_col = std::make_shared<ColumnVector<T>>(total_values);
const auto out = std::make_shared<ColumnArray>(
@ -2390,11 +2391,13 @@ bool FunctionRange::executeInternal(Block & block, const IColumn * const arg, co
auto & out_offsets = out->getOffsets();
IColumn::Offset_t offset{};
for (const auto i : ext::range(0, in->size()))
for (size_t row_idx = 0, rows = in->size(); row_idx < rows; ++row_idx)
{
std::copy(ext::make_range_iterator(T{}), ext::make_range_iterator(in_data), &out_data[offset]);
for (size_t elem_idx = 0, elems = in_data; elem_idx < elems; ++elem_idx)
out_data[offset + elem_idx] = elem_idx;
offset += in_data;
out_offsets[i] = offset;
out_offsets[row_idx] = offset;
}
return true;

View File

@ -14,7 +14,7 @@
#include <Columns/ColumnNullable.h>
#include <Functions/IFunction.h>
#include <Functions/DataTypeTraits.h>
#include <DataTypes/DataTypeTraits.h>
#include <Functions/ObjectPool.h>
#include <Common/StringUtils.h>

View File

@ -22,6 +22,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int CANNOT_CREATE_CHARSET_CONVERTER;
extern const int CANNOT_CONVERT_CHARSET;
extern const int ILLEGAL_COLUMN;
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Common/hex.h>
#include <Common/formatIPv6.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <DataTypes/DataTypesNumber.h>
@ -17,6 +18,7 @@
#include <Functions/IFunction.h>
#include <arpa/inet.h>
#include <ext/range.h>
#include <array>
@ -24,6 +26,12 @@
namespace DB
{
namespace ErrorCodes
{
extern const int TOO_LESS_ARGUMENTS_FOR_FUNCTION;
}
/** Encoding functions:
*
* IPv4NumToString (num) - See below.
@ -40,133 +48,10 @@ namespace DB
*/
const auto ipv4_bytes_length = 4;
const auto ipv6_bytes_length = 16;
const auto uuid_bytes_length = 16;
const auto uuid_text_length = 36;
/** Formats a binary IPv6 address (16 bytes) as text.
  * The single entry point is IPv6Format::apply; the private members are formatting helpers.
  */
class IPv6Format
{
private:
    /// Integer logarithm: returns ceil(log(value, base)),
    /// i.e. the smallest integer greater than or equal to log(value, base).
    static constexpr uint32_t int_log(const uint32_t value, const uint32_t base, const bool carry = false)
    {
        return value >= base ? 1 + int_log(value / base, base, value % base || carry) : value % base > 1 || carry;
    }

    /// Prints an integer in the desired base and advances `out`; faster than sprintf.
    template <uint32_t base, typename T, uint32_t buffer_size = sizeof(T) * int_log(256, base, false)>
    static void print_integer(char *& out, T value)
    {
        if (value == 0)
            *out++ = '0';
        else
        {
            /// Digits are produced from the least significant one, then written out in reverse.
            char buf[buffer_size];

            auto ptr = buf;
            while (value > 0)
            {
                *ptr++ = hexLowercase(value % base);
                value /= base;
            }

            while (ptr != buf)
                *out++ = *--ptr;
        }
    }

    /// Prints an IPv4 address as %u.%u.%u.%u; the last `zeroed_tail_bytes_count` bytes are printed as 0.
    static void ipv4_format(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count)
    {
        const auto limit = ipv4_bytes_length - zeroed_tail_bytes_count;

        for (const auto i : ext::range(0, ipv4_bytes_length))
        {
            UInt8 byte = (i < limit) ? src[i] : 0;
            print_integer<10, UInt8>(dst, byte);

            if (i != ipv4_bytes_length - 1)
                *dst++ = '.';
        }
    }

public:
    /** rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
      *  performs significantly faster than the reference implementation due to the absence of sprintf calls,
      *  bounds checking, unnecessary string copying and length calculation
      *
      * Writes the text followed by a terminating zero byte to `dst` and advances `dst` past it.
      * The last `zeroed_tail_bytes_count` bytes of the address are treated as zero.
      */
    static void apply(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count = 0)
    {
        struct { int base, len; } best{-1}, cur{-1};
        std::array<uint16_t, ipv6_bytes_length / sizeof(uint16_t)> words{};

        /** Preprocess:
          *    Copy the input (bytewise) array into a wordwise array.
          *    Find the longest run of 0x00's in src[] for :: shorthanding. */
        for (const auto i : ext::range(0, ipv6_bytes_length - zeroed_tail_bytes_count))
            words[i / 2] |= src[i] << ((1 - (i % 2)) << 3);

        for (const auto i : ext::range(0, words.size()))
        {
            if (words[i] == 0) {
                if (cur.base == -1)
                    cur.base = i, cur.len = 1;
                else
                    cur.len++;
            }
            else
            {
                if (cur.base != -1)
                {
                    if (best.base == -1 || cur.len > best.len)
                        best = cur;
                    cur.base = -1;
                }
            }
        }

        if (cur.base != -1)
        {
            if (best.base == -1 || cur.len > best.len)
                best = cur;
        }

        /// A single zero word is not worth abbreviating.
        if (best.base != -1 && best.len < 2)
            best.base = -1;

        /// Format the result.
        for (const int i : ext::range(0, words.size()))
        {
            /// Are we inside the best run of 0x00's?
            if (best.base != -1 && i >= best.base && i < (best.base + best.len))
            {
                if (i == best.base)
                    *dst++ = ':';
                continue;
            }

            /// Are we following an initial run of 0x00s or any real hex?
            if (i != 0)
                *dst++ = ':';

            /// Is this address an encapsulated IPv4?
            if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu)))
            {
                ipv4_format(src + 12, dst, std::min(zeroed_tail_bytes_count, static_cast<UInt8>(ipv4_bytes_length)));
                break;
            }

            print_integer<16>(dst, words[i]);
        }

        /// Was it a trailing run of 0x00's?
        if (best.base != -1 && (best.base + best.len) == words.size())
            *dst++ = ':';

        *dst++ = '\0';
    }
};
constexpr auto ipv4_bytes_length = 4;
constexpr auto ipv6_bytes_length = 16;
constexpr auto uuid_bytes_length = 16;
constexpr auto uuid_text_length = 36;
class FunctionIPv6NumToString : public IFunction
@ -214,7 +99,7 @@ public:
ColumnString::Chars_t & vec_res = col_res->getChars();
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
vec_res.resize(size * INET6_ADDRSTRLEN);
vec_res.resize(size * (IPV6_MAX_TEXT_LENGTH + 1));
offsets_res.resize(size);
auto begin = reinterpret_cast<char *>(&vec_res[0]);
@ -222,7 +107,7 @@ public:
for (size_t offset = 0, i = 0; offset < vec_in.size(); offset += ipv6_bytes_length, ++i)
{
IPv6Format::apply(&vec_in[offset], pos);
formatIPv6(&vec_in[offset], pos);
offsets_res[i] = pos - begin;
}
@ -240,9 +125,9 @@ public:
const auto & data_in = col_in->getData();
char buf[INET6_ADDRSTRLEN];
char buf[IPV6_MAX_TEXT_LENGTH + 1];
char * dst = buf;
IPv6Format::apply(reinterpret_cast<const unsigned char *>(data_in.data()), dst);
formatIPv6(reinterpret_cast<const unsigned char *>(data_in.data()), dst);
block.safeGetByPosition(result).column = std::make_shared<ColumnConstString>(col_in->size(), buf);
}
@ -337,7 +222,7 @@ public:
ColumnString::Chars_t & vec_res = col_res->getChars();
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
vec_res.resize(size * INET6_ADDRSTRLEN);
vec_res.resize(size * (IPV6_MAX_TEXT_LENGTH + 1));
offsets_res.resize(size);
auto begin = reinterpret_cast<char *>(&vec_res[0]);
@ -389,7 +274,7 @@ public:
const auto & data_in = col_in->getData();
char buf[INET6_ADDRSTRLEN];
char buf[IPV6_MAX_TEXT_LENGTH + 1];
char * dst = buf;
const auto address = reinterpret_cast<const unsigned char *>(data_in.data());
@ -413,7 +298,7 @@ private:
void cutAddress(const unsigned char * address, char *& dst, UInt8 zeroed_tail_bytes_count)
{
IPv6Format::apply(address, dst, zeroed_tail_bytes_count);
formatIPv6(address, dst, zeroed_tail_bytes_count);
}
};
@ -703,7 +588,7 @@ public:
ColumnString::Chars_t & vec_res = col_res->getChars();
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
vec_res.resize(vec_in.size() * INET_ADDRSTRLEN); /// the longest value is: 255.255.255.255\0
vec_res.resize(vec_in.size() * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0
offsets_res.resize(vec_in.size());
char * begin = reinterpret_cast<char *>(&vec_res[0]);
char * pos = begin;
@ -886,7 +771,7 @@ public:
ColumnString::Chars_t & vec_res = col_res->getChars();
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
vec_res.resize(vec_in.size() * INET_ADDRSTRLEN); /// the longest value is: 255.255.255.255\0
vec_res.resize(vec_in.size() * (IPV4_MAX_TEXT_LENGTH + 1)); /// the longest value is: 255.255.255.255\0
offsets_res.resize(vec_in.size());
char * begin = reinterpret_cast<char *>(&vec_res[0]);
char * pos = begin;

View File

@ -13,8 +13,8 @@
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnNullable.h>
#include <Functions/IFunction.h>
#include <Functions/NumberTraits.h>
#include <Functions/DataTypeTraits.h>
#include <DataTypes/NumberTraits.h>
#include <DataTypes/DataTypeTraits.h>
namespace DB
{

View File

@ -23,12 +23,17 @@
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
/** Functions for working with date and time.
*
* toYear, toMonth, toDayOfMonth, toDayOfWeek, toHour, toMinute, toSecond,
* toMonday, toStartOfMonth, toStartOfYear, toStartOfMinute, toStartOfFiveMinute
* toStartOfHour, toTime,
* now
* now, today, yesterday
* TODO: makeDate, makeDateTime
*
* (toDate - located in FunctionConversion.h file)

View File

@ -31,6 +31,7 @@ namespace ErrorCodes
{
extern const int DICTIONARIES_WAS_NOT_LOADED;
extern const int BAD_ARGUMENTS;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
/** Functions using Yandex.Metrica dictionaries

View File

@ -36,6 +36,7 @@ namespace ErrorCodes
extern const int DICTIONARIES_WAS_NOT_LOADED;
extern const int UNSUPPORTED_METHOD;
extern const int UNKNOWN_TYPE;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
/** Functions that use plug-ins (external) dictionaries.

View File

@ -13,6 +13,12 @@
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
/** Function for an unusual conversion to a string:
*
* bitmaskToList - takes an integer - a bitmask, returns a string of degrees of 2 separated by a comma.
@ -63,8 +69,8 @@ public:
|| executeType<Int32>(block, arguments, result)
|| executeType<Int64>(block, arguments, result)))
throw Exception("Illegal column " + block.safeGetByPosition(arguments[0]).column->getName()
+ " of argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
+ " of argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
private:

View File

@ -16,6 +16,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int ILLEGAL_COLUMN;
}
const Float64 EARTH_RADIUS_IN_METERS = 6372797.560856;

View File

@ -31,6 +31,13 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
/** Hashing functions.
*
* Half MD5:
@ -716,15 +723,13 @@ public:
throw Exception{
"Number of arguments for function " + getName() + " doesn't match: passed " +
toString(arg_count) + ", should be 1 or 2.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH
};
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
const auto first_arg = arguments.front().get();
if (!typeid_cast<const DataTypeString *>(first_arg))
throw Exception{
"Illegal type " + first_arg->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT
};
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
if (arg_count == 2)
{
@ -739,8 +744,7 @@ public:
!typeid_cast<const DataTypeInt64 *>(second_arg))
throw Exception{
"Illegal type " + second_arg->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT
};
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
return std::make_shared<DataTypeUInt64>();
@ -755,7 +759,7 @@ public:
else if (arg_count == 2)
executeTwoArgs(block, arguments, result);
else
throw std::logic_error{"got into IFunction::execute with unexpected number of arguments"};
throw Exception{"got into IFunction::execute with unexpected number of arguments", ErrorCodes::LOGICAL_ERROR};
}
private:
@ -797,8 +801,7 @@ private:
if (!level_col->isConst())
throw Exception{
"Second argument of function " + getName() + " must be an integral constant",
ErrorCodes::ILLEGAL_COLUMN
};
ErrorCodes::ILLEGAL_COLUMN};
const auto level = level_col->get64(0);
@ -833,10 +836,10 @@ private:
};
struct NameHalfMD5 { static constexpr auto name = "halfMD5"; };
struct NameSipHash64 { static constexpr auto name = "sipHash64"; };
struct NameIntHash32 { static constexpr auto name = "intHash32"; };
struct NameIntHash64 { static constexpr auto name = "intHash64"; };
struct NameHalfMD5 { static constexpr auto name = "halfMD5"; };
struct NameSipHash64 { static constexpr auto name = "sipHash64"; };
struct NameIntHash32 { static constexpr auto name = "intHash32"; };
struct NameIntHash64 { static constexpr auto name = "intHash64"; };
struct ImplCityHash64
{

View File

@ -11,6 +11,12 @@
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
/** Functions are logical links: and, or, not, xor.
* Accept any numeric types, return a UInt8 containing 0 or 1.
*/

View File

@ -7,10 +7,10 @@
#include <Functions/IFunction.h>
#include <Common/config.h>
/** More effective implementations of mathematical functions are possible when connecting a separate library
* Disabled due licence compatibility limitations
/** More efficient implementations of mathematical functions are possible when using a separate library.
* Disabled due to licence compatibility limitations.
* To enable: download http://www.agner.org/optimize/vectorclass.zip and unpack to contrib/vectorclass
* Then rebuild with -DENABLE_VECTORCLASS=1
* Then rebuild with -DENABLE_VECTORCLASS=1
*/
#if USE_VECTORCLASS
@ -32,6 +32,11 @@
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
template <typename Impl>
class FunctionMathNullaryConstFloat64 : public IFunction
{

View File

@ -5,7 +5,7 @@
#include <Columns/ColumnString.h>
#include <Columns/ColumnTuple.h>
#include <Functions/IFunction.h>
#include <Functions/NumberTraits.h>
#include <DataTypes/NumberTraits.h>
#include <Interpreters/ExpressionActions.h>

View File

@ -12,6 +12,11 @@
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
/** Pseudo-random number generation functions.
* The function can be called without arguments or with one argument.
* The argument is ignored and only serves to ensure that several calls to one function are considered different and do not stick together.
@ -42,11 +47,11 @@ namespace detail
struct LinearCongruentialGenerator
{
/// Constants from man lrand48_r.
/// Constants from `man lrand48_r`.
static constexpr UInt64 a = 0x5DEECE66D;
static constexpr UInt64 c = 0xB;
/// And this is from `head -c8 /dev/urandom | Xxd -p`
/// And this is from `head -c8 /dev/urandom | xxd -p`
UInt64 current = 0x09826f4a081cee35ULL;
LinearCongruentialGenerator() {}
@ -236,11 +241,11 @@ public:
struct NameRand { static constexpr auto name = "rand"; };
struct NameRand64 { static constexpr auto name = "rand64"; };
struct NameRand64 { static constexpr auto name = "rand64"; };
struct NameRandConstant { static constexpr auto name = "randConstant"; };
using FunctionRand = FunctionRandom<RandImpl, NameRand> ;
using FunctionRand64 = FunctionRandom<Rand64Impl, NameRand64>;
using FunctionRand = FunctionRandom<RandImpl, NameRand> ;
using FunctionRand64 = FunctionRandom<Rand64Impl, NameRand64>;
using FunctionRandConstant = FunctionRandomConstant<RandImpl, NameRandConstant>;

View File

@ -15,8 +15,14 @@
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
/** Functions for transforming numbers and dates to strings that contain the same set of bytes in the machine representation, and vice versa.
*/
*/
template<typename Name>
@ -104,8 +110,8 @@ public:
|| executeType<Float32>(block, arguments, result)
|| executeType<Float64>(block, arguments, result)))
throw Exception("Illegal column " + block.safeGetByPosition(arguments[0]).column->getName()
+ " of argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
+ " of argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
};
@ -196,32 +202,32 @@ public:
};
struct NameReinterpretAsUInt8 { static constexpr auto name = "reinterpretAsUInt8"; };
struct NameReinterpretAsUInt16 { static constexpr auto name = "reinterpretAsUInt16"; };
struct NameReinterpretAsUInt32 { static constexpr auto name = "reinterpretAsUInt32"; };
struct NameReinterpretAsUInt64 { static constexpr auto name = "reinterpretAsUInt64"; };
struct NameReinterpretAsInt8 { static constexpr auto name = "reinterpretAsInt8"; };
struct NameReinterpretAsInt16 { static constexpr auto name = "reinterpretAsInt16"; };
struct NameReinterpretAsInt32 { static constexpr auto name = "reinterpretAsInt32"; };
struct NameReinterpretAsInt64 { static constexpr auto name = "reinterpretAsInt64"; };
struct NameReinterpretAsFloat32 { static constexpr auto name = "reinterpretAsFloat32"; };
struct NameReinterpretAsFloat64 { static constexpr auto name = "reinterpretAsFloat64"; };
struct NameReinterpretAsUInt8 { static constexpr auto name = "reinterpretAsUInt8"; };
struct NameReinterpretAsUInt16 { static constexpr auto name = "reinterpretAsUInt16"; };
struct NameReinterpretAsUInt32 { static constexpr auto name = "reinterpretAsUInt32"; };
struct NameReinterpretAsUInt64 { static constexpr auto name = "reinterpretAsUInt64"; };
struct NameReinterpretAsInt8 { static constexpr auto name = "reinterpretAsInt8"; };
struct NameReinterpretAsInt16 { static constexpr auto name = "reinterpretAsInt16"; };
struct NameReinterpretAsInt32 { static constexpr auto name = "reinterpretAsInt32"; };
struct NameReinterpretAsInt64 { static constexpr auto name = "reinterpretAsInt64"; };
struct NameReinterpretAsFloat32 { static constexpr auto name = "reinterpretAsFloat32"; };
struct NameReinterpretAsFloat64 { static constexpr auto name = "reinterpretAsFloat64"; };
struct NameReinterpretAsDate { static constexpr auto name = "reinterpretAsDate"; };
struct NameReinterpretAsDateTime { static constexpr auto name = "reinterpretAsDateTime"; };
struct NameReinterpretAsString { static constexpr auto name = "reinterpretAsString"; };
struct NameReinterpretAsString { static constexpr auto name = "reinterpretAsString"; };
using FunctionReinterpretAsUInt8 = FunctionReinterpretStringAs<DataTypeUInt8, NameReinterpretAsUInt8> ;
using FunctionReinterpretAsUInt16 = FunctionReinterpretStringAs<DataTypeUInt16, NameReinterpretAsUInt16>;
using FunctionReinterpretAsUInt32 = FunctionReinterpretStringAs<DataTypeUInt32, NameReinterpretAsUInt32>;
using FunctionReinterpretAsUInt64 = FunctionReinterpretStringAs<DataTypeUInt64, NameReinterpretAsUInt64>;
using FunctionReinterpretAsInt8 = FunctionReinterpretStringAs<DataTypeInt8, NameReinterpretAsInt8> ;
using FunctionReinterpretAsInt16 = FunctionReinterpretStringAs<DataTypeInt16, NameReinterpretAsInt16> ;
using FunctionReinterpretAsInt32 = FunctionReinterpretStringAs<DataTypeInt32, NameReinterpretAsInt32> ;
using FunctionReinterpretAsInt64 = FunctionReinterpretStringAs<DataTypeInt64, NameReinterpretAsInt64> ;
using FunctionReinterpretAsFloat32 = FunctionReinterpretStringAs<DataTypeFloat32, NameReinterpretAsFloat32>;
using FunctionReinterpretAsFloat64 = FunctionReinterpretStringAs<DataTypeFloat64, NameReinterpretAsFloat64>;
using FunctionReinterpretAsDate = FunctionReinterpretStringAs<DataTypeDate, NameReinterpretAsDate> ;
using FunctionReinterpretAsDateTime = FunctionReinterpretStringAs<DataTypeDateTime, NameReinterpretAsDateTime>;
using FunctionReinterpretAsUInt8 = FunctionReinterpretStringAs<DataTypeUInt8, NameReinterpretAsUInt8>;
using FunctionReinterpretAsUInt16 = FunctionReinterpretStringAs<DataTypeUInt16, NameReinterpretAsUInt16>;
using FunctionReinterpretAsUInt32 = FunctionReinterpretStringAs<DataTypeUInt32, NameReinterpretAsUInt32>;
using FunctionReinterpretAsUInt64 = FunctionReinterpretStringAs<DataTypeUInt64, NameReinterpretAsUInt64>;
using FunctionReinterpretAsInt8 = FunctionReinterpretStringAs<DataTypeInt8, NameReinterpretAsInt8>;
using FunctionReinterpretAsInt16 = FunctionReinterpretStringAs<DataTypeInt16, NameReinterpretAsInt16>;
using FunctionReinterpretAsInt32 = FunctionReinterpretStringAs<DataTypeInt32, NameReinterpretAsInt32>;
using FunctionReinterpretAsInt64 = FunctionReinterpretStringAs<DataTypeInt64, NameReinterpretAsInt64>;
using FunctionReinterpretAsFloat32 = FunctionReinterpretStringAs<DataTypeFloat32, NameReinterpretAsFloat32>;
using FunctionReinterpretAsFloat64 = FunctionReinterpretStringAs<DataTypeFloat64, NameReinterpretAsFloat64>;
using FunctionReinterpretAsDate = FunctionReinterpretStringAs<DataTypeDate, NameReinterpretAsDate>;
using FunctionReinterpretAsDateTime = FunctionReinterpretStringAs<DataTypeDateTime, NameReinterpretAsDateTime>;
using FunctionReinterpretAsString = FunctionReinterpretAsStringImpl<NameReinterpretAsString>;

View File

@ -15,6 +15,12 @@
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
/** Rounding Functions:
* roundToExp2 - down to the nearest power of two;
* roundDuration - down to the nearest of: 0, 1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000;

View File

@ -15,6 +15,14 @@
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
template <bool negative = false>
struct EmptyImpl
{
@ -894,7 +902,7 @@ public:
{
if (arguments.size() < 2)
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
+ ", should be at least 2.",
+ ", should be at least 2.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
for (const auto arg_idx : ext::range(0, arguments.size()))

View File

@ -13,6 +13,12 @@
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
/** String functions
*
* length, empty, notEmpty,

View File

@ -15,6 +15,12 @@
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
/** Functions that split strings into an array of strings or vice versa.
*
* splitByChar(sep, s)
@ -546,9 +552,9 @@ public:
};
using FunctionAlphaTokens = FunctionTokens<AlphaTokensImpl> ;
using FunctionSplitByChar = FunctionTokens<SplitByCharImpl> ;
using FunctionAlphaTokens = FunctionTokens<AlphaTokensImpl>;
using FunctionSplitByChar = FunctionTokens<SplitByCharImpl>;
using FunctionSplitByString = FunctionTokens<SplitByStringImpl>;
using FunctionExtractAll = FunctionTokens<ExtractAllImpl> ;
using FunctionExtractAll = FunctionTokens<ExtractAllImpl>;
}

View File

@ -1,6 +1,6 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsTransform.h>
#include <Functions/DataTypeTraits.h>
#include <DataTypes/DataTypeTraits.h>
namespace DB
{

View File

@ -22,6 +22,8 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
}

View File

@ -8,6 +8,11 @@
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
{

View File

@ -3,9 +3,9 @@
#include <memory>
#include <Core/Names.h>
#include <Core/Field.h>
#include <Core/Block.h>
#include <Core/ColumnNumbers.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <DataTypes/IDataType.h>
@ -15,10 +15,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int FUNCTION_CANNOT_HAVE_PARAMETERS;
extern const int TOO_LESS_ARGUMENTS_FOR_FUNCTION;
extern const int NOT_IMPLEMENTED;
}
struct ExpressionAction;

View File

@ -9,6 +9,13 @@
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
template<typename B>
struct AndImpl
{

View File

@ -1,6 +1,6 @@
#include <iostream>
#include <Functions/NumberTraits.h>
#include <DataTypes/NumberTraits.h>
void printType(DB::UInt8 x) { std::cout << "UInt8"; }

View File

@ -3,11 +3,6 @@
#include <vector>
#include <city.h>
#ifdef USE_QUICKLZ
#include <quicklz/quicklz_level1.h>
#endif
#include <lz4.h>
#include <zstd.h>
@ -57,16 +52,7 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
size_t & size_compressed = size_compressed_without_checksum;
if (method < 0x80)
{
#ifdef USE_QUICKLZ
size_compressed = qlz_size_compressed(&own_compressed_buffer[0]);
size_decompressed = qlz_size_decompressed(&own_compressed_buffer[0]);
#else
throw Exception("QuickLZ compression method is disabled", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
#endif
}
else if (method == static_cast<UInt8>(CompressionMethodByte::LZ4) || method == static_cast<UInt8>(CompressionMethodByte::ZSTD))
if (method == static_cast<UInt8>(CompressionMethodByte::LZ4) || method == static_cast<UInt8>(CompressionMethodByte::ZSTD))
{
size_compressed = unalignedLoad<UInt32>(&own_compressed_buffer[1]);
size_decompressed = unalignedLoad<UInt32>(&own_compressed_buffer[5]);
@ -108,18 +94,7 @@ void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, s
UInt8 method = compressed_buffer[0]; /// See CompressedWriteBuffer.h
if (method < 0x80)
{
#ifdef USE_QUICKLZ
if (!qlz_state)
qlz_state = std::make_unique<qlz_state_decompress>();
qlz_decompress(&compressed_buffer[0], to, qlz_state.get());
#else
throw Exception("QuickLZ compression method is disabled", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
#endif
}
else if (method == static_cast<UInt8>(CompressionMethodByte::LZ4))
if (method == static_cast<UInt8>(CompressionMethodByte::LZ4))
{
if (LZ4_decompress_fast(&compressed_buffer[COMPRESSED_BLOCK_HEADER_SIZE], to, size_decompressed) < 0)
throw Exception("Cannot LZ4_decompress_fast", ErrorCodes::CANNOT_DECOMPRESS);

View File

@ -1,9 +1,5 @@
#pragma once
#ifdef USE_QUICKLZ
struct qlz_state_decompress;
#endif
#include <Common/PODArray.h>
@ -25,12 +21,6 @@ protected:
/// Points to memory, holding compressed block.
char * compressed_buffer = nullptr;
#ifdef USE_QUICKLZ
std::unique_ptr<qlz_state_decompress> qlz_state;
#else
void * fixed_size_padding = nullptr; /// ABI compatibility for USE_QUICKLZ
#endif
/// Don't checksum on decompressing.
bool disable_checksum = false;

View File

@ -6,7 +6,6 @@
#define DBMS_MAX_COMPRESSED_SIZE 0x40000000ULL /// 1GB
#define QUICKLZ_ADDITIONAL_SPACE 400
#define COMPRESSED_BLOCK_HEADER_SIZE 9
@ -16,7 +15,6 @@ namespace DB
/** Compression method */
enum class CompressionMethod
{
QuickLZ = 0,
LZ4 = 1,
LZ4HC = 2, /// The format is the same as for LZ4. The difference is only in compression.
ZSTD = 3, /// Experimental algorithm: https://github.com/Cyan4973/zstd
@ -29,14 +27,6 @@ enum class CompressionMethod
*
* The next byte specifies the compression algorithm. Then everything depends on the algorithm.
*
* The first 4 options are compatible with QuickLZ level 1.
* That is, if the value of the first byte is < 4, it is enough to use qlz_level1_decompress function to decompress.
*
* 0x00 - uncompressed data, small block. Next, one byte - compressed data size, including header; one byte - uncompressed data size.
* 0x01 - compressed data, QuickLZ level 1, small block. Then two bytes are similar.
* 0x02 - uncompressed data, large block. Then 4 bytes - compressed data size, including header; 4 bytes uncompressed data size.
* 0x03 - compressed data, QuickLZ level 1, large block. Then 8 bytes are similar.
*
* 0x82 - LZ4 or LZ4HC (they have the same format).
* Next 4 bytes - the size of the compressed data, taking into account the header; 4 bytes is the size of the uncompressed data.
*
@ -53,8 +43,8 @@ enum class CompressionMethod
enum class CompressionMethodByte : uint8_t
{
LZ4 = 0x82,
ZSTD = 0x90,
LZ4 = 0x82,
ZSTD = 0x90,
};
}

View File

@ -1,10 +1,5 @@
#include <memory>
#include <city.h>
#ifdef USE_QUICKLZ
#include <quicklz/quicklz_level1.h>
#endif
#include <lz4.h>
#include <lz4hc.h>
#include <zstd.h>
@ -35,28 +30,10 @@ void CompressedWriteBuffer::nextImpl()
char * compressed_buffer_ptr = nullptr;
/** The format of compressed block - see CompressedStream.h
*/
*/
switch (method)
{
case CompressionMethod::QuickLZ:
{
#ifdef USE_QUICKLZ
compressed_buffer.resize(uncompressed_size + QUICKLZ_ADDITIONAL_SPACE);
compressed_size = qlz_compress(
working_buffer.begin(),
&compressed_buffer[0],
uncompressed_size,
qlz_state.get());
compressed_buffer[0] &= 3;
compressed_buffer_ptr = &compressed_buffer[0];
break;
#else
throw Exception("QuickLZ compression method is disabled", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
#endif
}
case CompressionMethod::LZ4:
case CompressionMethod::LZ4HC:
{
@ -137,9 +114,6 @@ CompressedWriteBuffer::CompressedWriteBuffer(
CompressionMethod method_,
size_t buf_size)
: BufferWithOwnMemory<WriteBuffer>(buf_size), out(out_), method(method_)
#ifdef USE_QUICKLZ
, qlz_state(std::make_unique<qlz_state_compress>())
#endif
{
}

View File

@ -2,10 +2,6 @@
#include <memory>
#ifdef USE_QUICKLZ
struct qlz_state_compress;
#endif
#include <Common/PODArray.h>
#include <IO/WriteBuffer.h>
@ -24,15 +20,6 @@ private:
PODArray<char> compressed_buffer;
#ifdef USE_QUICKLZ
std::unique_ptr<qlz_state_compress> qlz_state;
#else
/// ABI compatibility for USE_QUICKLZ
void * fixed_size_padding = nullptr;
/// Undoes warning unused-private-field.
void * fixed_size_padding_used() const { return fixed_size_padding; }
#endif
void nextImpl() override;
public:

View File

@ -1,109 +0,0 @@
#pragma once
#include <mysqlxx/Row.h>
#include <mysqlxx/Null.h>
#include <mysqlxx/Manip.h>
#include <common/MetrikaTypes.h>
#include <Core/Field.h>
#include <Core/FieldVisitors.h>
#include <IO/WriteHelpers.h>
/// This is for Yandex.Metrica code.
namespace mysqlxx
{
inline std::ostream & operator<< (mysqlxx::EscapeManipResult res, const DB::Array & value)
{
return res.ostr << DB::applyVisitor(DB::FieldVisitorToString(), DB::Field(value));
}
inline std::ostream & operator<< (mysqlxx::QuoteManipResult res, const DB::Array & value)
{
throw Poco::Exception("Cannot quote Array with mysqlxx::quote.");
}
inline std::istream & operator>> (mysqlxx::UnEscapeManipResult res, DB::Array & value)
{
throw Poco::Exception("Cannot unescape Array with mysqlxx::unescape.");
}
inline std::istream & operator>> (mysqlxx::UnQuoteManipResult res, DB::Array & value)
{
throw Poco::Exception("Cannot unquote Array with mysqlxx::unquote.");
}
inline std::ostream & operator<< (mysqlxx::EscapeManipResult res, const DB::Tuple & value)
{
return res.ostr << DB::applyVisitor(DB::FieldVisitorToString(), DB::Field(value));
}
inline std::ostream & operator<< (mysqlxx::QuoteManipResult res, const DB::Tuple & value)
{
throw Poco::Exception("Cannot quote Tuple with mysqlxx::quote.");
}
inline std::istream & operator>> (mysqlxx::UnEscapeManipResult res, DB::Tuple & value)
{
throw Poco::Exception("Cannot unescape Tuple with mysqlxx::unescape.");
}
inline std::istream & operator>> (mysqlxx::UnQuoteManipResult res, DB::Tuple & value)
{
throw Poco::Exception("Cannot unquote Tuple with mysqlxx::unquote.");
}
template <> inline VisitID_t Value::get<VisitID_t>() const { return VisitID_t(getUInt()); }
}
namespace DB
{

/// Writes all fields of a mysqlxx::Row to `buf`, separated by tabs.
/// A NULL field is written as the two characters \N; any other field is written
/// through writeAnyEscapedString<'\''>.
inline void writeEscapedRow(const mysqlxx::Row & row, WriteBuffer & buf)
{
    for (size_t idx = 0; idx < row.size(); ++idx)
    {
        if (idx > 0)
            buf.write('\t');

        const auto & field = row[idx];

        if (unlikely(field.isNull()))
            buf.write("\\N", 2);
        else
            writeAnyEscapedString<'\''>(field.data(), field.data() + field.length(), buf);
    }
}


/// Text form of a nullable value: \N for NULL, otherwise the text form of the wrapped T.
template <typename T>
inline void writeText(const mysqlxx::Null<T> & x, WriteBuffer & buf)
{
    if (x.isNull())
    {
        writeCString("\\N", buf);
        return;
    }

    writeText(static_cast<const T &>(x), buf);
}


/// Quoted form of a nullable value: the word NULL for NULL, otherwise the wrapped T.
/// NOTE(review): the non-NULL branch calls writeText, not writeQuoted - confirm this is intended.
template <typename T>
inline void writeQuoted(const mysqlxx::Null<T> & x, WriteBuffer & buf)
{
    if (x.isNull())
    {
        writeCString("NULL", buf);
        return;
    }

    writeText(static_cast<const T &>(x), buf);
}


/// Converts a nullable value to a Field: a Null field for NULL, otherwise toField of the wrapped T.
template <typename T>
inline Field toField(const mysqlxx::Null<T> & x)
{
    if (x.isNull())
        return Field(Null());

    return toField(static_cast<const T &>(x));
}

}

View File

@ -4,6 +4,7 @@
#include <Common/SimpleCache.h>
#include <Common/StringUtils.h>
#include <IO/HexWriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Poco/Util/Application.h>
#include <openssl/sha.h>
@ -40,15 +41,6 @@ inline bool isLocal(const Cluster::Address & address)
return address.default_database.empty() && isLocalAddress(address.resolved_address);
}
/// Builds the string user[:password]@host:port[#default_database] for an address.
/// user, password, host and default_database are passed through escapeForFileName;
/// host and port are taken from the resolved address.
inline std::string addressToDirName(const Cluster::Address & address)
{
    std::string dir_name = escapeForFileName(address.user);

    if (!address.password.empty())
    {
        dir_name += ':';
        dir_name += escapeForFileName(address.password);
    }

    dir_name += '@';
    dir_name += escapeForFileName(address.resolved_address.host().toString());
    dir_name += ':';
    dir_name += std::to_string(address.resolved_address.port());

    if (!address.default_database.empty())
    {
        dir_name += '#';
        dir_name += escapeForFileName(address.default_database);
    }

    return dir_name;
}
/// To cache DNS requests.
Poco::Net::SocketAddress resolveSocketAddressImpl1(const String & host, UInt16 port)
@ -108,6 +100,29 @@ Cluster::Address::Address(const String & host_port_, const String & user_, const
}
}
/// Returns the string form of this address; delegates to the static overload with this object's host_name and port.
String Cluster::Address::toString() const
{
return toString(host_name, port);
}
/// Builds "<host>:<port>", where the host name is passed through escapeForFileName
/// and the port is formatted with DB::toString.
String Cluster::Address::toString(const String & host_name, UInt16 port)
{
return escapeForFileName(host_name) + ':' + DB::toString(port);
}
/// Builds user[:password]@resolved_host:resolved_port[#default_database].
/// The password and database parts (with their separators) are omitted when empty;
/// user, password, host and database are passed through escapeForFileName.
String Cluster::Address::toStringFull() const
{
    String full = escapeForFileName(user);

    if (!password.empty())
    {
        full += ':';
        full += escapeForFileName(password);
    }

    full += '@';
    full += escapeForFileName(resolved_address.host().toString());
    full += ':';
    full += std::to_string(resolved_address.port());

    if (!default_database.empty())
    {
        full += '#';
        full += escapeForFileName(default_database);
    }

    return full;
}
/// Implementation of Clusters class
Clusters::Clusters(Poco::Util::AbstractConfiguration & config, const Settings & settings, const String & config_name)
@ -195,7 +210,7 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
info.local_addresses.push_back(address);
else
{
info.dir_names.push_back(addressToDirName(address));
info.dir_names.push_back(address.toStringFull());
ConnectionPoolPtrs pools;
pools.push_back(std::make_shared<ConnectionPool>(
settings.distributed_connections_pool_size,
@ -229,7 +244,7 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
if (weight == 0)
continue;
const auto internal_replication = config.getBool(partial_prefix + ".internal_replication", false);
bool internal_replication = config.getBool(partial_prefix + ".internal_replication", false);
/** in case of internal_replication we will be appending names to
* the first element of vector; otherwise we will just .emplace_back
@ -252,14 +267,14 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
{
if (internal_replication)
{
auto dir_name = addressToDirName(replica_addresses.back());
auto dir_name = replica_addresses.back().toStringFull();
if (first)
dir_names.emplace_back(std::move(dir_name));
else
dir_names.front() += "," + dir_name;
}
else
dir_names.emplace_back(addressToDirName(replica_addresses.back()));
dir_names.emplace_back(replica_addresses.back().toStringFull());
if (first) first = false;
}
@ -296,7 +311,7 @@ Cluster::Cluster(Poco::Util::AbstractConfiguration & config, const Settings & se
std::move(replicas), settings.load_balancing, settings.connections_with_failover_max_tries);
slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size());
shards_info.push_back({std::move(dir_names), current_shard_num, weight, shard_local_addresses, shard_pool});
shards_info.push_back({std::move(dir_names), current_shard_num, weight, shard_local_addresses, shard_pool, internal_replication});
}
else
throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);

View File

@ -58,6 +58,14 @@ public:
Address(Poco::Util::AbstractConfiguration & config, const String & config_prefix);
Address(const String & host_port_, const String & user_, const String & password_);
/// Returns escaped 'host_name:port'
String toString() const;
static String toString(const String & host_name, UInt16 port);
/// Returns escaped user:password@resolved_host_address:resolved_host_port#default_database
String toStringFull() const;
};
using Addresses = std::vector<Address>;
@ -69,14 +77,17 @@ public:
bool isLocal() const { return !local_addresses.empty(); }
bool hasRemoteConnections() const { return pool != nullptr; }
size_t getLocalNodeCount() const { return local_addresses.size(); }
bool hasInternalReplication() const { return has_internal_replication; }
public:
/// contains names of directories for asynchronous write to StorageDistributed
/// Contains names of directories for asynchronous write to StorageDistributed
std::vector<std::string> dir_names;
UInt32 shard_num; /// Shard number, starting with 1.
/// Number of the shard, the indexation begins with 1
UInt32 shard_num;
int weight;
Addresses local_addresses;
ConnectionPoolWithFailoverPtr pool;
bool has_internal_replication;
};
using ShardsInfo = std::vector<ShardInfo>;

View File

@ -120,8 +120,9 @@ struct ContextShared
InterserverIOHandler interserver_io_handler; /// Handler for interserver communication.
BackgroundProcessingPoolPtr background_pool; /// The thread pool for the background work performed by the tables.
ReshardingWorkerPtr resharding_worker;
Macros macros; /// Substitutions from config. Can be used for parameters of ReplicatedMergeTree.
Macros macros; /// Substitutions extracted from config.
std::unique_ptr<Compiler> compiler; /// Used for dynamic compilation of queries' parts if it necessary.
std::shared_ptr<DDLWorker> ddl_worker; /// Process ddl commands from zk.
/// Rules for selecting the compression method, depending on the size of the part.
mutable std::unique_ptr<CompressionMethodSelector> compression_method_selector;
std::unique_ptr<MergeTreeSettings> merge_tree_settings; /// Settings of MergeTree* engines.
@ -1099,6 +1100,22 @@ ReshardingWorker & Context::getReshardingWorker()
return *shared->resharding_worker;
}
/// Stores the DDL worker in the shared context state while holding the context lock.
/// Throws LOGICAL_ERROR if a worker has already been stored.
void Context::setDDLWorker(std::shared_ptr<DDLWorker> ddl_worker)
{
    auto guard = getLock();

    const bool already_set = static_cast<bool>(shared->ddl_worker);
    if (already_set)
    {
        throw Exception("DDL background thread has already been initialized.", ErrorCodes::LOGICAL_ERROR);
    }

    shared->ddl_worker = ddl_worker;
}
/// Returns the DDL worker stored by setDDLWorker; the lookup is done under the context lock.
/// Throws LOGICAL_ERROR if no worker has been stored.
DDLWorker & Context::getDDLWorker()
{
    auto guard = getLock();

    if (shared->ddl_worker == nullptr)
    {
        throw Exception("DDL background thread not initialized.", ErrorCodes::LOGICAL_ERROR);
    }

    return *shared->ddl_worker;
}
void Context::resetCaches() const
{
auto lock = getLock();

View File

@ -53,6 +53,7 @@ class PartLog;
struct MergeTreeSettings;
class IDatabase;
class DDLGuard;
class DDLWorker;
class IStorage;
using StoragePtr = std::shared_ptr<IStorage>;
using Tables = std::map<String, StoragePtr>;
@ -279,6 +280,9 @@ public:
void setReshardingWorker(std::shared_ptr<ReshardingWorker> resharding_worker);
ReshardingWorker & getReshardingWorker();
void setDDLWorker(std::shared_ptr<DDLWorker> ddl_worker);
DDLWorker & getDDLWorker();
/** Clear the caches of the uncompressed blocks and marks.
* This is usually done when renaming tables, changing the type of columns, deleting a table.
* - since caches are linked to file names, and become incorrect.

View File

@ -0,0 +1,726 @@
#include <Interpreters/DDLWorker.h>
#include <Parsers/ASTAlterQuery.h>
#include <Parsers/ASTQueryWithOnCluster.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/Operators.h>
#include <IO/ReadBufferFromString.h>
#include <Storages/IStorage.h>
#include <DataStreams/OneBlockInputStream.h>
#include <Interpreters/executeQuery.h>
#include <Interpreters/Cluster.h>
#include <Common/getFQDNOrHostName.h>
#include <Common/setThreadName.h>
#include <Common/Stopwatch.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeArray.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnArray.h>
#include <zkutil/ZooKeeper.h>
#include <zkutil/Lock.h>
#include <Poco/Timestamp.h>
#include <experimental/optional>
namespace DB
{
namespace ErrorCodes
{
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
extern const int INVALID_CONFIG_PARAMETER;
extern const int UNKNOWN_FORMAT_VERSION;
extern const int INCONSISTENT_TABLE_ACCROSS_SHARDS;
extern const int INCONSISTENT_CLUSTER_DEFINITION;
extern const int TIMEOUT_EXCEEDED;
extern const int UNFINISHED;
}
const size_t DDLWorker::node_max_lifetime_seconds = 7 * 24 * 60 * 60; // week
const size_t DDLWorker::cleanup_min_period_seconds = 60; // minute
/// One entry of the DDL queue. Its text form is what is stored as the data of a queue node
/// and parsed back by the worker.
///
/// Text format (one field per line):
///   version: <int>
///   query: <escaped query text>
///   hosts: <list of host ids>
///   initiator: <string>        -- may be absent when parsing
struct DDLLogEntry
{
    String query;
    Strings hosts;
    String initiator; // optional

    static constexpr int CURRENT_VERSION = 1;

    /// Serializes the entry into the text format described above.
    /// Const: it only reads the fields, so it can be called through a const reference.
    String toString() const
    {
        String res;
        {
            WriteBufferFromString wb(res);

            /// Written through a local copy; presumably to avoid odr-using the static constexpr member.
            auto version = CURRENT_VERSION;
            wb << "version: " << version << "\n";
            wb << "query: " << escape << query << "\n";
            wb << "hosts: " << hosts << "\n";
            wb << "initiator: " << initiator << "\n";
        }

        return res;
    }

    /// Fills the entry from its text form.
    /// Throws UNKNOWN_FORMAT_VERSION if the version differs from CURRENT_VERSION.
    /// The "initiator" line is read only if data remains after "hosts"; otherwise initiator is cleared.
    /// Nothing may follow the last field (checked with assertEOF).
    void parse(const String & data)
    {
        ReadBufferFromString rb(data);

        /// Initialized so that it never holds an indeterminate value.
        int version = 0;
        rb >> "version: " >> version >> "\n";

        if (version != CURRENT_VERSION)
            throw Exception("Unknown DDLLogEntry format version: " + DB::toString(version), ErrorCodes::UNKNOWN_FORMAT_VERSION);

        rb >> "query: " >> escape >> query >> "\n";
        rb >> "hosts: " >> hosts >> "\n";

        if (!rb.eof())
            rb >> "initiator: " >> initiator >> "\n";
        else
            initiator.clear();

        assertEOF(rb);
    }
};
using ShardAndHostNum = std::experimental::optional<std::pair<size_t, size_t>>;
/// Looks up the address with the given host name and port in the cluster layout.
/// Returns the zero-based pair (shard index, host index within the shard) of the first match,
/// or an empty optional if there is no such address.
static ShardAndHostNum tryGetShardAndHostNum(const Cluster::AddressesWithFailover & cluster, const String & host_name, UInt16 port)
{
    size_t shard_idx = 0;
    for (const auto & shard_addresses : cluster)
    {
        size_t host_idx = 0;
        for (const Cluster::Address & candidate : shard_addresses)
        {
            if (candidate.port == port && candidate.host_name == host_name)
                return std::make_pair(shard_idx, host_idx);

            ++host_idx;
        }

        ++shard_idx;
    }

    return {};
}
/// Returns true if `type` is one of the ASTAlterQuery kinds listed below.
static bool isSupportedAlterType(int type)
{
    /// Built once, on the first call.
    static const std::unordered_set<int> allowed_types{
        ASTAlterQuery::ADD_COLUMN,
        ASTAlterQuery::DROP_COLUMN,
        ASTAlterQuery::MODIFY_COLUMN,
        ASTAlterQuery::MODIFY_PRIMARY_KEY,
        ASTAlterQuery::DROP_PARTITION
    };

    return allowed_types.find(type) != allowed_types.end();
}
/// Creates the worker and starts its background thread.
///
/// zk_root_dir - path of the queue directory; one trailing '/' is removed if present.
/// context_    - context kept by reference; it is also asked for the TCP port.
///
/// The identity of this host (host_id) is built by Cluster::Address::toString from
/// the result of getFQDNOrHostName() and the TCP port.
DDLWorker::DDLWorker(const std::string & zk_root_dir, Context & context_)
    : context(context_)
{
    queue_dir = zk_root_dir;

    /// back() on an empty string is undefined behaviour, so check for emptiness first.
    if (!queue_dir.empty() && queue_dir.back() == '/')
        queue_dir.pop_back();

    host_name = getFQDNOrHostName();
    port = context.getTCPPort();
    host_id = Cluster::Address::toString(host_name, port);

    event_queue_updated = std::make_shared<Poco::Event>();

    /// Start the thread last, after the fields above have been set.
    thread = std::thread(&DDLWorker::run, this);
}
/// Shuts the worker down: raises stop_flag, signals event_queue_updated
/// (presumably to wake the thread if it is waiting on that event - confirm in run()),
/// then waits for the thread to finish.
DDLWorker::~DDLWorker()
{
stop_flag = true;
event_queue_updated->set();
thread.join();
}
void DDLWorker::processTasks()
{
LOG_DEBUG(log, "Processing tasks");
Strings queue_nodes = zookeeper->getChildren(queue_dir, nullptr, event_queue_updated);
if (queue_nodes.empty())
return;
bool server_startup = last_processed_node_name.empty();
std::sort(queue_nodes.begin(), queue_nodes.end());
auto begin_node = server_startup
? queue_nodes.begin()
: std::upper_bound(queue_nodes.begin(), queue_nodes.end(), last_processed_node_name);
for (auto it = begin_node; it != queue_nodes.end(); ++it)
{
const String & node_name = *it;
String node_path = queue_dir + "/" + node_name;
String node_data;
if (!zookeeper->tryGet(node_path, node_data))
{
/// It is Ok that node could be deleted just now. It means that there are no current host in node's host list.
continue;
}
DDLLogEntry node;
node.parse(node_data);
bool host_in_hostlist = std::find(node.hosts.cbegin(), node.hosts.cend(), host_id) != node.hosts.cend();
bool already_processed = zookeeper->exists(node_path + "/finished/" + host_id);
if (!server_startup && already_processed)
{
throw Exception(
"Server expects that DDL node " + node_name + " should be processed, but it was already processed according to ZK",
ErrorCodes::LOGICAL_ERROR);
}
if (host_in_hostlist && !already_processed)
{
try
{
processTask(node, node_name);
}
catch (...)
{
tryLogCurrentException(log, "An error occurred while processing node " + node_name + " (" + node.query + ")");
throw;
}
}
else
{
LOG_DEBUG(log, "Node " << node_name << " (" << node.query << ") will not be processed");
}
last_processed_node_name = node_name;
}
}
/// Runs `query` through executeQuery and reports the outcome without throwing.
///
/// On success `status` is set to ExecutionStatus(0) and true is returned.
/// On any exception `status` is filled from the current exception and false is returned.
/// If `log` is not null, the outcome is also written to it.
static bool tryExecuteQuery(const String & query, Context & context, ExecutionStatus & status, Logger * log = nullptr)
{
    const bool logging_enabled = (log != nullptr);

    try
    {
        executeQuery(query, context);
    }
    catch (...)
    {
        status = ExecutionStatus::fromCurrentException();

        if (logging_enabled)
        {
            tryLogCurrentException(log, "Query " + query + " wasn't finished successfully");
        }

        return false;
    }

    status = ExecutionStatus(0);

    if (logging_enabled)
    {
        LOG_DEBUG(log, "Executed query: " << query);
    }

    return true;
}
/// Executes one DDL queue entry on this host and commits the result to ZooKeeper.
///
/// node      - parsed queue entry (query text and host list)
/// node_name - name of the entry's child node under queue_dir
///
/// Steps:
///  1. make sure the "active"/"finished" status directories exist;
///  2. unless this very node was already executed by a previous attempt (see below), create the ephemeral
///     "active" flag, parse the query, strip ON CLUSTER and execute it locally;
///  3. atomically remove the "active" flag and create the persistent "finished" flag holding the serialized status.
///
/// zkutil::KeeperException raised while preparing/executing is rethrown to the caller; any other exception
/// is converted into current_node_execution_status and reported through the "finished" flag.
/// Throws zkutil::KeeperException when the final commit fails with a code other than ZOK / ZNONODE.
void DDLWorker::processTask(const DDLLogEntry & node, const std::string & node_name)
{
    LOG_DEBUG(log, "Processing node " << node_name << " (" << node.query << ")");

    String node_path = queue_dir + "/" + node_name;
    createStatusDirs(node_path);

    /// A previous attempt may have executed this node and failed only while committing to ZooKeeper.
    /// In that case the query must not run again: only the commit below is retried with the saved status.
    bool should_not_execute = current_node == node_name && current_node_was_executed;

    if (!should_not_execute)
    {
        current_node = node_name;
        current_node_was_executed = false;

        zookeeper->create(node_path + "/active/" + host_id, "", zkutil::CreateMode::Ephemeral);

        try
        {
            ASTPtr query_ast;
            {
                ParserQuery parser_query;
                String description;
                /// data() is valid for an empty string as well, unlike &front()
                IParser::Pos begin = node.query.data();
                query_ast = parseQuery(parser_query, begin, begin + node.query.size(), description);
            }

            const ASTQueryWithOnCluster * query;
            if (!query_ast || !(query = dynamic_cast<const ASTQueryWithOnCluster *>(query_ast.get())))
                throw Exception("Recieved unsupported DDL query", ErrorCodes::NOT_IMPLEMENTED);

            String cluster_name = query->cluster;
            auto cluster = context.getCluster(cluster_name);

            /// Locate this host inside the cluster definition: its default database is needed for the rewrite
            auto shard_host_num = tryGetShardAndHostNum(cluster->getShardsWithFailoverAddresses(), host_name, port);
            if (!shard_host_num)
            {
                throw Exception("Cannot find own address (" + host_id + ") in cluster " + cluster_name + " configuration",
                    ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
            }

            size_t shard_num = shard_host_num->first;
            size_t host_num = shard_host_num->second;

            const auto & host_address = cluster->getShardsWithFailoverAddresses().at(shard_num).at(host_num);
            ASTPtr rewritten_ast = query->getRewrittenASTWithoutOnCluster(host_address.default_database);
            String rewritten_query = queryToString(rewritten_ast);

            LOG_DEBUG(log, "Executing query: " << rewritten_query);

            if (auto query_alter = dynamic_cast<const ASTAlterQuery *>(rewritten_ast.get()))
            {
                processTaskAlter(query_alter, rewritten_query, cluster, shard_num, node_path);
            }
            else
            {
                tryExecuteQuery(rewritten_query, context, current_node_execution_status, log);
            }
        }
        catch (const zkutil::KeeperException &)
        {
            /// ZooKeeper failures are not a query result: let the caller handle them
            throw;
        }
        catch (...)
        {
            current_node_execution_status = ExecutionStatus::fromCurrentException("An error occured during query preparation");
        }

        /// We need to distinguish ZK errors that occurred before and after query execution
        current_node_was_executed = true;
    }

    /// Delete active flag and create finish flag
    /// NOTE(review): ZNONODE is tolerated below; if it is returned because the "active" flag no longer exists,
    /// the "finished" flag is presumably not created either - confirm this is intended.
    zkutil::Ops ops;
    ops.emplace_back(std::make_unique<zkutil::Op::Remove>(node_path + "/active/" + host_id, -1));
    ops.emplace_back(std::make_unique<zkutil::Op::Create>(node_path + "/finished/" + host_id,
        current_node_execution_status.serializeText(), zookeeper->getDefaultACL(), zkutil::CreateMode::Persistent));

    int code = zookeeper->tryMultiWithRetries(ops);
    if (code != ZOK && code != ZNONODE)
        throw zkutil::KeeperException("Cannot commit executed node " + node_name, code);
}
void DDLWorker::processTaskAlter(
const ASTAlterQuery * query_alter,
const String & rewritten_query,
const std::shared_ptr<Cluster> & cluster,
ssize_t shard_num,
const String & node_path)
{
String database = query_alter->database.empty() ? context.getCurrentDatabase() : query_alter->database;
StoragePtr storage = context.getTable(database, query_alter->table);
bool execute_once_on_replica = storage->supportsReplication();
bool execute_on_leader_replica = false;
for (const auto & param : query_alter->parameters)
{
if (!isSupportedAlterType(param.type))
throw Exception("Unsupported type of ALTER query", ErrorCodes::NOT_IMPLEMENTED);
if (execute_once_on_replica)
execute_on_leader_replica |= param.type == ASTAlterQuery::DROP_PARTITION;
}
const auto & shard_info = cluster->getShardsInfo().at(shard_num);
bool config_is_replicated_shard = shard_info.hasInternalReplication();
if (execute_once_on_replica && !config_is_replicated_shard)
{
throw Exception("Table " + query_alter->table + " is replicated, but shard #" + toString(shard_num + 1) +
" isn't replicated according to its cluster definition", ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
}
else if (!execute_once_on_replica && config_is_replicated_shard)
{
throw Exception("Table " + query_alter->table + " isn't replicated, but shard #" + toString(shard_num + 1) +
" replicated according to its cluster definition", ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION);
}
if (execute_once_on_replica)
{
/// The following code may perform ALTER twice if
/// current secver aquires lock, executes replicated alter,
/// losts zookeeper connection and doesn't have time to create /executed node, second server executes replicated alter again
/// To avoid this problem alter() method of replicated tables should be changed and takes into account ddl query id tag.
if (!context.getSettingsRef().distributed_ddl_allow_replicated_alter)
throw Exception("Distributed DDL alters don't work properly yet", ErrorCodes::NOT_IMPLEMENTED);
Strings replica_names;
for (const auto & address : cluster->getShardsWithFailoverAddresses().at(shard_num))
replica_names.emplace_back(address.toString());
std::sort(replica_names.begin(), replica_names.end());
String shard_dir_name;
for (auto it = replica_names.begin(); it != replica_names.end(); ++it)
shard_dir_name += *it + (std::next(it) != replica_names.end() ? "," : "");
String shard_path = node_path + "/shards/" + shard_dir_name;
String is_executed_path = shard_path + "/executed";
zookeeper->createAncestors(shard_path + "/");
bool alter_executed_by_replica = false;
{
auto zookeeper_holder = std::make_shared<zkutil::ZooKeeperHolder>();
zookeeper_holder->initFromInstance(zookeeper);
zkutil::Lock lock(zookeeper_holder, shard_path, "lock", host_id);
std::mt19937 rng(std::hash<String>{}(host_id) + reinterpret_cast<intptr_t>(&rng));
for (int num_tries = 0; num_tries < 10; ++num_tries)
{
if (zookeeper->exists(is_executed_path))
{
alter_executed_by_replica = true;
break;
}
if (lock.tryLock())
{
tryExecuteQuery(rewritten_query, context, current_node_execution_status, log);
if (execute_on_leader_replica && current_node_execution_status.code == ErrorCodes::NOT_IMPLEMENTED)
{
/// TODO: it is ok to recieve exception "host is not leader"
}
zookeeper->create(is_executed_path, host_id, zkutil::CreateMode::Persistent);
lock.unlock();
alter_executed_by_replica = true;
break;
}
std::this_thread::sleep_for(std::chrono::duration<double>(std::uniform_real_distribution<double>(0, 1)(rng)));
}
}
if (!alter_executed_by_replica)
current_node_execution_status = ExecutionStatus(ErrorCodes::NOT_IMPLEMENTED, "Cannot enqueue replicated DDL query");
}
else
{
tryExecuteQuery(rewritten_query, context, current_node_execution_status, log);
}
}
void DDLWorker::cleanupQueue(const Strings * node_names_to_check)
{
/// Both ZK and Poco use Unix epoch
size_t current_time_seconds = Poco::Timestamp().epochTime();
constexpr size_t zookeeper_time_resolution = 1000;
// Too early to check
if (last_cleanup_time_seconds && current_time_seconds < last_cleanup_time_seconds + cleanup_min_period_seconds)
return;
last_cleanup_time_seconds = current_time_seconds;
LOG_DEBUG(log, "Cleaning queue");
String data;
zkutil::Stat stat;
DDLLogEntry node;
Strings node_names_fetched = node_names_to_check ? Strings{} : zookeeper->getChildren(queue_dir);
const Strings & node_names = (node_names_to_check) ? *node_names_to_check : node_names_fetched;
for (const String & node_name : node_names)
{
try
{
String node_path = queue_dir + "/" + node_name;
if (!zookeeper->tryGet(node_path, data, &stat))
continue;
/// TODO: Add shared lock to avoid rare race counditions.
size_t zookeeper_time_seconds = stat.mtime / zookeeper_time_resolution;
if (zookeeper_time_seconds + node_max_lifetime_seconds < current_time_seconds)
{
size_t lifetime_seconds = current_time_seconds - zookeeper_time_seconds;
LOG_INFO(log, "Lifetime of node " << node_name << " (" << lifetime_seconds << " sec.) is expired, deleting it");
zookeeper->removeRecursive(node_path);
continue;
}
Strings finished_nodes = zookeeper->getChildren(node_path + "/finished");
node.parse(data);
if (finished_nodes.size() >= node.hosts.size())
{
LOG_INFO(log, "Node " << node_name << " had been executed by each host, deleting it");
zookeeper->removeRecursive(node_path);
}
}
catch (...)
{
tryLogCurrentException(log, "An error occured while checking and cleaning node " + node_name + " from queue");
}
}
}
/// Try to create unexisting "status" dirs for a node
void DDLWorker::createStatusDirs(const std::string & node_path)
{
zkutil::Ops ops;
auto acl = zookeeper->getDefaultACL();
ops.emplace_back(std::make_unique<zkutil::Op::Create>(node_path + "/active", "", acl, zkutil::CreateMode::Persistent));
ops.emplace_back(std::make_unique<zkutil::Op::Create>(node_path + "/finished", "", acl, zkutil::CreateMode::Persistent));
int code = zookeeper->tryMulti(ops);
if (code != ZOK && code != ZNODEEXISTS)
throw zkutil::KeeperException(code);
}
/// Pushes `entry` into the DDL queue as a new persistent sequential node "<queue_dir>/query-NNN"
/// and creates its "active"/"finished" status directories.
///
/// Returns the path of the created node, or an empty string when the entry has no hosts
/// (nothing is written to ZooKeeper in that case).
/// Throws zkutil::KeeperException when the status directories cannot be created.
String DDLWorker::enqueueQuery(DDLLogEntry & entry)
{
    if (entry.hosts.empty())
        return {};

    /// Take a session handle here instead of using the `zookeeper` member.
    /// The member is assigned (and reassigned on session recovery) by run() on the worker thread,
    /// whereas this method is reached from executeDDLQueryOnCluster(), so sharing it unsynchronized
    /// would be a data race and the member may not be set yet.
    auto zk = context.getZooKeeper();

    String query_path_prefix = queue_dir + "/query-";
    zk->createAncestors(query_path_prefix);

    String node_path = zk->create(query_path_prefix, entry.toString(), zkutil::CreateMode::PersistentSequential);

    /// Same request as in createStatusDirs(), but issued through the local handle
    zkutil::Ops ops;
    auto acl = zk->getDefaultACL();
    ops.emplace_back(std::make_unique<zkutil::Op::Create>(node_path + "/active", "", acl, zkutil::CreateMode::Persistent));
    ops.emplace_back(std::make_unique<zkutil::Op::Create>(node_path + "/finished", "", acl, zkutil::CreateMode::Persistent));

    int code = zk->tryMulti(ops);
    if (code != ZOK && code != ZNODEEXISTS)
        throw zkutil::KeeperException(code);

    return node_path;
}
void DDLWorker::run()
{
setThreadName("DDLWorker");
LOG_DEBUG(log, "Started DDLWorker thread");
zookeeper = context.getZooKeeper();
zookeeper->createAncestors(queue_dir + "/");
while (!stop_flag)
{
try
{
processTasks();
LOG_DEBUG(log, "Waiting watch");
event_queue_updated->wait();
if (stop_flag)
break;
cleanupQueue();
}
catch (zkutil::KeeperException &)
{
LOG_DEBUG(log, "Recovering ZooKeeper session");
zookeeper = context.getZooKeeper();
}
catch (...)
{
tryLogCurrentException(log);
throw;
}
}
}
/// Input stream that reports the execution status of a distributed DDL query.
///
/// It polls the "finished" and "active" children of the query's ZooKeeper node and emits one row
/// (host, status, error, num_hosts_remaining, num_hosts_active) per host that has newly finished.
/// The expected number of hosts is passed to setTotalRowsApprox() and later read back as total_rows_approx
/// (presumably a member of the base class - it is not declared here).
class DDLQueryStatusInputSream : public IProfilingBlockInputStream
{
public:
    /// zk_node_path - ZooKeeper path of the queue entry to watch
    /// context      - used to obtain a ZooKeeper handle on every read
    /// num_hosts    - number of hosts that are expected to report a status
    DDLQueryStatusInputSream(const String & zk_node_path, Context & context, size_t num_hosts)
        : node_path(zk_node_path), context(context), watch(CLOCK_MONOTONIC_COARSE)
    {
        sample = Block{
            {std::make_shared<DataTypeString>(),    "host"},
            {std::make_shared<DataTypeUInt64>(),    "status"},
            {std::make_shared<DataTypeString>(),    "error"},
            {std::make_shared<DataTypeUInt64>(),    "num_hosts_remaining"},
            {std::make_shared<DataTypeUInt64>(),    "num_hosts_active"},
        };

        setTotalRowsApprox(num_hosts);
    }

    String getName() const override
    {
        return "DDLQueryStatusInputSream";
    }

    String getID() const override
    {
        return "DDLQueryStatusInputSream(" + node_path + ")";
    }

    /// Reading fails with TIMEOUT_EXCEEDED once the stream has existed for longer than this
    static constexpr size_t timeout_seconds = 120;

    /// Blocks until at least one more host has finished and returns a block with the rows of those hosts.
    /// Returns an empty block when all hosts have already been reported or when the stream is cancelled.
    /// Throws TIMEOUT_EXCEEDED after timeout_seconds and UNFINISHED if the query's node no longer exists.
    Block readImpl() override
    {
        Block res;
        if (num_hosts_finished >= total_rows_approx)
            return res;

        auto zookeeper = context.getZooKeeper();
        size_t try_number = 0;

        while (res.rows() == 0)
        {
            if (is_cancelled)
                return res;

            auto elapsed_seconds = watch.elapsedSeconds();
            if (elapsed_seconds > timeout_seconds)
                throw Exception("Watching query is executing too long (" + toString(std::round(elapsed_seconds)) + " sec.)", ErrorCodes::TIMEOUT_EXCEEDED);

            /// No pause before the very first poll; afterwards the pause grows by 50 ms per try up to 1 second.
            /// The template argument is explicit because size_t is not `unsigned long` on every platform.
            if (num_hosts_finished != 0 || try_number != 0)
                std::this_thread::sleep_for(std::chrono::milliseconds(50 * std::min<size_t>(20, try_number + 1)));

            /// TODO: add shared lock
            if (!zookeeper->exists(node_path))
            {
                throw Exception("Cannot provide query execution status. The query's node " + node_path
                    + " had been deleted by cleaner since it was finished (or its lifetime is expired)",
                    ErrorCodes::UNFINISHED);
            }

            Strings new_hosts = getNewAndUpdate(finished_hosts_set, getChildrenAllowNoNode(zookeeper, node_path + "/finished"));
            ++try_number;
            if (new_hosts.empty())
                continue;

            Strings cur_active_hosts = getChildrenAllowNoNode(zookeeper, node_path + "/active");

            res = sample.cloneEmpty();
            for (const String & host : new_hosts)
            {
                /// Reported as is when the status node cannot be read
                ExecutionStatus status(1, "Cannot obtain error message");
                {
                    String status_data;
                    if (zookeeper->tryGet(node_path + "/finished/" + host, status_data))
                        status.deserializeText(status_data);
                }

                res.getByName("host").column->insert(host);
                res.getByName("status").column->insert(static_cast<UInt64>(status.code));
                res.getByName("error").column->insert(status.message);
                res.getByName("num_hosts_remaining").column->insert(total_rows_approx - (++num_hosts_finished));
                res.getByName("num_hosts_active").column->insert(cur_active_hosts.size());
            }
        }

        return res;
    }

    /// Returns the children of `node_path`; a missing node (ZNONODE) yields an empty list.
    /// Any other error code is thrown as zkutil::KeeperException.
    static Strings getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path)
    {
        Strings res;
        int code = zookeeper->tryGetChildren(node_path, res);
        if (code != ZOK && code != ZNONODE)
            throw zkutil::KeeperException(code, node_path);
        return res;
    }

    /// Returns the elements of `cur_list` that are not yet in `prev`, in list order, and adds them to `prev`
    static Strings getNewAndUpdate(NameSet & prev, const Strings & cur_list)
    {
        Strings diff;
        for (const String & elem : cur_list)
        {
            if (!prev.count(elem))
            {
                diff.emplace_back(elem);
                prev.emplace(elem);
            }
        }

        return diff;
    }

    ~DDLQueryStatusInputSream() override = default;

    /// Empty block describing the columns of the result
    Block sample;

private:
    String node_path;
    Context & context;

    /// Measures the time since the stream was constructed, for the timeout check
    Stopwatch watch;

    /// Hosts that have already been returned to the reader
    NameSet finished_hosts_set;
    size_t num_hosts_finished = 0;
};
/// Puts a DDL query with an ON CLUSTER clause into the distributed DDL queue.
///
/// The entry lists every address of every shard of the target cluster as a host.
/// Returns a BlockIO whose input stream reports the per-host execution status;
/// the BlockIO is left empty when enqueueQuery() returns an empty path.
/// Throws NOT_IMPLEMENTED when the query does not support ON CLUSTER or contains an unsupported ALTER type.
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, Context & context)
{
    const auto * on_cluster_query = dynamic_cast<const ASTQueryWithOnCluster *>(query_ptr.get());
    if (!on_cluster_query)
    {
        throw Exception("Distributed execution is not supported for such DDL queries",
            ErrorCodes::NOT_IMPLEMENTED);
    }

    /// Reject unsupported ALTERs right away, before anything is enqueued
    if (const auto * alter = dynamic_cast<const ASTAlterQuery *>(query_ptr.get()))
    {
        for (const auto & param : alter->parameters)
        {
            if (!isSupportedAlterType(param.type))
                throw Exception("Unsupported type of ALTER query", ErrorCodes::NOT_IMPLEMENTED);
        }
    }

    ClusterPtr cluster = context.getCluster(on_cluster_query->cluster);
    DDLWorker & ddl_worker = context.getDDLWorker();

    DDLLogEntry entry;
    entry.query = queryToString(query_ptr);
    entry.initiator = ddl_worker.getHostName();

    Cluster::AddressesWithFailover all_shards = cluster->getShardsWithFailoverAddresses();
    for (const auto & shard_addresses : all_shards)
    {
        for (const auto & address : shard_addresses)
            entry.hosts.emplace_back(address.toString());
    }

    String node_path = ddl_worker.enqueueQuery(entry);

    BlockIO io;
    if (!node_path.empty())
    {
        auto status_stream = std::make_shared<DDLQueryStatusInputSream>(node_path, context, entry.hosts.size());
        io.in_sample = status_stream->sample.cloneEmpty();
        io.in = std::move(status_stream);
    }

    return io;
}
}

View File

@ -0,0 +1,93 @@
#pragma once
#include <Interpreters/Context.h>
#include <Interpreters/Cluster.h>
#include <DataStreams/BlockIO.h>
#include <common/logger_useful.h>
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>
namespace DB
{
class ASTAlterQuery;
struct DDLLogEntry;
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr, Context & context);
/// Worker that executes distributed DDL queries taken from a queue stored in ZooKeeper.
class DDLWorker
{
public:
    DDLWorker(const std::string & zk_root_dir, Context & context_);
    ~DDLWorker();

    /// Pushes query into DDL queue, returns path to created node
    String enqueueQuery(DDLLogEntry & entry);

    /// Returns host_id, i.e. the identifier in the form host_name:port
    std::string getHostName() const
    {
        return host_id;
    }

private:
    void processTasks();

    /// node_name is the name of the entry's node inside queue_dir, not its full path
    void processTask(const DDLLogEntry & node, const std::string & node_name);

    void processTaskAlter(
        const ASTAlterQuery * query_alter,
        const String & rewritten_query,
        const std::shared_ptr<Cluster> & cluster,
        ssize_t shard_num,
        const String & node_path);

    /// Checks and cleanups queue's nodes
    void cleanupQueue(const Strings * node_names_to_check = nullptr);

    /// Creates the status directories of a queue entry; node_path is the full ZooKeeper path of the entry
    void createStatusDirs(const std::string & node_path);

    ASTPtr getRewrittenQuery(const DDLLogEntry & node);

    void run();

private:
    Context & context;
    Logger * log = &Logger::get("DDLWorker");

    std::string host_id;        /// host_name:port
    std::string host_name;
    UInt16 port = 0;

    std::string queue_dir;      /// dir with queue of queries
    std::string master_dir;     /// dir with queries was initiated by the server

    /// Used to omit already processed nodes. Maybe usage of set is more obvious.
    std::string last_processed_node_name;

    std::shared_ptr<zkutil::ZooKeeper> zookeeper;

    /// Save state of executed task to avoid duplicate execution on ZK error
    std::string current_node = {};
    bool current_node_was_executed = false;
    ExecutionStatus current_node_execution_status;

    std::shared_ptr<Poco::Event> event_queue_updated;

    std::atomic<bool> stop_flag{false};
    std::thread thread;

    size_t last_cleanup_time_seconds = 0;

    /// Delete node if its age is greater than that
    static const size_t node_max_lifetime_seconds;
    /// Cleaning starts after new node event is received if the last cleaning wasn't made sooner than N seconds ago
    static const size_t cleanup_min_period_seconds;

    friend class DDLQueryStatusInputSream;
};
}

View File

@ -209,9 +209,6 @@ void InJoinSubqueriesPreprocessor::process(ASTSelectQuery * query) const
bool InJoinSubqueriesPreprocessor::hasAtLeastTwoShards(const IStorage & table) const
{
if (!table.isRemote())
return false;
const StorageDistributed * distributed = typeid_cast<const StorageDistributed *>(&table);
if (!distributed)
return false;

View File

@ -1,5 +1,6 @@
#include <Interpreters/InterpreterAlterQuery.h>
#include <Interpreters/InterpreterCreateQuery.h>
#include <Interpreters/DDLWorker.h>
#include <Parsers/ASTAlterQuery.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTExpressionList.h>
@ -39,6 +40,10 @@ InterpreterAlterQuery::InterpreterAlterQuery(const ASTPtr & query_ptr_, const Co
BlockIO InterpreterAlterQuery::execute()
{
auto & alter = typeid_cast<ASTAlterQuery &>(*query_ptr);
if (!alter.cluster.empty())
return executeDDLQueryOnCluster(query_ptr, context);
const String & table_name = alter.table;
String database_name = alter.database.empty() ? context.getCurrentDatabase() : alter.database;
StoragePtr table = context.getTable(database_name, table_name);

View File

@ -23,6 +23,7 @@
#include <Parsers/ASTLiteral.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <Storages/StorageFactory.h>
#include <Storages/StorageLog.h>
@ -31,6 +32,7 @@
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/InterpreterCreateQuery.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/DDLWorker.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNested.h>
@ -39,6 +41,7 @@
#include <Databases/DatabaseFactory.h>
#include <Databases/IDatabase.h>
#include <zkutil/ZooKeeper.h>
namespace DB
{
@ -63,12 +66,15 @@ InterpreterCreateQuery::InterpreterCreateQuery(const ASTPtr & query_ptr_, Contex
}
void InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
{
if (!create.cluster.empty())
return executeDDLQueryOnCluster(query_ptr, context);
String database_name = create.database;
if (create.if_not_exists && context.isDatabaseExist(database_name))
return;
return {};
String database_engine_name;
if (!create.storage)
@ -147,6 +153,8 @@ void InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
throw;
}
return {};
}
@ -460,6 +468,9 @@ String InterpreterCreateQuery::setEngine(
BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
{
if (!create.cluster.empty())
return executeDDLQueryOnCluster(query_ptr, context);
String path = context.getPath();
String current_database = context.getCurrentDatabase();
@ -572,8 +583,7 @@ BlockIO InterpreterCreateQuery::execute()
/// CREATE|ATTACH DATABASE
if (!create.database.empty() && create.table.empty())
{
createDatabase(create);
return {};
return createDatabase(create);
}
else
return createTable(create);

View File

@ -55,7 +55,7 @@ public:
static ColumnsInfo getColumnsInfo(const ASTPtr & columns, const Context & context);
private:
void createDatabase(ASTCreateQuery & create);
BlockIO createDatabase(ASTCreateQuery & create);
BlockIO createTable(ASTCreateQuery & create);
/// Calculate list of columns of table and return it.

View File

@ -6,6 +6,7 @@
#include <Interpreters/InterpreterDropQuery.h>
#include <Storages/IStorage.h>
#include <Databases/IDatabase.h>
#include <Interpreters/DDLWorker.h>
namespace DB
@ -27,11 +28,14 @@ InterpreterDropQuery::InterpreterDropQuery(const ASTPtr & query_ptr_, Context &
BlockIO InterpreterDropQuery::execute()
{
ASTDropQuery & drop = typeid_cast<ASTDropQuery &>(*query_ptr);
if (!drop.cluster.empty())
return executeDDLQueryOnCluster(query_ptr, context);
String path = context.getPath();
String current_database = context.getCurrentDatabase();
ASTDropQuery & drop = typeid_cast<ASTDropQuery &>(*query_ptr);
bool drop_database = drop.table.empty() && !drop.database.empty();
if (drop_database && drop.detach)
@ -85,6 +89,13 @@ BlockIO InterpreterDropQuery::execute()
for (auto & table : tables_to_drop)
{
if (!drop.detach)
{
if (!table.first->checkTableCanBeDropped())
throw Exception("Table " + database_name + "." + table.first->getTableName() + " couldn't be dropped due to failed pre-drop check",
ErrorCodes::TABLE_WAS_NOT_DROPPED);
}
table.first->shutdown();
/// If table was already dropped by anyone, an exception will be thrown
@ -99,10 +110,6 @@ BlockIO InterpreterDropQuery::execute()
}
else
{
if (!table.first->checkTableCanBeDropped())
throw Exception("Table " + database_name + "." + current_table_name + " couldn't be dropped due to failed pre-drop check",
ErrorCodes::TABLE_WAS_NOT_DROPPED);
/// Delete table metdata and table itself from memory
database->removeTable(current_table_name);
/// Delete table data

View File

@ -18,7 +18,7 @@ class InterpreterDropQuery : public IInterpreter
public:
InterpreterDropQuery(const ASTPtr & query_ptr_, Context & context_);
/// Drop table.
/// Drop table or database.
BlockIO execute() override;
private:

View File

@ -3,7 +3,7 @@
#include <Interpreters/Context.h>
#include <Interpreters/InterpreterRenameQuery.h>
#include <Storages/IStorage.h>
#include <Interpreters/DDLWorker.h>
namespace DB
@ -35,11 +35,14 @@ struct RenameDescription
BlockIO InterpreterRenameQuery::execute()
{
ASTRenameQuery & rename = typeid_cast<ASTRenameQuery &>(*query_ptr);
if (!rename.cluster.empty())
return executeDDLQueryOnCluster(query_ptr, context);
String path = context.getPath();
String current_database = context.getCurrentDatabase();
ASTRenameQuery & rename = typeid_cast<ASTRenameQuery &>(*query_ptr);
/** In case of error while renaming, it is possible that only part of tables was renamed
* or we will be in inconsistent state. (It is worth to be fixed.)
*/

View File

@ -838,24 +838,13 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns()
if (max_streams > 1 && !is_remote)
max_streams *= settings.max_streams_to_max_threads_ratio;
ASTPtr actual_query_ptr;
if (storage->isRemote())
{
/// In case of a remote query, we send only SELECT, which will be executed.
actual_query_ptr = query.cloneFirstSelect();
}
else
actual_query_ptr = query_ptr;
/// PREWHERE optimization
{
auto optimize_prewhere = [&](auto & merge_tree)
{
const ASTSelectQuery & actual_select = typeid_cast<const ASTSelectQuery &>(*actual_query_ptr);
/// Try transferring some condition from WHERE to PREWHERE if enabled and viable
if (settings.optimize_move_to_prewhere && actual_select.where_expression && !actual_select.prewhere_expression && !actual_select.final())
MergeTreeWhereOptimizer{actual_query_ptr, context, merge_tree.getData(), required_columns, log};
if (settings.optimize_move_to_prewhere && query.where_expression && !query.prewhere_expression && !query.final())
MergeTreeWhereOptimizer{query_ptr, context, merge_tree.getData(), required_columns, log};
};
if (const StorageMergeTree * merge_tree = typeid_cast<const StorageMergeTree *>(storage.get()))
@ -864,8 +853,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns()
optimize_prewhere(*merge_tree);
}
streams = storage->read(required_columns, actual_query_ptr,
context, from_stage, max_block_size, max_streams);
streams = storage->read(required_columns, query_ptr, context, from_stage, max_block_size, max_streams);
if (alias_actions)
{
@ -1316,11 +1304,6 @@ void InterpreterSelectQuery::executeLimit()
void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(SubqueriesForSets & subqueries_for_sets)
{
/// If the query is not distributed, then remove the creation of temporary tables from subqueries (intended for sending to remote servers).
if (!(storage && storage->isRemote()))
for (auto & elem : subqueries_for_sets)
elem.second.table.reset();
const Settings & settings = context.getSettingsRef();
executeUnion();

View File

@ -9,7 +9,7 @@
#include <DataStreams/OneBlockInputStream.h>
#include <DataTypes/DataTypeArray.h>
#include <Functions/DataTypeTraits.h>
#include <DataTypes/DataTypeTraits.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>

View File

@ -275,7 +275,9 @@ struct Settings
/** Suppose max_replica_delay_for_distributed_queries is set and all replicas for the queried table are stale. \
* If this setting is enabled, the query will be performed anyway, otherwise the error will be reported. \
*/ \
M(SettingBool, fallback_to_stale_replicas_for_distributed_queries, 1)
M(SettingBool, fallback_to_stale_replicas_for_distributed_queries, 1) \
/** For development and testing purposes only still */ \
M(SettingBool, distributed_ddl_allow_replicated_alter, 0)
/// Possible limits for query execution.

View File

@ -556,14 +556,6 @@ struct SettingCompressionMethod
static CompressionMethod getCompressionMethod(const String & s)
{
if (s == "quicklz")
{
#ifdef USE_QUICKLZ
return CompressionMethod::QuickLZ;
#else
throw Exception("QuickLZ compression method is disabled", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
#endif
}
if (s == "lz4")
return CompressionMethod::LZ4;
if (s == "lz4hc")
@ -571,14 +563,14 @@ struct SettingCompressionMethod
if (s == "zstd")
return CompressionMethod::ZSTD;
throw Exception("Unknown compression method: '" + s + "', must be one of 'quicklz', 'lz4', 'lz4hc', 'zstd'", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
throw Exception("Unknown compression method: '" + s + "', must be one of 'lz4', 'lz4hc', 'zstd'", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
}
String toString() const
{
const char * strings[] = { "quicklz", "lz4", "lz4hc", "zstd" };
const char * strings[] = { nullptr, "lz4", "lz4hc", "zstd" };
if (value < CompressionMethod::QuickLZ || value > CompressionMethod::ZSTD)
if (value < CompressionMethod::LZ4 || value > CompressionMethod::ZSTD)
throw Exception("Unknown compression method", ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
return strings[static_cast<size_t>(value)];

View File

@ -11,7 +11,7 @@
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeNullable.h>
#include <Functions/DataTypeTraits.h>
#include <DataTypes/DataTypeTraits.h>
#include <Core/AccurateComparison.h>
#include <Core/FieldVisitors.h>

View File

@ -15,13 +15,13 @@ ASTAlterQuery::Parameters::Parameters() : type(NO_TYPE) {}
void ASTAlterQuery::Parameters::clone(Parameters & p) const
{
p = *this;
if (col_decl) p.col_decl = col_decl->clone();
if (column) p.column = column->clone();
if (partition) p.partition = partition->clone();
if (last_partition) p.last_partition = last_partition->clone();
if (col_decl) p.col_decl = col_decl->clone();
if (column) p.column = column->clone();
if (partition) p.partition = partition->clone();
if (last_partition) p.last_partition = last_partition->clone();
if (weighted_zookeeper_paths) p.weighted_zookeeper_paths = weighted_zookeeper_paths->clone();
if (sharding_key_expr) p.sharding_key_expr = sharding_key_expr->clone();
if (coordinator) p.coordinator = coordinator->clone();
if (sharding_key_expr) p.sharding_key_expr = sharding_key_expr->clone();
if (coordinator) p.coordinator = coordinator->clone();
}
void ASTAlterQuery::addParameters(const Parameters & params)
@ -63,6 +63,18 @@ ASTPtr ASTAlterQuery::clone() const
return res;
}
/// Returns a clone of this query with the ON CLUSTER part cleared.
/// If the query does not name a database, `new_database` is put in its place.
ASTPtr ASTAlterQuery::getRewrittenASTWithoutOnCluster(const std::string & new_database) const
{
    ASTPtr rewritten = clone();
    auto & alter = static_cast<ASTAlterQuery &>(*rewritten);

    alter.cluster.clear();

    const bool database_is_missing = alter.database.empty();
    if (database_is_missing)
        alter.database = new_database;

    return rewritten;
}
void ASTAlterQuery::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
{
frame.need_parens = false;
@ -80,6 +92,7 @@ void ASTAlterQuery::formatImpl(const FormatSettings & settings, FormatState & st
}
settings.ostr << indent_str << backQuoteIfNeed(table);
}
formatOnCluster(settings);
settings.ostr << settings.nl_or_ws;
for (size_t i = 0; i < parameters.size(); ++i)

View File

@ -1,24 +1,25 @@
#pragma once
#include <Parsers/IAST.h>
#include <Parsers/ASTQueryWithOnCluster.h>
namespace DB
{
/** ALTER query
/** ALTER query:
* ALTER TABLE [db.]name_type
* ADD COLUMN col_name type [AFTER col_after],
* DROP COLUMN col_drop [FROM PARTITION partition],
* MODIFY COLUMN col_name type,
* DROP PARTITION partition
* RESHARD [COPY] PARTITION partition
* TO '/path/to/zookeeper/table' [WEIGHT w], ...
* USING expression
* [COORDINATE WITH 'coordinator_id']
* DROP COLUMN col_drop [FROM PARTITION partition],
* MODIFY COLUMN col_name type,
* DROP PARTITION partition,
* RESHARD [COPY] PARTITION partition
* TO '/path/to/zookeeper/table' [WEIGHT w], ...
* USING expression
* [COORDINATE WITH 'coordinator_id']
*/
class ASTAlterQuery : public IAST
class ASTAlterQuery : public IAST, public ASTQueryWithOnCluster
{
public:
enum ParameterType
@ -101,6 +102,8 @@ public:
ASTPtr clone() const override;
ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database = {}) const override;
protected:
void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
};

Some files were not shown because too many files have changed in this diff Show More