This commit is contained in:
Roman Peshkurov 2015-05-21 16:35:54 +03:00
commit 360fc4cf71
95 changed files with 3673 additions and 569 deletions

View File

@ -10,6 +10,6 @@ else()
endif() endif()
macro (add_check target) macro (add_check target)
add_test(test_${target} ${target}) add_test(NAME test_${target} COMMAND ${target} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
add_dependencies(check ${target}) add_dependencies(check ${target})
endmacro (add_check) endmacro (add_check)

View File

@ -0,0 +1,489 @@
#pragma once
#include <DB/AggregateFunctions/IAggregateFunction.h>
#include <DB/DataTypes/DataTypeDateTime.h>
#include <DB/Parsers/CommonParsers.h>
#include <statdaemons/ext/range.hpp>
#include <boost/range/iterator_range_core.hpp>
#include <DB/Parsers/ExpressionElementParsers.h>
#include <DB/Parsers/ASTLiteral.h>
#include <bitset>
#include <stack>
namespace DB
{
/// Helper that compares two std::pair instances solely by their .first members,
/// using the supplied comparator template (e.g. std::less) for the comparison.
template <template <typename> class Comparator>
struct ComparePairFirst final
{
    template <typename T1, typename T2>
    bool operator()(const std::pair<T1, T2> & lhs, const std::pair<T1, T2> & rhs) const
    {
        const Comparator<T1> compare_first{};
        return compare_first(lhs.first, rhs.first);
    }
};
/// State of the sequenceMatch aggregate function: a list of (timestamp, event bitmask)
/// rows, kept lazily sorted by timestamp (`sorted` records whether eventsList is ordered).
struct AggregateFunctionSequenceMatchData final
{
/// Upper bound on the number of event condition columns the function accepts.
static constexpr auto max_events = 32;
using Timestamp = std::uint32_t;
/// Bit i is set when condition column i+1 was true for the row.
using Events = std::bitset<max_events>;
using TimestampEvents = std::pair<Timestamp, Events>;
/// Orders (timestamp, events) pairs by timestamp alone.
using Comparator = ComparePairFirst<std::less>;
bool sorted = true;
std::vector<TimestampEvents> eventsList;
/// Record one input row; rows with no events set are skipped entirely.
void add(const Timestamp timestamp, const Events & events)
{
/// store information exclusively for rows with at least one event
if (events.any())
{
eventsList.emplace_back(timestamp, events);
sorted = false;
}
}
/// Append the other state's rows and restore timestamp order.
void merge(const AggregateFunctionSequenceMatchData & other)
{
const auto size = eventsList.size();
eventsList.insert(std::end(eventsList), std::begin(other.eventsList), std::end(other.eventsList));
/// either sort whole container or do so partially merging ranges afterwards
if (!sorted && !other.sorted)
std::sort(std::begin(eventsList), std::end(eventsList), Comparator{});
else
{
/// At least one of the two ranges is already sorted: sort only the unsorted
/// range(s), then merge the two sorted halves in place.
const auto begin = std::begin(eventsList);
const auto middle = std::next(begin, size);
const auto end = std::end(eventsList);
if (!sorted)
std::sort(begin, middle, Comparator{});
if (!other.sorted)
std::sort(middle, end, Comparator{});
std::inplace_merge(begin, middle, end, Comparator{});
}
sorted = true;
}
/// Ensure eventsList is ordered by timestamp (no-op when already sorted).
void sort()
{
if (!sorted)
{
std::sort(std::begin(eventsList), std::end(eventsList), Comparator{});
sorted = true;
}
}
/// Binary format: sorted flag, element count, then (timestamp, bitmask-as-ulong) pairs.
void serialize(WriteBuffer & buf) const
{
writeBinary(sorted, buf);
writeBinary(eventsList.size(), buf);
for (const auto & events : eventsList)
{
writeBinary(events.first, buf);
writeBinary(events.second.to_ulong(), buf);
}
}
void deserialize(ReadBuffer & buf)
{
readBinary(sorted, buf);
std::size_t size;
readBinary(size, buf);
/// Read into a local vector first so this->eventsList stays intact if reading throws.
decltype(eventsList) eventsList;
eventsList.reserve(size);
for (std::size_t i = 0; i < size; ++i)
{
std::uint32_t timestamp;
readBinary(timestamp, buf);
unsigned long events;
readBinary(events, buf);
eventsList.emplace_back(timestamp, Events{events});
}
this->eventsList = std::move(eventsList);
}
};
/** Aggregate function sequenceMatch('pattern')(timestamp, cond1, ..., condN).
  * Checks whether the rows, ordered by timestamp, contain a sequence of events
  * matching the pattern. Pattern syntax:
  *   (K)        - a row where condition number K (1-based) was true;
  *   .          - any row with at least one event;
  *   .*         - zero or more arbitrary rows;
  *   (?t<N), (?t<=N), (?t>N), (?t>=N)
  *              - constraint on the time elapsed (timestamp units) since the
  *                previously matched row.
  * Returns UInt8: 1 if the pattern matched, 0 otherwise.
  */
class AggregateFunctionSequenceMatch final : public IAggregateFunctionHelper<AggregateFunctionSequenceMatchData>
{
public:
    /// One timestamp column plus at least two event condition columns.
    static bool sufficientArgs(const std::size_t arg_count) { return arg_count >= 3; }

    String getName() const override { return "sequenceMatch"; }

    DataTypePtr getReturnType() const override { return new DataTypeUInt8; }

    /// The single parameter is the pattern string.
    void setParameters(const Array & params) override
    {
        if (params.size() != 1)
            throw Exception{
                "Aggregate function " + getName() + " requires exactly one parameter.",
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH
            };

        pattern = params.front().safeGet<std::string>();
    }

    /// Validates the argument types (DateTime followed by UInt8 conditions) and
    /// compiles the pattern into a list of actions.
    void setArguments(const DataTypes & arguments) override
    {
        arg_count = arguments.size();

        if (!sufficientArgs(arg_count))
            throw Exception{
                "Aggregate function " + getName() + " requires at least 3 arguments.",
                ErrorCodes::TOO_LESS_ARGUMENTS_FOR_FUNCTION
            };

        if (arg_count - 1 > Data::max_events)
            throw Exception{
                "Aggregate function " + getName() + " supports up to " +
                    std::to_string(Data::max_events) + " event arguments.",
                ErrorCodes::TOO_MUCH_ARGUMENTS_FOR_FUNCTION
            };

        const auto time_arg = arguments.front().get();
        if (!typeid_cast<const DataTypeDateTime *>(time_arg))
            throw Exception{
                "Illegal type " + time_arg->getName() + " of first argument of aggregate function " +
                    getName() + ", must be DateTime",
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT
            };

        for (const auto i : ext::range(1, arg_count))
        {
            const auto cond_arg = arguments[i].get();
            if (!typeid_cast<const DataTypeUInt8 *>(cond_arg))
                throw Exception{
                    "Illegal type " + cond_arg->getName() + " of argument " + toString(i + 1) +
                        " of aggregate function " + getName() + ", must be UInt8",
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT
                };
        }

        parsePattern();
    }

    void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num) const override
    {
        const auto timestamp = static_cast<const ColumnUInt32 *>(columns[0])->getData()[row_num];

        /// Gather the values of all condition columns for this row into a bitmask.
        Data::Events events;
        for (const auto i : ext::range(1, arg_count))
        {
            const auto event = static_cast<const ColumnUInt8 *>(columns[i])->getData()[row_num];
            events.set(i - 1, event);
        }

        data(place).add(timestamp, events);
    }

    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override
    {
        data(place).merge(data(rhs));
    }

    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
    {
        data(place).serialize(buf);
    }

    void deserializeMerge(AggregateDataPtr place, ReadBuffer & buf) const override
    {
        /// Deserialize into a temporary state and merge it in, rather than overwriting.
        AggregateFunctionSequenceMatchData tmp;
        tmp.deserialize(buf);

        data(place).merge(tmp);
    }

    void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
    {
        /// Matching requires a timestamp-ordered event list; sort() is a no-op when already sorted.
        const_cast<Data &>(data(place)).sort();
        static_cast<ColumnUInt8 &>(to).getData().push_back(match(place));
    }

private:
    enum class PatternActionType
    {
        SpecificEvent,      /// (K) - row where event K occurred
        AnyEvent,           /// .   - any row with at least one event
        KleeneStar,         /// .*  - zero or more arbitrary rows
        TimeLessOrEqual,    /// (?t<=N)
        TimeLess,           /// (?t<N)
        TimeGreaterOrEqual, /// (?t>=N)
        TimeGreater         /// (?t>N)
    };

    /// For debug output only (see the commented trace in match()).
    static std::string to_string(const PatternActionType type)
    {
        static const std::map<PatternActionType, std::string> map{
            { PatternActionType::SpecificEvent, "SpecificEvent" },
            { PatternActionType::AnyEvent, "AnyEvent" },
            { PatternActionType::KleeneStar, "KleeneStar" },
            { PatternActionType::TimeLessOrEqual, "TimeLessOrEqual" },
            { PatternActionType::TimeLess, "TimeLess", },
            { PatternActionType::TimeGreaterOrEqual, "TimeGreaterOrEqual" },
            { PatternActionType::TimeGreater, "TimeGreater" }
        };

        return map.find(type)->second;
    }

    struct PatternAction final
    {
        PatternActionType type;
        std::uint32_t extra;    /// event index for SpecificEvent, time bound for time conditions

        PatternAction() = default;
        PatternAction(const PatternActionType type, const std::uint32_t extra = 0) : type{type}, extra{extra} {}
    };

    using PatternActions = std::vector<PatternAction>;

    /// Translates `pattern` into `actions`. Throws SYNTAX_ERROR on malformed input
    /// and BAD_ARGUMENTS on semantically invalid input (bad event numbers, time
    /// condition not following an event condition).
    void parsePattern()
    {
        /// An implicit leading .* lets the match start anywhere in the event list.
        PatternActions actions{
            { PatternActionType::KleeneStar }
        };

        ParserString special_open_p("(?");
        ParserString special_close_p(")");
        ParserString t_p("t");
        ParserString less_or_equal_p("<=");
        ParserString less_p("<");
        ParserString greater_or_equal_p(">=");
        ParserString greater_p(">");
        ParserString dot_closure_p(".*");
        ParserString dot_p(".");
        ParserNumber number_p;

        auto pos = pattern.data();
        const auto begin = pos;
        const auto end = pos + pattern.size();

        ASTPtr node;
        decltype(pos) max_parsed_pos{};
        Expected expected;

        const auto throw_exception = [&] (const std::string & msg) {
            throw Exception{
                msg + " '" + std::string(pos, end) + "' at position " + std::to_string(pos - begin),
                ErrorCodes::SYNTAX_ERROR
            };
        };

        while (pos < end)
        {
            if (special_open_p.ignore(pos, end))
            {
                if (t_p.ignore(pos, end))
                {
                    PatternActionType type;

                    if (less_or_equal_p.ignore(pos, end))
                        type = PatternActionType::TimeLessOrEqual;
                    else if (less_p.ignore(pos, end))
                        type = PatternActionType::TimeLess;
                    else if (greater_or_equal_p.ignore(pos, end))
                        type = PatternActionType::TimeGreaterOrEqual;
                    else if (greater_p.ignore(pos, end))
                        type = PatternActionType::TimeGreater;
                    else
                        throw_exception("Unknown time condition");

                    if (!number_p.parse(pos, end, node, max_parsed_pos, expected))
                        throw_exception("Could not parse number");

                    if (actions.back().type != PatternActionType::SpecificEvent &&
                        actions.back().type != PatternActionType::AnyEvent &&
                        actions.back().type != PatternActionType::KleeneStar)
                        throw Exception{
                            "Temporal condition should be preceeded by an event condition",
                            ErrorCodes::BAD_ARGUMENTS
                        };

                    actions.emplace_back(type, typeid_cast<const ASTLiteral &>(*node).value.safeGet<UInt64>());
                }
                else if (number_p.parse(pos, end, node, max_parsed_pos, expected))
                {
                    const auto event_number = typeid_cast<const ASTLiteral &>(*node).value.safeGet<UInt64>();
                    /// Events are numbered from 1; event 0 must be rejected explicitly,
                    /// otherwise `event_number - 1` below underflows to a huge index.
                    if (event_number == 0 || event_number > arg_count - 1)
                        throw Exception{
                            "Event number " + std::to_string(event_number) + " is out of range",
                            ErrorCodes::BAD_ARGUMENTS
                        };

                    actions.emplace_back(PatternActionType::SpecificEvent, event_number - 1);
                }
                else
                    throw_exception("Unexpected special sequence");

                if (!special_close_p.ignore(pos, end))
                    throw_exception("Expected closing parenthesis, found");
            }
            else if (dot_closure_p.ignore(pos, end))
                actions.emplace_back(PatternActionType::KleeneStar);
            else if (dot_p.ignore(pos, end))
                actions.emplace_back(PatternActionType::AnyEvent);
            else
                throw_exception("Could not parse pattern, unexpected starting symbol");
        }

        this->actions = std::move(actions);
    }

    /// Runs the compiled pattern against the sorted event list, backtracking at
    /// every point of choice (.* and satisfied time conditions).
    bool match(const ConstAggregateDataPtr & place) const
    {
        const auto action_begin = std::begin(actions);
        const auto action_end = std::end(actions);
        auto action_it = action_begin;

        const auto & data_ref = data(place);
        const auto events_begin = std::begin(data_ref.eventsList);
        const auto events_end = std::end(data_ref.eventsList);
        auto events_it = events_begin;

        /// the row that matched the previous event condition (time conditions are
        /// measured relative to its timestamp)
        auto base_it = events_begin;

        /// an iterator to action plus an iterator to row in events list plus timestamp at the start of sequence
        using backtrack_info = std::tuple<decltype(action_it), decltype(events_it), decltype(base_it)>;
        std::stack<backtrack_info> back_stack;

        /// backtrack if possible
        const auto do_backtrack = [&] {
            while (!back_stack.empty())
            {
                auto & top = back_stack.top();

                action_it = std::get<0>(top);
                events_it = std::next(std::get<1>(top));
                base_it = std::get<2>(top);

                back_stack.pop();

                if (events_it != events_end)
                    return true;
            }

            return false;
        };

        while (action_it != action_end && events_it != events_end)
        {
//          std::cout << "start_timestamp " << base_it->first << "; ";
//          std::cout << "elapsed " << (events_it->first - base_it->first) << "; ";
//          std::cout << "action " << (action_it - action_begin) << " { " << to_string(action_it->type) << ' ' << action_it->extra << " }; ";
//          std::cout << "symbol " << (events_it - events_begin) << " { " << events_it->first << ' ' << events_it->second.to_ulong() << " }" << std::endl;

            if (action_it->type == PatternActionType::SpecificEvent)
            {
                if (events_it->second.test(action_it->extra))
                {
                    /// move to the next action and events
                    base_it = events_it;
                    ++action_it, ++events_it;
                }
                else if (!do_backtrack())
                    /// backtracking failed, bail out
                    break;
            }
            else if (action_it->type == PatternActionType::AnyEvent)
            {
                base_it = events_it;
                ++action_it, ++events_it;
            }
            else if (action_it->type == PatternActionType::KleeneStar)
            {
                /// remember this choice point so we can consume more rows on backtrack
                back_stack.emplace(action_it, events_it, base_it);
                base_it = events_it;
                ++action_it;
            }
            else if (action_it->type == PatternActionType::TimeLessOrEqual)
            {
                if (events_it->first - base_it->first <= action_it->extra)
                {
                    /// condition satisfied, move onto next action
                    back_stack.emplace(action_it, events_it, base_it);
                    base_it = events_it;
                    ++action_it;
                }
                else if (!do_backtrack())
                    break;
            }
            else if (action_it->type == PatternActionType::TimeLess)
            {
                if (events_it->first - base_it->first < action_it->extra)
                {
                    back_stack.emplace(action_it, events_it, base_it);
                    base_it = events_it;
                    ++action_it;
                }
                else if (!do_backtrack())
                    break;
            }
            else if (action_it->type == PatternActionType::TimeGreaterOrEqual)
            {
                if (events_it->first - base_it->first >= action_it->extra)
                {
                    back_stack.emplace(action_it, events_it, base_it);
                    base_it = events_it;
                    ++action_it;
                }
                else if (++events_it == events_end && !do_backtrack())
                    break;
            }
            else if (action_it->type == PatternActionType::TimeGreater)
            {
                if (events_it->first - base_it->first > action_it->extra)
                {
                    back_stack.emplace(action_it, events_it, base_it);
                    base_it = events_it;
                    ++action_it;
                }
                else if (++events_it == events_end && !do_backtrack())
                    break;
            }
            else
                throw Exception{
                    "Unknown PatternActionType",
                    ErrorCodes::LOGICAL_ERROR
                };
        }

        /// if there are some actions remaining
        if (action_it != action_end)
        {
            /// Consume trailing actions that can match the empty sequence. The bounds
            /// check is required: without it, a pattern ending in such an action would
            /// dereference the past-the-end iterator.
            while (action_it != action_end &&
                (action_it->type == PatternActionType::KleeneStar ||
                action_it->type == PatternActionType::TimeLessOrEqual ||
                action_it->type == PatternActionType::TimeLess ||
                (action_it->type == PatternActionType::TimeGreaterOrEqual && action_it->extra == 0)))
                ++action_it;
        }

        return action_it == action_end;
    }

    std::string pattern;
    std::size_t arg_count;
    PatternActions actions;
};
}

View File

@ -0,0 +1,504 @@
#pragma once
#include <DB/IO/WriteHelpers.h>
#include <DB/IO/ReadHelpers.h>
#include <DB/DataTypes/DataTypesNumberFixed.h>
#include <DB/AggregateFunctions/IUnaryAggregateFunction.h>
#include <DB/AggregateFunctions/IBinaryAggregateFunction.h>
#include <DB/Columns/ColumnVector.h>
#include <cmath>
namespace DB
{
namespace
{
/// Returns true when both counts are large and of comparable magnitude.
/// Used while merging two sources of a running mean: if both source sizes
/// are large and comparable, a special, more stable formula must be applied.
bool areComparable(UInt64 a, UInt64 b)
{
    const Float64 sensitivity = 0.001;
    const UInt64 threshold = 10000;

    if ((a == 0) || (b == 0))
        return false;

    const auto bounds = std::minmax(a, b);
    const Float64 ratio = static_cast<Float64>(bounds.first) / bounds.second;
    return ((1 - ratio) < sensitivity) && (bounds.first > threshold);
}
}
/** Статистические аггрегатные функции:
* varSamp - выборочная дисперсия
* stddevSamp - среднее выборочное квадратичное отклонение
* varPop - дисперсия
* stddevPop - среднее квадратичное отклонение
* covarSamp - выборочная ковариация
* covarPop - ковариация
* corr - корреляция
*/
/** Параллельный и инкрементальный алгоритм для вычисления дисперсии.
* Источник: "Updating formulae and a pairwise algorithm for computing sample variances"
* (Chan et al., Stanford University, 12.1979)
*/
/** Accumulator for single-pass variance: Welford-style per-value update and the
  * pairwise merge formula of Chan et al. `Op` turns the accumulated (m2, count)
  * into the final result (sample/population variance or standard deviation).
  */
template<typename T, typename Op>
class AggregateFunctionVarianceData
{
public:
AggregateFunctionVarianceData() = default;
/// Incorporate one value: update the running mean and the sum of squared deviations (m2).
void update(const IColumn & column, size_t row_num)
{
T received = static_cast<const ColumnVector<T> &>(column).getData()[row_num];
Float64 val = static_cast<Float64>(received);
Float64 delta = val - mean;
++count;
mean += delta / count;
m2 += delta * (val - mean);
}
/// Merge another partial state into this one (parallel/incremental combine step).
void mergeWith(const AggregateFunctionVarianceData & source)
{
UInt64 total_count = count + source.count;
if (total_count == 0)
return;
Float64 factor = static_cast<Float64>(count * source.count) / total_count;
Float64 delta = mean - source.mean;
/// When both counts are large and comparable, the weighted-average form of the
/// combined mean is numerically more stable than the delta-correction form.
if (areComparable(count, source.count))
mean = (source.count * source.mean + count * mean) / total_count;
else
mean = source.mean + delta * (static_cast<Float64>(count) / total_count);
m2 += source.m2 + delta * delta * factor;
count = total_count;
}
void serialize(WriteBuffer & buf) const
{
writeVarUInt(count, buf);
writeBinary(mean, buf);
writeBinary(m2, buf);
}
void deserialize(ReadBuffer & buf)
{
readVarUInt(count, buf);
readBinary(mean, buf);
readBinary(m2, buf);
}
/// Append the final value Op::apply(m2, count) to the result column.
void publish(IColumn & to) const
{
static_cast<ColumnFloat64 &>(to).getData().push_back(Op::apply(m2, count));
}
private:
UInt64 count = 0;
Float64 mean = 0.0;
Float64 m2 = 0.0;   /// sum of squared deviations from the current mean
};
/** Основной код для реализации функций varSamp, stddevSamp, varPop, stddevPop.
*/
/** Generic implementation behind varSamp, stddevSamp, varPop and stddevPop.
  * All state handling is delegated to AggregateFunctionVarianceData; `Op`
  * supplies the name and the final computation. Returns Float64.
  */
template<typename T, typename Op>
class AggregateFunctionVariance final
: public IUnaryAggregateFunction<AggregateFunctionVarianceData<T, Op>,
AggregateFunctionVariance<T, Op> >
{
public:
String getName() const override { return Op::name; }
DataTypePtr getReturnType() const override
{
return new DataTypeFloat64;
}
/// The single argument must be a numeric type.
void setArgument(const DataTypePtr & argument) override
{
if (!argument->behavesAsNumber())
throw Exception("Illegal type " + argument->getName() + " of argument for aggregate function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
void addOne(AggregateDataPtr place, const IColumn & column, size_t row_num) const
{
this->data(place).update(column, row_num);
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override
{
this->data(place).mergeWith(this->data(rhs));
}
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
{
this->data(place).serialize(buf);
}
/// Deserialize into a temporary state and merge it in, rather than overwriting.
void deserializeMerge(AggregateDataPtr place, ReadBuffer & buf) const override
{
AggregateFunctionVarianceData<T, Op> source;
source.deserialize(buf);
this->data(place).mergeWith(source);
}
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
{
this->data(place).publish(to);
}
};
namespace
{

/** varSamp: unbiased sample variance, m2 / (count - 1). */
struct VarSampImpl
{
    static constexpr auto name = "varSamp";

    static inline Float64 apply(Float64 m2, UInt64 count)
    {
        return (count < 2) ? std::numeric_limits<Float64>::infinity() : (m2 / (count - 1));
    }
};

/** stddevSamp: square root of the sample variance. */
struct StdDevSampImpl
{
    static constexpr auto name = "stddevSamp";

    static inline Float64 apply(Float64 m2, UInt64 count)
    {
        return sqrt(VarSampImpl::apply(m2, count));
    }
};

/** varPop: population variance, m2 / count. */
struct VarPopImpl
{
    static constexpr auto name = "varPop";

    static inline Float64 apply(Float64 m2, UInt64 count)
    {
        if (count == 0)
            return std::numeric_limits<Float64>::infinity();
        if (count == 1)
            return 0.0;
        return m2 / count;
    }
};

/** stddevPop: square root of the population variance. */
struct StdDevPopImpl
{
    static constexpr auto name = "stddevPop";

    static inline Float64 apply(Float64 m2, UInt64 count)
    {
        return sqrt(VarPopImpl::apply(m2, count));
    }
};

}
/** Если флаг compute_marginal_moments установлен, этот класс предоставялет наследнику
* CovarianceData поддержку маргинальных моментов для вычисления корреляции.
*/
template<bool compute_marginal_moments>
class BaseCovarianceData
{
protected:
void incrementMarginalMoments(Float64 left_incr, Float64 right_incr) {}
void mergeWith(const BaseCovarianceData & source) {}
void serialize(WriteBuffer & buf) const {}
void deserialize(const ReadBuffer & buf) {}
};
/// Specialization that actually tracks the marginal second moments of both
/// arguments (needed by corr to normalize the co-moment).
template<>
class BaseCovarianceData<true>
{
protected:
void incrementMarginalMoments(Float64 left_incr, Float64 right_incr)
{
left_m2 += left_incr;
right_m2 += right_incr;
}
void mergeWith(const BaseCovarianceData & source)
{
left_m2 += source.left_m2;
right_m2 += source.right_m2;
}
void serialize(WriteBuffer & buf) const
{
writeBinary(left_m2, buf);
writeBinary(right_m2, buf);
}
void deserialize(ReadBuffer & buf)
{
readBinary(left_m2, buf);
readBinary(right_m2, buf);
}
protected:
/// Sums of squared deviations of the left and right arguments from their means.
Float64 left_m2 = 0.0;
Float64 right_m2 = 0.0;
};
/** Параллельный и инкрементальный алгоритм для вычисления ковариации.
* Источник: "Numerically Stable, Single-Pass, Parallel Statistics Algorithms"
* (J. Bennett et al., Sandia National Laboratories,
* 2009 IEEE International Conference on Cluster Computing)
*/
/** Accumulator for single-pass covariance (Bennett et al., see the header comment
  * above). Tracks both running means and the co-moment; when
  * compute_marginal_moments is set, the base class also accumulates per-argument
  * second moments so `Op` (corr) can normalize the result.
  */
template<typename T, typename U, typename Op, bool compute_marginal_moments>
class CovarianceData : public BaseCovarianceData<compute_marginal_moments>
{
private:
using Base = BaseCovarianceData<compute_marginal_moments>;
public:
/// Incorporate one (left, right) pair: update both running means and the co-moment.
void update(const IColumn & column_left, const IColumn & column_right, size_t row_num)
{
T left_received = static_cast<const ColumnVector<T> &>(column_left).getData()[row_num];
Float64 left_val = static_cast<Float64>(left_received);
Float64 left_delta = left_val - left_mean;
U right_received = static_cast<const ColumnVector<U> &>(column_right).getData()[row_num];
Float64 right_val = static_cast<Float64>(right_received);
Float64 right_delta = right_val - right_mean;
/// The co-moment update deliberately uses the right mean from before this row.
Float64 old_right_mean = right_mean;
++count;
left_mean += left_delta / count;
right_mean += right_delta / count;
co_moment += (left_val - left_mean) * (right_val - old_right_mean);
/// Update the marginal moments, if any.
if (compute_marginal_moments)
{
Float64 left_incr = left_delta * (left_val - left_mean);
Float64 right_incr = right_delta * (right_val - right_mean);
Base::incrementMarginalMoments(left_incr, right_incr);
}
}
/// Merge another partial state into this one (parallel combine step).
void mergeWith(const CovarianceData & source)
{
UInt64 total_count = count + source.count;
if (total_count == 0)
return;
Float64 factor = static_cast<Float64>(count * source.count) / total_count;
Float64 left_delta = left_mean - source.left_mean;
Float64 right_delta = right_mean - source.right_mean;
/// Same stability consideration as in AggregateFunctionVarianceData::mergeWith.
if (areComparable(count, source.count))
{
left_mean = (source.count * source.left_mean + count * left_mean) / total_count;
right_mean = (source.count * source.right_mean + count * right_mean) / total_count;
}
else
{
left_mean = source.left_mean + left_delta * (static_cast<Float64>(count) / total_count);
right_mean = source.right_mean + right_delta * (static_cast<Float64>(count) / total_count);
}
co_moment += source.co_moment + left_delta * right_delta * factor;
count = total_count;
/// Update the marginal moments, if any.
if (compute_marginal_moments)
{
Float64 left_incr = left_delta * left_delta * factor;
Float64 right_incr = right_delta * right_delta * factor;
Base::mergeWith(source);
Base::incrementMarginalMoments(left_incr, right_incr);
}
}
void serialize(WriteBuffer & buf) const
{
writeVarUInt(count, buf);
writeBinary(left_mean, buf);
writeBinary(right_mean, buf);
writeBinary(co_moment, buf);
Base::serialize(buf);
}
void deserialize(ReadBuffer & buf)
{
readVarUInt(count, buf);
readBinary(left_mean, buf);
readBinary(right_mean, buf);
readBinary(co_moment, buf);
Base::deserialize(buf);
}
/// Overload selected when marginal moments are tracked: Op also receives them.
template<bool compute = compute_marginal_moments>
void publish(IColumn & to, typename std::enable_if<compute>::type * = nullptr) const
{
static_cast<ColumnFloat64 &>(to).getData().push_back(Op::apply(co_moment, Base::left_m2, Base::right_m2, count));
}
/// Overload selected when marginal moments are not tracked.
template<bool compute = compute_marginal_moments>
void publish(IColumn & to, typename std::enable_if<!compute>::type * = nullptr) const
{
static_cast<ColumnFloat64 &>(to).getData().push_back(Op::apply(co_moment, count));
}
private:
UInt64 count = 0;
Float64 left_mean = 0.0;
Float64 right_mean = 0.0;
Float64 co_moment = 0.0;
};
/** Generic implementation behind covarSamp, covarPop and corr.
  * All state handling is delegated to CovarianceData; `Op` supplies the name
  * and the final computation. Returns Float64.
  */
template<typename T, typename U, typename Op, bool compute_marginal_moments = false>
class AggregateFunctionCovariance final
: public IBinaryAggregateFunction<
CovarianceData<T, U, Op, compute_marginal_moments>,
AggregateFunctionCovariance<T, U, Op, compute_marginal_moments> >
{
public:
String getName() const override { return Op::name; }
DataTypePtr getReturnType() const override
{
return new DataTypeFloat64;
}
/// Both arguments must be numeric types.
void setArgumentsImpl(const DataTypes & arguments)
{
if (!arguments[0]->behavesAsNumber())
throw Exception("Illegal type " + arguments[0]->getName() + " of first argument to function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!arguments[1]->behavesAsNumber())
throw Exception("Illegal type " + arguments[1]->getName() + " of second argument to function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
void addOne(AggregateDataPtr place, const IColumn & column_left, const IColumn & column_right, size_t row_num) const
{
this->data(place).update(column_left, column_right, row_num);
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const override
{
this->data(place).mergeWith(this->data(rhs));
}
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
{
this->data(place).serialize(buf);
}
/// Deserialize into a temporary state and merge it in, rather than overwriting.
void deserializeMerge(AggregateDataPtr place, ReadBuffer & buf) const override
{
CovarianceData<T, U, Op, compute_marginal_moments> source;
source.deserialize(buf);
this->data(place).mergeWith(source);
}
void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
{
this->data(place).publish(to);
}
};
namespace
{

/** covarSamp: unbiased sample covariance, co_moment / (count - 1). */
struct CovarSampImpl
{
    static constexpr auto name = "covarSamp";

    static inline Float64 apply(Float64 co_moment, UInt64 count)
    {
        return (count < 2) ? std::numeric_limits<Float64>::infinity() : (co_moment / (count - 1));
    }
};

/** covarPop: population covariance, co_moment / count. */
struct CovarPopImpl
{
    static constexpr auto name = "covarPop";

    static inline Float64 apply(Float64 co_moment, UInt64 count)
    {
        if (count == 0)
            return std::numeric_limits<Float64>::infinity();
        if (count == 1)
            return 0.0;
        return co_moment / count;
    }
};

/** corr: Pearson correlation coefficient, co_moment / sqrt(left_m2 * right_m2). */
struct CorrImpl
{
    static constexpr auto name = "corr";

    static inline Float64 apply(Float64 co_moment, Float64 left_m2, Float64 right_m2, UInt64 count)
    {
        return (count < 2) ? std::numeric_limits<Float64>::infinity()
                           : (co_moment / sqrt(left_m2 * right_m2));
    }
};

}
/// Concrete aggregate function types (instantiated per argument type and
/// registered with the aggregate function factory).
template<typename T>
using AggregateFunctionVarSamp = AggregateFunctionVariance<T, VarSampImpl>;
template<typename T>
using AggregateFunctionStdDevSamp = AggregateFunctionVariance<T, StdDevSampImpl>;
template<typename T>
using AggregateFunctionVarPop = AggregateFunctionVariance<T, VarPopImpl>;
template<typename T>
using AggregateFunctionStdDevPop = AggregateFunctionVariance<T, StdDevPopImpl>;
template<typename T, typename U>
using AggregateFunctionCovarSamp = AggregateFunctionCovariance<T, U, CovarSampImpl>;
template<typename T, typename U>
using AggregateFunctionCovarPop = AggregateFunctionCovariance<T, U, CovarPopImpl>;
/// corr needs the marginal moments, hence the extra `true` template argument.
template<typename T, typename U>
using AggregateFunctionCorr = AggregateFunctionCovariance<T, U, CorrImpl, true>;
}

View File

@ -41,14 +41,14 @@ public:
bool isNumeric() const override { return IsNumber<T>::value; } bool isNumeric() const override { return IsNumber<T>::value; }
bool isFixed() const override { return IsNumber<T>::value; } bool isFixed() const override { return IsNumber<T>::value; }
size_t sizeOfField() const override { return sizeof(T); } size_t sizeOfField() const override { return sizeof(T); }
ColumnPtr cloneResized(size_t s_) const override { return new ColumnConst(s_, data); } ColumnPtr cloneResized(size_t s_) const override { return new ColumnConst(s_, data, data_type); }
size_t size() const override { return s; } size_t size() const override { return s; }
Field operator[](size_t n) const override { return FieldType(data); } Field operator[](size_t n) const override { return FieldType(data); }
void get(size_t n, Field & res) const override { res = FieldType(data); } void get(size_t n, Field & res) const override { res = FieldType(data); }
ColumnPtr cut(size_t start, size_t length) const override ColumnPtr cut(size_t start, size_t length) const override
{ {
return new ColumnConst<T>(length, data, data_type); return new ColumnConst(length, data, data_type);
} }
void insert(const Field & x) override void insert(const Field & x) override
@ -79,7 +79,7 @@ public:
if (s != filt.size()) if (s != filt.size())
throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
return new ColumnConst<T>(countBytesInFilter(filt), data, data_type); return new ColumnConst(countBytesInFilter(filt), data, data_type);
} }
ColumnPtr replicate(const Offsets_t & offsets) const override ColumnPtr replicate(const Offsets_t & offsets) const override
@ -88,7 +88,7 @@ public:
throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
size_t replicated_size = 0 == s ? 0 : offsets.back(); size_t replicated_size = 0 == s ? 0 : offsets.back();
return new ColumnConst<T>(replicated_size, data, data_type); return new ColumnConst(replicated_size, data, data_type);
} }
size_t byteSize() const override { return sizeof(data) + sizeof(s); } size_t byteSize() const override { return sizeof(data) + sizeof(s); }
@ -103,7 +103,7 @@ public:
if (perm.size() < limit) if (perm.size() < limit)
throw Exception("Size of permutation is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); throw Exception("Size of permutation is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
return new ColumnConst<T>(limit, data, data_type); return new ColumnConst(limit, data, data_type);
} }
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override

View File

@ -3,6 +3,7 @@
#include <unordered_map> #include <unordered_map>
#include <list> #include <list>
#include <memory> #include <memory>
#include <chrono>
#include <Poco/ScopedLock.h> #include <Poco/ScopedLock.h>
#include <Poco/Mutex.h> #include <Poco/Mutex.h>
#include <DB/Core/ErrorCodes.h> #include <DB/Core/ErrorCodes.h>
@ -20,22 +21,27 @@ struct TrivialWeightFunction
} }
}; };
/** Кеш, вытесняющий долго не использовавшиеся и устаревшие записи. thread-safe.
/** Кеш, вытесняющий долго не использовавшиеся записи. thread-safe.
* WeightFunction - тип, оператор () которого принимает Mapped и возвращает "вес" (примерный размер) этого значения. * WeightFunction - тип, оператор () которого принимает Mapped и возвращает "вес" (примерный размер) этого значения.
* Кеш начинает выбрасывать значения, когда их суммарный вес превышает max_size. * Кеш начинает выбрасывать значения, когда их суммарный вес превышает max_size и срок годности этих значений истёк.
* После вставки значения его вес не должен меняться. * После вставки значения его вес не должен меняться.
*/ */
template <typename TKey, typename TMapped, typename HashFunction = std::hash<TMapped>, typename WeightFunction = TrivialWeightFunction<TMapped> > template <typename TKey, typename TMapped, typename HashFunction = std::hash<TMapped>, typename WeightFunction = TrivialWeightFunction<TMapped> >
class LRUCache class LRUCache
{ {
public: public:
typedef TKey Key; using Key = TKey;
typedef TMapped Mapped; using Mapped = TMapped;
typedef std::shared_ptr<Mapped> MappedPtr; using MappedPtr = std::shared_ptr<Mapped>;
using Delay = std::chrono::seconds;
LRUCache(size_t max_size_) private:
: max_size(std::max(1ul, max_size_)) {} using Clock = std::chrono::steady_clock;
using Timestamp = Clock::time_point;
public:
LRUCache(size_t max_size_, const Delay & expiration_delay_ = Delay::zero())
: max_size(std::max(1ul, max_size_)), expiration_delay(expiration_delay_) {}
MappedPtr get(const Key & key) MappedPtr get(const Key & key)
{ {
@ -50,6 +56,7 @@ public:
++hits; ++hits;
Cell & cell = it->second; Cell & cell = it->second;
updateCellTimestamp(cell);
/// Переместим ключ в конец очереди. Итератор остается валидным. /// Переместим ключ в конец очереди. Итератор остается валидным.
queue.splice(queue.end(), queue, cell.queue_iterator); queue.splice(queue.end(), queue, cell.queue_iterator);
@ -81,8 +88,9 @@ public:
cell.value = mapped; cell.value = mapped;
cell.size = cell.value ? weight_function(*cell.value) : 0; cell.size = cell.value ? weight_function(*cell.value) : 0;
current_size += cell.size; current_size += cell.size;
updateCellTimestamp(cell);
removeOverflow(); removeOverflow(cell.timestamp);
} }
void getStats(size_t & out_hits, size_t & out_misses) const void getStats(size_t & out_hits, size_t & out_misses) const
@ -120,17 +128,26 @@ protected:
/// Суммарный вес выброшенных из кеша элементов. /// Суммарный вес выброшенных из кеша элементов.
/// Обнуляется каждый раз, когда информация добавляется в Profile events /// Обнуляется каждый раз, когда информация добавляется в Profile events
private: private:
typedef std::list<Key> LRUQueue; using LRUQueue = std::list<Key>;
typedef typename LRUQueue::iterator LRUQueueIterator; using LRUQueueIterator = typename LRUQueue::iterator;
struct Cell struct Cell
{ {
public:
bool expired(const Timestamp & last_timestamp, const Delay & expiration_delay) const
{
return (expiration_delay == Delay::zero()) ||
((last_timestamp > timestamp) && ((last_timestamp - timestamp) > expiration_delay));
}
public:
MappedPtr value; MappedPtr value;
size_t size; size_t size;
LRUQueueIterator queue_iterator; LRUQueueIterator queue_iterator;
Timestamp timestamp;
}; };
typedef std::unordered_map<Key, Cell, HashFunction> Cells; using Cells = std::unordered_map<Key, Cell, HashFunction>;
LRUQueue queue; LRUQueue queue;
Cells cells; Cells cells;
@ -138,6 +155,7 @@ private:
/// Суммарный вес значений. /// Суммарный вес значений.
size_t current_size = 0; size_t current_size = 0;
const size_t max_size; const size_t max_size;
const Delay expiration_delay;
mutable Poco::FastMutex mutex; mutable Poco::FastMutex mutex;
size_t hits = 0; size_t hits = 0;
@ -145,15 +163,31 @@ private:
WeightFunction weight_function; WeightFunction weight_function;
void removeOverflow() void updateCellTimestamp(Cell & cell)
{
if (expiration_delay != Delay::zero())
cell.timestamp = Clock::now();
}
void removeOverflow(const Timestamp & last_timestamp)
{ {
size_t queue_size = cells.size(); size_t queue_size = cells.size();
while (current_size > max_size && queue_size > 1) while ((current_size > max_size) && (queue_size > 1))
{ {
const Key & key = queue.front(); const Key & key = queue.front();
auto it = cells.find(key); auto it = cells.find(key);
current_size -= it->second.size; if (it == cells.end())
current_weight_lost += it->second.size; throw Exception("LRUCache became inconsistent. There must be a bug in it. Clearing it for now.",
ErrorCodes::LOGICAL_ERROR);
const auto & cell = it->second;
if (!cell.expired(last_timestamp, expiration_delay))
break;
current_size -= cell.size;
current_weight_lost += cell.size;
cells.erase(it); cells.erase(it);
queue.pop_front(); queue.pop_front();
--queue_size; --queue_size;

View File

@ -66,6 +66,9 @@
M(DistributedConnectionFailTry) \ M(DistributedConnectionFailTry) \
M(DistributedConnectionFailAtAll) \ M(DistributedConnectionFailAtAll) \
\ \
M(CompileAttempt) \
M(CompileSuccess) \
\
M(END) M(END)
namespace ProfileEvents namespace ProfileEvents

View File

@ -9,285 +9,285 @@ namespace ErrorCodes
enum ErrorCodes enum ErrorCodes
{ {
UNSUPPORTED_METHOD = 1, UNSUPPORTED_METHOD = 1,
UNSUPPORTED_PARAMETER, UNSUPPORTED_PARAMETER = 2,
UNEXPECTED_END_OF_FILE, UNEXPECTED_END_OF_FILE = 3,
EXPECTED_END_OF_FILE, EXPECTED_END_OF_FILE = 4,
CANNOT_READ_DATA_FROM_READ_BUFFER, CANNOT_PARSE_TEXT = 6,
CANNOT_PARSE_TEXT, INCORRECT_NUMBER_OF_COLUMNS = 7,
INCORRECT_NUMBER_OF_COLUMNS, THERE_IS_NO_COLUMN = 8,
THERE_IS_NO_COLUMN, SIZES_OF_COLUMNS_DOESNT_MATCH = 9,
SIZES_OF_COLUMNS_DOESNT_MATCH, NOT_FOUND_COLUMN_IN_BLOCK = 10,
NOT_FOUND_COLUMN_IN_BLOCK, POSITION_OUT_OF_BOUND = 11,
POSITION_OUT_OF_BOUND, PARAMETER_OUT_OF_BOUND = 12,
PARAMETER_OUT_OF_BOUND, SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH = 13,
SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH, EMPTY_TUPLE = 14,
EMPTY_TUPLE, DUPLICATE_COLUMN = 15,
DUPLICATE_COLUMN, NO_SUCH_COLUMN_IN_TABLE = 16,
NO_SUCH_COLUMN_IN_TABLE, DELIMITER_IN_STRING_LITERAL_DOESNT_MATCH = 17,
DELIMITER_IN_STRING_LITERAL_DOESNT_MATCH, CANNOT_INSERT_ELEMENT_INTO_CONSTANT_COLUMN = 18,
CANNOT_INSERT_ELEMENT_INTO_CONSTANT_COLUMN, SIZE_OF_ARRAY_DOESNT_MATCH_SIZE_OF_FIXEDARRAY_COLUMN = 19,
SIZE_OF_ARRAY_DOESNT_MATCH_SIZE_OF_FIXEDARRAY_COLUMN, NUMBER_OF_COLUMNS_DOESNT_MATCH = 20,
NUMBER_OF_COLUMNS_DOESNT_MATCH, CANNOT_READ_ALL_DATA_FROM_TAB_SEPARATED_INPUT = 21,
CANNOT_READ_ALL_DATA_FROM_TAB_SEPARATED_INPUT, CANNOT_PARSE_ALL_VALUE_FROM_TAB_SEPARATED_INPUT = 22,
CANNOT_PARSE_ALL_VALUE_FROM_TAB_SEPARATED_INPUT, CANNOT_READ_FROM_ISTREAM = 23,
CANNOT_READ_FROM_ISTREAM, CANNOT_WRITE_TO_OSTREAM = 24,
CANNOT_WRITE_TO_OSTREAM, CANNOT_PARSE_ESCAPE_SEQUENCE = 25,
CANNOT_PARSE_ESCAPE_SEQUENCE, CANNOT_PARSE_QUOTED_STRING = 26,
CANNOT_PARSE_QUOTED_STRING, CANNOT_PARSE_INPUT_ASSERTION_FAILED = 27,
CANNOT_PARSE_INPUT_ASSERTION_FAILED, CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER = 28,
CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, CANNOT_PRINT_INTEGER = 29,
CANNOT_PRINT_INTEGER, CANNOT_READ_SIZE_OF_COMPRESSED_CHUNK = 30,
CANNOT_READ_SIZE_OF_COMPRESSED_CHUNK, CANNOT_READ_COMPRESSED_CHUNK = 31,
CANNOT_READ_COMPRESSED_CHUNK, ATTEMPT_TO_READ_AFTER_EOF = 32,
ATTEMPT_TO_READ_AFTER_EOF, CANNOT_READ_ALL_DATA = 33,
CANNOT_READ_ALL_DATA, TOO_MUCH_ARGUMENTS_FOR_FUNCTION = 34,
TOO_MUCH_ARGUMENTS_FOR_FUNCTION, TOO_LESS_ARGUMENTS_FOR_FUNCTION = 35,
TOO_LESS_ARGUMENTS_FOR_FUNCTION, BAD_ARGUMENTS = 36,
BAD_ARGUMENTS, UNKNOWN_ELEMENT_IN_AST = 37,
UNKNOWN_ELEMENT_IN_AST, CANNOT_PARSE_DATE = 38,
CANNOT_PARSE_DATE, TOO_LARGE_SIZE_COMPRESSED = 39,
TOO_LARGE_SIZE_COMPRESSED, CHECKSUM_DOESNT_MATCH = 40,
CHECKSUM_DOESNT_MATCH, CANNOT_PARSE_DATETIME = 41,
CANNOT_PARSE_DATETIME, NUMBER_OF_ARGUMENTS_DOESNT_MATCH = 42,
NUMBER_OF_ARGUMENTS_DOESNT_MATCH, ILLEGAL_TYPE_OF_ARGUMENT = 43,
ILLEGAL_TYPE_OF_ARGUMENT, ILLEGAL_COLUMN = 44,
ILLEGAL_COLUMN, ILLEGAL_NUMBER_OF_RESULT_COLUMNS = 45,
ILLEGAL_NUMBER_OF_RESULT_COLUMNS, UNKNOWN_FUNCTION = 46,
UNKNOWN_FUNCTION, UNKNOWN_IDENTIFIER = 47,
UNKNOWN_IDENTIFIER, NOT_IMPLEMENTED = 48,
NOT_IMPLEMENTED, LOGICAL_ERROR = 49,
LOGICAL_ERROR, UNKNOWN_TYPE = 50,
UNKNOWN_TYPE, EMPTY_LIST_OF_COLUMNS_QUERIED = 51,
EMPTY_LIST_OF_COLUMNS_QUERIED, COLUMN_QUERIED_MORE_THAN_ONCE = 52,
COLUMN_QUERIED_MORE_THAN_ONCE, TYPE_MISMATCH = 53,
TYPE_MISMATCH, STORAGE_DOESNT_ALLOW_PARAMETERS = 54,
STORAGE_DOESNT_ALLOW_PARAMETERS, STORAGE_REQUIRES_PARAMETER = 55,
STORAGE_REQUIRES_PARAMETER, UNKNOWN_STORAGE = 56,
UNKNOWN_STORAGE, TABLE_ALREADY_EXISTS = 57,
TABLE_ALREADY_EXISTS, TABLE_METADATA_ALREADY_EXISTS = 58,
TABLE_METADATA_ALREADY_EXISTS, ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER = 59,
ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, UNKNOWN_TABLE = 60,
UNKNOWN_TABLE, ONLY_FILTER_COLUMN_IN_BLOCK = 61,
ONLY_FILTER_COLUMN_IN_BLOCK, SYNTAX_ERROR = 62,
SYNTAX_ERROR, UNKNOWN_AGGREGATE_FUNCTION = 63,
UNKNOWN_AGGREGATE_FUNCTION, CANNOT_READ_AGGREGATE_FUNCTION_FROM_TEXT = 64,
CANNOT_READ_AGGREGATE_FUNCTION_FROM_TEXT, CANNOT_WRITE_AGGREGATE_FUNCTION_AS_TEXT = 65,
CANNOT_WRITE_AGGREGATE_FUNCTION_AS_TEXT, NOT_A_COLUMN = 66,
NOT_A_COLUMN, ILLEGAL_KEY_OF_AGGREGATION = 67,
ILLEGAL_KEY_OF_AGGREGATION, CANNOT_GET_SIZE_OF_FIELD = 68,
CANNOT_GET_SIZE_OF_FIELD, ARGUMENT_OUT_OF_BOUND = 69,
ARGUMENT_OUT_OF_BOUND, CANNOT_CONVERT_TYPE = 70,
CANNOT_CONVERT_TYPE, CANNOT_WRITE_AFTER_END_OF_BUFFER = 71,
CANNOT_WRITE_AFTER_END_OF_BUFFER, CANNOT_PARSE_NUMBER = 72,
CANNOT_PARSE_NUMBER, UNKNOWN_FORMAT = 73,
UNKNOWN_FORMAT, CANNOT_READ_FROM_FILE_DESCRIPTOR = 74,
CANNOT_READ_FROM_FILE_DESCRIPTOR, CANNOT_WRITE_TO_FILE_DESCRIPTOR = 75,
CANNOT_WRITE_TO_FILE_DESCRIPTOR, CANNOT_OPEN_FILE = 76,
CANNOT_OPEN_FILE, CANNOT_CLOSE_FILE = 77,
CANNOT_CLOSE_FILE, UNKNOWN_TYPE_OF_QUERY = 78,
UNKNOWN_TYPE_OF_QUERY, INCORRECT_FILE_NAME = 79,
INCORRECT_FILE_NAME, INCORRECT_QUERY = 80,
INCORRECT_QUERY, UNKNOWN_DATABASE = 81,
UNKNOWN_DATABASE, DATABASE_ALREADY_EXISTS = 82,
DATABASE_ALREADY_EXISTS, DIRECTORY_DOESNT_EXIST = 83,
DIRECTORY_DOESNT_EXIST, DIRECTORY_ALREADY_EXISTS = 84,
DIRECTORY_ALREADY_EXISTS, FORMAT_IS_NOT_SUITABLE_FOR_INPUT = 85,
FORMAT_IS_NOT_SUITABLE_FOR_INPUT, RECEIVED_ERROR_FROM_REMOTE_IO_SERVER = 86,
RECEIVED_ERROR_FROM_REMOTE_IO_SERVER, CANNOT_SEEK_THROUGH_FILE = 87,
CANNOT_SEEK_THROUGH_FILE, CANNOT_TRUNCATE_FILE = 88,
CANNOT_TRUNCATE_FILE, UNKNOWN_COMPRESSION_METHOD = 89,
UNKNOWN_COMPRESSION_METHOD, EMPTY_LIST_OF_COLUMNS_PASSED = 90,
EMPTY_LIST_OF_COLUMNS_PASSED, SIZES_OF_MARKS_FILES_ARE_INCONSISTENT = 91,
SIZES_OF_MARKS_FILES_ARE_INCONSISTENT, EMPTY_DATA_PASSED = 92,
EMPTY_DATA_PASSED, UNKNOWN_AGGREGATED_DATA_VARIANT = 93,
UNKNOWN_AGGREGATED_DATA_VARIANT, CANNOT_MERGE_DIFFERENT_AGGREGATED_DATA_VARIANTS = 94,
CANNOT_MERGE_DIFFERENT_AGGREGATED_DATA_VARIANTS, CANNOT_READ_FROM_SOCKET = 95,
CANNOT_READ_FROM_SOCKET, CANNOT_WRITE_TO_SOCKET = 96,
CANNOT_WRITE_TO_SOCKET, CANNOT_READ_ALL_DATA_FROM_CHUNKED_INPUT = 97,
CANNOT_READ_ALL_DATA_FROM_CHUNKED_INPUT, CANNOT_WRITE_TO_EMPTY_BLOCK_OUTPUT_STREAM = 98,
CANNOT_WRITE_TO_EMPTY_BLOCK_OUTPUT_STREAM, UNKNOWN_PACKET_FROM_CLIENT = 99,
UNKNOWN_PACKET_FROM_CLIENT, UNKNOWN_PACKET_FROM_SERVER = 100,
UNKNOWN_PACKET_FROM_SERVER, UNEXPECTED_PACKET_FROM_CLIENT = 101,
UNEXPECTED_PACKET_FROM_CLIENT, UNEXPECTED_PACKET_FROM_SERVER = 102,
UNEXPECTED_PACKET_FROM_SERVER, RECEIVED_DATA_FOR_WRONG_QUERY_ID = 103,
RECEIVED_DATA_FOR_WRONG_QUERY_ID, TOO_SMALL_BUFFER_SIZE = 104,
TOO_SMALL_BUFFER_SIZE, CANNOT_READ_HISTORY = 105,
CANNOT_READ_HISTORY, CANNOT_APPEND_HISTORY = 106,
CANNOT_APPEND_HISTORY, FILE_DOESNT_EXIST = 107,
FILE_DOESNT_EXIST, NO_DATA_TO_INSERT = 108,
NO_DATA_TO_INSERT, CANNOT_BLOCK_SIGNAL = 109,
CANNOT_BLOCK_SIGNAL, CANNOT_UNBLOCK_SIGNAL = 110,
CANNOT_UNBLOCK_SIGNAL, CANNOT_MANIPULATE_SIGSET = 111,
CANNOT_MANIPULATE_SIGSET, CANNOT_WAIT_FOR_SIGNAL = 112,
CANNOT_WAIT_FOR_SIGNAL, THERE_IS_NO_SESSION = 113,
THERE_IS_NO_SESSION, CANNOT_CLOCK_GETTIME = 114,
CANNOT_CLOCK_GETTIME, UNKNOWN_SETTING = 115,
UNKNOWN_SETTING, THERE_IS_NO_DEFAULT_VALUE = 116,
THERE_IS_NO_DEFAULT_VALUE, INCORRECT_DATA = 117,
INCORRECT_DATA, TABLE_METADATA_DOESNT_EXIST = 118,
TABLE_METADATA_DOESNT_EXIST, ENGINE_REQUIRED = 119,
ENGINE_REQUIRED, CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE = 120,
CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE, UNKNOWN_SET_DATA_VARIANT = 121,
UNKNOWN_SET_DATA_VARIANT, INCOMPATIBLE_COLUMNS = 122,
INCOMPATIBLE_COLUMNS, UNKNOWN_TYPE_OF_AST_NODE = 123,
UNKNOWN_TYPE_OF_AST_NODE, INCORRECT_ELEMENT_OF_SET = 124,
INCORRECT_ELEMENT_OF_SET, INCORRECT_RESULT_OF_SCALAR_SUBQUERY = 125,
INCORRECT_RESULT_OF_SCALAR_SUBQUERY, CANNOT_GET_RETURN_TYPE = 126,
CANNOT_GET_RETURN_TYPE, ILLEGAL_INDEX = 127,
ILLEGAL_INDEX, TOO_LARGE_ARRAY_SIZE = 128,
TOO_LARGE_ARRAY_SIZE, FUNCTION_IS_SPECIAL = 129,
FUNCTION_IS_SPECIAL, CANNOT_READ_ARRAY_FROM_TEXT = 130,
CANNOT_READ_ARRAY_FROM_TEXT, TOO_LARGE_STRING_SIZE = 131,
TOO_LARGE_STRING_SIZE, CANNOT_CREATE_TABLE_FROM_METADATA = 132,
CANNOT_CREATE_TABLE_FROM_METADATA, AGGREGATE_FUNCTION_DOESNT_ALLOW_PARAMETERS = 133,
AGGREGATE_FUNCTION_DOESNT_ALLOW_PARAMETERS, PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS = 134,
PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS, ZERO_ARRAY_OR_TUPLE_INDEX = 135,
ZERO_ARRAY_OR_TUPLE_INDEX, UNKNOWN_ELEMENT_IN_CONFIG = 137,
ALL_CONNECTION_TRIES_FAILED, EXCESSIVE_ELEMENT_IN_CONFIG = 138,
UNKNOWN_ELEMENT_IN_CONFIG, NO_ELEMENTS_IN_CONFIG = 139,
EXCESSIVE_ELEMENT_IN_CONFIG, ALL_REQUESTED_COLUMNS_ARE_MISSING = 140,
NO_ELEMENTS_IN_CONFIG, SAMPLING_NOT_SUPPORTED = 141,
ALL_REQUESTED_COLUMNS_ARE_MISSING, NOT_FOUND_NODE = 142,
SAMPLING_NOT_SUPPORTED, FOUND_MORE_THAN_ONE_NODE = 143,
NOT_FOUND_NODE, FIRST_DATE_IS_BIGGER_THAN_LAST_DATE = 144,
FOUND_MORE_THAN_ONE_NODE, UNKNOWN_OVERFLOW_MODE = 145,
FIRST_DATE_IS_BIGGER_THAN_LAST_DATE, QUERY_SECTION_DOESNT_MAKE_SENSE = 146,
UNKNOWN_OVERFLOW_MODE, NOT_FOUND_FUNCTION_ELEMENT_FOR_AGGREGATE = 147,
QUERY_SECTION_DOESNT_MAKE_SENSE, NOT_FOUND_RELATION_ELEMENT_FOR_CONDITION = 148,
NOT_FOUND_FUNCTION_ELEMENT_FOR_AGGREGATE, NOT_FOUND_RHS_ELEMENT_FOR_CONDITION = 149,
NOT_FOUND_RELATION_ELEMENT_FOR_CONDITION, NO_ATTRIBUTES_LISTED = 150,
NOT_FOUND_RHS_ELEMENT_FOR_CONDITION, INDEX_OF_COLUMN_IN_SORT_CLAUSE_IS_OUT_OF_RANGE = 151,
NO_ATTRIBUTES_LISTED, UNKNOWN_DIRECTION_OF_SORTING = 152,
INDEX_OF_COLUMN_IN_SORT_CLAUSE_IS_OUT_OF_RANGE, ILLEGAL_DIVISION = 153,
UNKNOWN_DIRECTION_OF_SORTING, AGGREGATE_FUNCTION_NOT_APPLICABLE = 154,
ILLEGAL_DIVISION, UNKNOWN_RELATION = 155,
AGGREGATE_FUNCTION_NOT_APPLICABLE, DICTIONARIES_WAS_NOT_LOADED = 156,
UNKNOWN_RELATION, ILLEGAL_OVERFLOW_MODE = 157,
DICTIONARIES_WAS_NOT_LOADED, TOO_MUCH_ROWS = 158,
ILLEGAL_OVERFLOW_MODE, TIMEOUT_EXCEEDED = 159,
TOO_MUCH_ROWS, TOO_SLOW = 160,
TIMEOUT_EXCEEDED, TOO_MUCH_COLUMNS = 161,
TOO_SLOW, TOO_DEEP_SUBQUERIES = 162,
TOO_MUCH_COLUMNS, TOO_DEEP_PIPELINE = 163,
TOO_DEEP_SUBQUERIES, READONLY = 164,
TOO_DEEP_PIPELINE, TOO_MUCH_TEMPORARY_COLUMNS = 165,
READONLY, TOO_MUCH_TEMPORARY_NON_CONST_COLUMNS = 166,
TOO_MUCH_TEMPORARY_COLUMNS, TOO_DEEP_AST = 167,
TOO_MUCH_TEMPORARY_NON_CONST_COLUMNS, TOO_BIG_AST = 168,
TOO_DEEP_AST, BAD_TYPE_OF_FIELD = 169,
TOO_BIG_AST, BAD_GET = 170,
BAD_TYPE_OF_FIELD, BLOCKS_HAS_DIFFERENT_STRUCTURE = 171,
BAD_GET, CANNOT_CREATE_DIRECTORY = 172,
BLOCKS_HAS_DIFFERENT_STRUCTURE, CANNOT_ALLOCATE_MEMORY = 173,
CANNOT_CREATE_DIRECTORY, CYCLIC_ALIASES = 174,
CANNOT_ALLOCATE_MEMORY, NEGATIVE_REFCOUNT = 175,
CYCLIC_ALIASES, CHUNK_NOT_FOUND = 176,
NEGATIVE_REFCOUNT, DUPLICATE_CHUNK_NAME = 177,
CHUNK_NOT_FOUND, MULTIPLE_ALIASES_FOR_EXPRESSION = 178,
DUPLICATE_CHUNK_NAME, MULTIPLE_EXPRESSIONS_FOR_ALIAS = 179,
MULTIPLE_ALIASES_FOR_EXPRESSION, THERE_IS_NO_PROFILE = 180,
MULTIPLE_EXPRESSIONS_FOR_ALIAS, ILLEGAL_FINAL = 181,
THERE_IS_NO_PROFILE, ILLEGAL_PREWHERE = 182,
ILLEGAL_FINAL, UNEXPECTED_EXPRESSION = 183,
ILLEGAL_PREWHERE, ILLEGAL_AGGREGATION = 184,
UNEXPECTED_EXPRESSION, UNSUPPORTED_MYISAM_BLOCK_TYPE = 185,
ILLEGAL_AGGREGATION, UNSUPPORTED_COLLATION_LOCALE = 186,
UNSUPPORTED_MYISAM_BLOCK_TYPE, COLLATION_COMPARISON_FAILED = 187,
UNSUPPORTED_COLLATION_LOCALE, UNKNOWN_ACTION = 188,
COLLATION_COMPARISON_FAILED, TABLE_MUST_NOT_BE_CREATED_MANUALLY = 189,
UNKNOWN_ACTION, SIZES_OF_ARRAYS_DOESNT_MATCH = 190,
TABLE_MUST_NOT_BE_CREATED_MANUALLY, SET_SIZE_LIMIT_EXCEEDED = 191,
SIZES_OF_ARRAYS_DOESNT_MATCH, UNKNOWN_USER = 192,
SET_SIZE_LIMIT_EXCEEDED, WRONG_PASSWORD = 193,
UNKNOWN_USER, REQUIRED_PASSWORD = 194,
WRONG_PASSWORD, IP_ADDRESS_NOT_ALLOWED = 195,
REQUIRED_PASSWORD, UNKNOWN_ADDRESS_PATTERN_TYPE = 196,
IP_ADDRESS_NOT_ALLOWED, SERVER_REVISION_IS_TOO_OLD = 197,
UNKNOWN_ADDRESS_PATTERN_TYPE, DNS_ERROR = 198,
SERVER_REVISION_IS_TOO_OLD, UNKNOWN_QUOTA = 199,
DNS_ERROR, QUOTA_DOESNT_ALLOW_KEYS = 200,
UNKNOWN_QUOTA, QUOTA_EXPIRED = 201,
QUOTA_DOESNT_ALLOW_KEYS, TOO_MUCH_SIMULTANEOUS_QUERIES = 202,
QUOTA_EXPIRED, NO_FREE_CONNECTION = 203,
TOO_MUCH_SIMULTANEOUS_QUERIES, CANNOT_FSYNC = 204,
NO_FREE_CONNECTION, NESTED_TYPE_TOO_DEEP = 205,
CANNOT_FSYNC, ALIAS_REQUIRED = 206,
NESTED_TYPE_TOO_DEEP, AMBIGUOUS_IDENTIFIER = 207,
ALIAS_REQUIRED, EMPTY_NESTED_TABLE = 208,
AMBIGUOUS_IDENTIFIER, SOCKET_TIMEOUT = 209,
EMPTY_NESTED_TABLE, NETWORK_ERROR = 210,
SOCKET_TIMEOUT, EMPTY_QUERY = 211,
NETWORK_ERROR, UNKNOWN_LOAD_BALANCING = 212,
EMPTY_QUERY, UNKNOWN_TOTALS_MODE = 213,
UNKNOWN_LOAD_BALANCING, CANNOT_STATVFS = 214,
UNKNOWN_TOTALS_MODE, NOT_AN_AGGREGATE = 215,
CANNOT_STATVFS, QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING = 216,
NOT_AN_AGGREGATE, CLIENT_HAS_CONNECTED_TO_WRONG_PORT = 217,
QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING, TABLE_IS_DROPPED = 218,
CLIENT_HAS_CONNECTED_TO_WRONG_PORT, DATABASE_NOT_EMPTY = 219,
TABLE_IS_DROPPED, DUPLICATE_INTERSERVER_IO_ENDPOINT = 220,
DATABASE_NOT_EMPTY, NO_SUCH_INTERSERVER_IO_ENDPOINT = 221,
DUPLICATE_INTERSERVER_IO_ENDPOINT, ADDING_REPLICA_TO_NON_EMPTY_TABLE = 222,
NO_SUCH_INTERSERVER_IO_ENDPOINT, UNEXPECTED_AST_STRUCTURE = 223,
ADDING_REPLICA_TO_NON_EMPTY_TABLE, REPLICA_IS_ALREADY_ACTIVE = 224,
UNEXPECTED_AST_STRUCTURE, NO_ZOOKEEPER = 225,
REPLICA_IS_ALREADY_ACTIVE, NO_FILE_IN_DATA_PART = 226,
NO_ZOOKEEPER, UNEXPECTED_FILE_IN_DATA_PART = 227,
NO_FILE_IN_DATA_PART, BAD_SIZE_OF_FILE_IN_DATA_PART = 228,
UNEXPECTED_FILE_IN_DATA_PART, QUERY_IS_TOO_LARGE = 229,
BAD_SIZE_OF_FILE_IN_DATA_PART, NOT_FOUND_EXPECTED_DATA_PART = 230,
QUERY_IS_TOO_LARGE, TOO_MANY_UNEXPECTED_DATA_PARTS = 231,
NOT_FOUND_EXPECTED_DATA_PART, NO_SUCH_DATA_PART = 232,
TOO_MANY_UNEXPECTED_DATA_PARTS, BAD_DATA_PART_NAME = 233,
NO_SUCH_DATA_PART, NO_REPLICA_HAS_PART = 234,
BAD_DATA_PART_NAME, DUPLICATE_DATA_PART = 235,
NO_REPLICA_HAS_PART, ABORTED = 236,
DUPLICATE_DATA_PART, NO_REPLICA_NAME_GIVEN = 237,
ABORTED, FORMAT_VERSION_TOO_OLD = 238,
NO_REPLICA_NAME_GIVEN, CANNOT_MUNMAP = 239,
FORMAT_VERSION_TOO_OLD, CANNOT_MREMAP = 240,
CANNOT_MUNMAP, MEMORY_LIMIT_EXCEEDED = 241,
CANNOT_MREMAP, TABLE_IS_READ_ONLY = 242,
MEMORY_LIMIT_EXCEEDED, NOT_ENOUGH_SPACE = 243,
TABLE_IS_READ_ONLY, UNEXPECTED_ZOOKEEPER_ERROR = 244,
NOT_ENOUGH_SPACE, INVALID_NESTED_NAME = 245,
UNEXPECTED_ZOOKEEPER_ERROR, CORRUPTED_DATA = 246,
INVALID_NESTED_NAME, INCORRECT_MARK = 247,
CORRUPTED_DATA, INVALID_PARTITION_NAME = 248,
INCORRECT_MARK, NOT_LEADER = 249,
INVALID_PARTITION_NAME, NOT_ENOUGH_BLOCK_NUMBERS = 250,
NOT_LEADER, NO_SUCH_REPLICA = 251,
NOT_ENOUGH_BLOCK_NUMBERS, TOO_MUCH_PARTS = 252,
NO_SUCH_REPLICA, REPLICA_IS_ALREADY_EXIST = 253,
TOO_MUCH_PARTS, NO_ACTIVE_REPLICAS = 254,
REPLICA_IS_ALREADY_EXIST, TOO_MUCH_RETRIES_TO_FETCH_PARTS = 255,
NO_ACTIVE_REPLICAS, PARTITION_ALREADY_EXISTS = 256,
TOO_MUCH_RETRIES_TO_FETCH_PARTS, PARTITION_DOESNT_EXIST = 257,
PARTITION_ALREADY_EXISTS, UNION_ALL_RESULT_STRUCTURES_MISMATCH = 258,
PARTITION_DOESNT_EXIST, UNION_ALL_COLUMN_ALIAS_MISMATCH = 259,
UNION_ALL_RESULT_STRUCTURES_MISMATCH, CLIENT_OUTPUT_FORMAT_SPECIFIED = 260,
UNION_ALL_COLUMN_ALIAS_MISMATCH, UNKNOWN_BLOCK_INFO_FIELD = 261,
CLIENT_OUTPUT_FORMAT_SPECIFIED, BAD_COLLATION = 262,
UNKNOWN_BLOCK_INFO_FIELD, CANNOT_COMPILE_CODE = 263,
BAD_COLLATION, INCOMPATIBLE_TYPE_OF_JOIN = 264,
CANNOT_COMPILE_CODE, NO_AVAILABLE_REPLICA = 265,
INCOMPATIBLE_TYPE_OF_JOIN, MISMATCH_REPLICAS_DATA_SOURCES = 266,
NO_AVAILABLE_REPLICA, STORAGE_DOESNT_SUPPORT_PARALLEL_REPLICAS = 267,
MISMATCH_REPLICAS_DATA_SOURCES, CPUID_ERROR = 268,
STORAGE_DOESNT_SUPPORT_PARALLEL_REPLICAS, INFINITE_LOOP = 269,
CPUID_ERROR, CANNOT_COMPRESS = 270,
INFINITE_LOOP, CANNOT_DECOMPRESS = 271,
CANNOT_COMPRESS, AIO_SUBMIT_ERROR = 272,
CANNOT_DECOMPRESS, AIO_COMPLETION_ERROR = 273,
AIO_SUBMIT_ERROR, AIO_READ_ERROR = 274,
AIO_COMPLETION_ERROR, AIO_WRITE_ERROR = 275,
AIO_READ_ERROR, INDEX_NOT_USED = 277,
AIO_WRITE_ERROR, LEADERSHIP_LOST = 278,
INDEX_NOT_USED, ALL_CONNECTION_TRIES_FAILED = 279,
POCO_EXCEPTION = 1000, POCO_EXCEPTION = 1000,
STD_EXCEPTION, STD_EXCEPTION = 1001,
UNKNOWN_EXCEPTION, UNKNOWN_EXCEPTION = 1002,
}; };
} }

View File

@ -50,7 +50,7 @@ protected:
Block readImpl() override; Block readImpl() override;
private: private:
Logger * log = &Logger::get("SummingSortedBlockInputStream"); Logger * log = &Logger::get("AggregatingSortedBlockInputStream");
/// Прочитали до конца. /// Прочитали до конца.
bool finished = false; bool finished = false;
@ -68,7 +68,7 @@ private:
* Шаблоны используем вместо полиморфных SortCursor'ов и вызовов виртуальных функций. * Шаблоны используем вместо полиморфных SortCursor'ов и вызовов виртуальных функций.
*/ */
template<class TSortCursor> template<class TSortCursor>
void merge(Block & merged_block, ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue); void merge(ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue);
/// Вставить в результат первую строку для текущей группы. /// Вставить в результат первую строку для текущей группы.
void insertCurrentRow(ColumnPlainPtrs & merged_columns); void insertCurrentRow(ColumnPlainPtrs & merged_columns);

View File

@ -81,7 +81,7 @@ private:
* Шаблоны используем вместо полиморфных SortCursor'ов и вызовов виртуальных функций. * Шаблоны используем вместо полиморфных SortCursor'ов и вызовов виртуальных функций.
*/ */
template<class TSortCursor> template<class TSortCursor>
void merge(Block & merged_block, ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue); void merge(ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue);
/// Вставить в результат строки для текущего идентификатора "визита". /// Вставить в результат строки для текущего идентификатора "визита".
void insertRows(ColumnPlainPtrs & merged_columns, size_t & merged_rows, bool last_in_stream = false); void insertRows(ColumnPlainPtrs & merged_columns, size_t & merged_rows, bool last_in_stream = false);

View File

@ -15,7 +15,7 @@ using Poco::SharedPtr;
/** Выполняет над блоком вычисление некоторого выражения. /** Выполняет над блоком вычисление некоторого выражения.
* Выражение состоит из идентификаторов столбцов из блока, констант, обычных функций. * Выражение состоит из идентификаторов столбцов из блока, констант, обычных функций.
* Например: hits * 2 + 3, url LIKE '%yandex%' * Например: hits * 2 + 3, url LIKE '%yandex%'
* Выражение не меняет количество строк в потоке, и обрабатывает каждую строку независимо от других. * Выражение обрабатывает каждую строку независимо от других.
*/ */
class ExpressionBlockInputStream : public IProfilingBlockInputStream class ExpressionBlockInputStream : public IProfilingBlockInputStream
{ {

View File

@ -136,7 +136,7 @@ private:
void initQueue(std::priority_queue<TSortCursor> & queue); void initQueue(std::priority_queue<TSortCursor> & queue);
template <typename TSortCursor> template <typename TSortCursor>
void merge(Block & merged_block, ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue); void merge(ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue);
Logger * log = &Logger::get("MergingSortedBlockInputStream"); Logger * log = &Logger::get("MergingSortedBlockInputStream");

View File

@ -5,6 +5,7 @@
#include <DB/Client/Connection.h> #include <DB/Client/Connection.h>
#include <Yandex/logger_useful.h> #include <Yandex/logger_useful.h>
#include <statdaemons/NetException.h>
namespace DB namespace DB
{ {
@ -50,7 +51,7 @@ public:
return; return;
} }
else else
throw Exception("Unexpected packet from server (expected Data or Exception, got " throw NetException("Unexpected packet from server (expected Data or Exception, got "
+ String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); + String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER);
} }
@ -97,7 +98,7 @@ public:
else if (Protocol::Server::Exception == packet.type) else if (Protocol::Server::Exception == packet.type)
packet.exception->rethrow(); packet.exception->rethrow();
else else
throw Exception("Unexpected packet from server (expected EndOfStream or Exception, got " throw NetException("Unexpected packet from server (expected EndOfStream or Exception, got "
+ String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); + String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER);
} }

View File

@ -99,7 +99,7 @@ private:
* Шаблоны используем вместо полиморфных SortCursor'ов и вызовов виртуальных функций. * Шаблоны используем вместо полиморфных SortCursor'ов и вызовов виртуальных функций.
*/ */
template<class TSortCursor> template<class TSortCursor>
void merge(Block & merged_block, ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue); void merge(ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue);
/// Вставить в результат просуммированную строку для текущей группы. /// Вставить в результат просуммированную строку для текущей группы.
void insertCurrentRow(ColumnPlainPtrs & merged_columns); void insertCurrentRow(ColumnPlainPtrs & merged_columns);

View File

@ -48,6 +48,8 @@ public:
std::size_t getBytesAllocated() const override { return bytes_allocated; } std::size_t getBytesAllocated() const override { return bytes_allocated; }
std::size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); }
double getHitRate() const override double getHitRate() const override
{ {
return static_cast<double>(hit_count.load(std::memory_order_acquire)) / return static_cast<double>(hit_count.load(std::memory_order_acquire)) /
@ -76,6 +78,11 @@ public:
return creation_time; return creation_time;
} }
bool isInjective(const std::string & attribute_name) const override
{
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
}
bool hasHierarchy() const override { return hierarchical_attribute; } bool hasHierarchy() const override { return hierarchical_attribute; }
id_t toParent(const id_t id) const override id_t toParent(const id_t id) const override

View File

@ -6,6 +6,7 @@
#include <DB/Columns/ColumnString.h> #include <DB/Columns/ColumnString.h>
#include <DB/Common/Arena.h> #include <DB/Common/Arena.h>
#include <statdaemons/ext/range.hpp> #include <statdaemons/ext/range.hpp>
#include <atomic>
#include <vector> #include <vector>
#include <tuple> #include <tuple>
@ -39,6 +40,8 @@ public:
std::size_t getBytesAllocated() const override { return bytes_allocated; } std::size_t getBytesAllocated() const override { return bytes_allocated; }
std::size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); }
double getHitRate() const override { return 1.0; } double getHitRate() const override { return 1.0; }
std::size_t getElementCount() const override { return element_count; } std::size_t getElementCount() const override { return element_count; }
@ -60,6 +63,11 @@ public:
return creation_time; return creation_time;
} }
bool isInjective(const std::string & attribute_name) const override
{
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
}
bool hasHierarchy() const override { return hierarchical_attribute; } bool hasHierarchy() const override { return hierarchical_attribute; }
id_t toParent(const id_t id) const override id_t toParent(const id_t id) const override
@ -67,6 +75,8 @@ public:
const auto attr = hierarchical_attribute; const auto attr = hierarchical_attribute;
const auto & array = *std::get<std::unique_ptr<PODArray<UInt64>>>(attr->arrays); const auto & array = *std::get<std::unique_ptr<PODArray<UInt64>>>(attr->arrays);
query_count.fetch_add(1, std::memory_order_relaxed);
return id < array.size() ? array[id] : std::get<UInt64>(attr->null_values); return id < array.size() ? array[id] : std::get<UInt64>(attr->null_values);
} }
@ -87,6 +97,8 @@ public:
\ \
const auto & array = *std::get<std::unique_ptr<PODArray<TYPE>>>(attribute.arrays);\ const auto & array = *std::get<std::unique_ptr<PODArray<TYPE>>>(attribute.arrays);\
\ \
query_count.fetch_add(1, std::memory_order_relaxed);\
\
return id < array.size() ? array[id] : std::get<TYPE>(attribute.null_values);\ return id < array.size() ? array[id] : std::get<TYPE>(attribute.null_values);\
} }
DECLARE_INDIVIDUAL_GETTER(UInt8) DECLARE_INDIVIDUAL_GETTER(UInt8)
@ -111,6 +123,8 @@ public:
const auto & array = *std::get<std::unique_ptr<PODArray<StringRef>>>(attribute.arrays); const auto & array = *std::get<std::unique_ptr<PODArray<StringRef>>>(attribute.arrays);
query_count.fetch_add(1, std::memory_order_relaxed);
return id < array.size() ? String{array[id]} : std::get<String>(attribute.null_values); return id < array.size() ? String{array[id]} : std::get<String>(attribute.null_values);
} }
@ -155,6 +169,8 @@ public:
const auto string_ref = id < attr.size() ? attr[id] : StringRef{null_value}; const auto string_ref = id < attr.size() ? attr[id] : StringRef{null_value};
out->insertData(string_ref.data, string_ref.size); out->insertData(string_ref.data, string_ref.size);
} }
query_count.fetch_add(ids.size(), std::memory_order_relaxed);
} }
private: private:
@ -312,6 +328,8 @@ private:
const auto id = ids[i]; const auto id = ids[i];
out[i] = id < attr.size() ? attr[id] : null_value; out[i] = id < attr.size() ? attr[id] : null_value;
} }
query_count.fetch_add(ids.size(), std::memory_order_relaxed);
} }
template <typename T> template <typename T>
@ -382,6 +400,8 @@ private:
std::size_t bucket_count = 0; std::size_t bucket_count = 0;
std::chrono::time_point<std::chrono::system_clock> creation_time; std::chrono::time_point<std::chrono::system_clock> creation_time;
mutable std::atomic<std::size_t> query_count;
}; };
} }

View File

@ -6,6 +6,7 @@
#include <DB/Common/HashTable/HashMap.h> #include <DB/Common/HashTable/HashMap.h>
#include <DB/Columns/ColumnString.h> #include <DB/Columns/ColumnString.h>
#include <statdaemons/ext/range.hpp> #include <statdaemons/ext/range.hpp>
#include <atomic>
#include <memory> #include <memory>
#include <tuple> #include <tuple>
@ -36,6 +37,8 @@ public:
std::size_t getBytesAllocated() const override { return bytes_allocated; } std::size_t getBytesAllocated() const override { return bytes_allocated; }
std::size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); }
double getHitRate() const override { return 1.0; } double getHitRate() const override { return 1.0; }
std::size_t getElementCount() const override { return element_count; } std::size_t getElementCount() const override { return element_count; }
@ -57,6 +60,11 @@ public:
return creation_time; return creation_time;
} }
bool isInjective(const std::string & attribute_name) const override
{
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
}
bool hasHierarchy() const override { return hierarchical_attribute; } bool hasHierarchy() const override { return hierarchical_attribute; }
id_t toParent(const id_t id) const override id_t toParent(const id_t id) const override
@ -65,6 +73,8 @@ public:
const auto & map = *std::get<std::unique_ptr<HashMap<UInt64, UInt64>>>(attr->maps); const auto & map = *std::get<std::unique_ptr<HashMap<UInt64, UInt64>>>(attr->maps);
const auto it = map.find(id); const auto it = map.find(id);
query_count.fetch_add(1, std::memory_order_relaxed);
return it != map.end() ? it->second : std::get<UInt64>(attr->null_values); return it != map.end() ? it->second : std::get<UInt64>(attr->null_values);
} }
@ -86,6 +96,8 @@ public:
const auto & map = *std::get<std::unique_ptr<HashMap<UInt64, TYPE>>>(attribute.maps);\ const auto & map = *std::get<std::unique_ptr<HashMap<UInt64, TYPE>>>(attribute.maps);\
const auto it = map.find(id);\ const auto it = map.find(id);\
\ \
query_count.fetch_add(1, std::memory_order_relaxed);\
\
return it != map.end() ? TYPE{it->second} : std::get<TYPE>(attribute.null_values);\ return it != map.end() ? TYPE{it->second} : std::get<TYPE>(attribute.null_values);\
} }
DECLARE_INDIVIDUAL_GETTER(UInt8) DECLARE_INDIVIDUAL_GETTER(UInt8)
@ -111,6 +123,8 @@ public:
const auto & map = *std::get<std::unique_ptr<HashMap<UInt64, StringRef>>>(attribute.maps); const auto & map = *std::get<std::unique_ptr<HashMap<UInt64, StringRef>>>(attribute.maps);
const auto it = map.find(id); const auto it = map.find(id);
query_count.fetch_add(1, std::memory_order_relaxed);
return it != map.end() ? String{it->second} : std::get<String>(attribute.null_values); return it != map.end() ? String{it->second} : std::get<String>(attribute.null_values);
} }
@ -155,6 +169,8 @@ public:
const auto string_ref = it != attr.end() ? it->second : StringRef{null_value}; const auto string_ref = it != attr.end() ? it->second : StringRef{null_value};
out->insertData(string_ref.data, string_ref.size); out->insertData(string_ref.data, string_ref.size);
} }
query_count.fetch_add(ids.size(), std::memory_order_relaxed);
} }
private: private:
@ -310,6 +326,8 @@ private:
const auto it = attr.find(ids[i]); const auto it = attr.find(ids[i]);
out[i] = it != attr.end() ? it->second : null_value; out[i] = it != attr.end() ? it->second : null_value;
} }
query_count.fetch_add(ids.size(), std::memory_order_relaxed);
} }
template <typename T> template <typename T>
@ -368,6 +386,7 @@ private:
std::size_t bytes_allocated = 0; std::size_t bytes_allocated = 0;
std::size_t element_count = 0; std::size_t element_count = 0;
std::size_t bucket_count = 0; std::size_t bucket_count = 0;
mutable std::atomic<std::size_t> query_count{};
std::chrono::time_point<std::chrono::system_clock> creation_time; std::chrono::time_point<std::chrono::system_clock> creation_time;
}; };

View File

@ -30,6 +30,8 @@ public:
virtual std::size_t getBytesAllocated() const = 0; virtual std::size_t getBytesAllocated() const = 0;
virtual std::size_t getQueryCount() const = 0;
virtual double getHitRate() const = 0; virtual double getHitRate() const = 0;
virtual std::size_t getElementCount() const = 0; virtual std::size_t getElementCount() const = 0;
@ -47,6 +49,8 @@ public:
virtual std::chrono::time_point<std::chrono::system_clock> getCreationTime() const = 0; virtual std::chrono::time_point<std::chrono::system_clock> getCreationTime() const = 0;
virtual bool isInjective(const std::string & attribute_name) const = 0;
virtual bool hasHierarchy() const = 0; virtual bool hasHierarchy() const = 0;
/// do not call unless you ensure that hasHierarchy() returns true /// do not call unless you ensure that hasHierarchy() returns true

View File

@ -136,7 +136,7 @@ struct ConvertImpl<DataTypeDateTime, DataTypeDate, Name>
/** Отдельный случай для преобразования (U)Int32 или (U)Int64 в Date. /** Отдельный случай для преобразования (U)Int32 или (U)Int64 в Date.
* Если число меньше 65536, то оно понимается, как DayNum, а если больше - как unix timestamp. * Если число меньше 65536, то оно понимается, как DayNum, а если больше или равно - как unix timestamp.
* Немного нелогично, что мы, по сути, помещаем две разные функции в одну. * Немного нелогично, что мы, по сути, помещаем две разные функции в одну.
* Но зато это позволяет поддержать распространённый случай, * Но зато это позволяет поддержать распространённый случай,
* когда пользователь пишет toDate(UInt32), ожидая, что это - перевод unix timestamp в дату * когда пользователь пишет toDate(UInt32), ожидая, что это - перевод unix timestamp в дату
@ -490,8 +490,8 @@ public:
* Для неконстантных столбцов arguments[i].column = nullptr. * Для неконстантных столбцов arguments[i].column = nullptr.
*/ */
void getReturnTypeAndPrerequisites(const ColumnsWithNameAndType & arguments, void getReturnTypeAndPrerequisites(const ColumnsWithNameAndType & arguments,
DataTypePtr & out_return_type, DataTypePtr & out_return_type,
ExpressionActions::Actions & out_prerequisites) std::vector<ExpressionAction> & out_prerequisites)
{ {
if (arguments.size() != 2) if (arguments.size() != 2)
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "

View File

@ -503,7 +503,7 @@ struct NameRegionToCity { static constexpr auto name = "regionToCity"; };
struct NameRegionToArea { static constexpr auto name = "regionToArea"; }; struct NameRegionToArea { static constexpr auto name = "regionToArea"; };
struct NameRegionToDistrict { static constexpr auto name = "regionToDistrict"; }; struct NameRegionToDistrict { static constexpr auto name = "regionToDistrict"; };
struct NameRegionToCountry { static constexpr auto name = "regionToCountry"; }; struct NameRegionToCountry { static constexpr auto name = "regionToCountry"; };
struct NameRegionToContinent { static constexpr auto name = "regionToContient"; }; struct NameRegionToContinent { static constexpr auto name = "regionToContinent"; };
struct NameRegionToPopulation { static constexpr auto name = "regionToPopulation"; }; struct NameRegionToPopulation { static constexpr auto name = "regionToPopulation"; };
struct NameOSToRoot { static constexpr auto name = "OSToRoot"; }; struct NameOSToRoot { static constexpr auto name = "OSToRoot"; };
struct NameSEToRoot { static constexpr auto name = "SEToRoot"; }; struct NameSEToRoot { static constexpr auto name = "SEToRoot"; };
@ -738,7 +738,7 @@ public:
}; };
class FunctionDictGetString : public IFunction class FunctionDictGetString final : public IFunction
{ {
public: public:
static constexpr auto name = "dictGetString"; static constexpr auto name = "dictGetString";

View File

@ -3,6 +3,7 @@
#include <Poco/Net/DNS.h> #include <Poco/Net/DNS.h>
#include <math.h> #include <math.h>
#include <mutex>
#include <DB/IO/WriteBufferFromString.h> #include <DB/IO/WriteBufferFromString.h>
#include <DB/DataTypes/DataTypesNumberFixed.h> #include <DB/DataTypes/DataTypesNumberFixed.h>
@ -21,7 +22,9 @@
#include <DB/Columns/ColumnArray.h> #include <DB/Columns/ColumnArray.h>
#include <DB/Columns/ColumnReplicated.h> #include <DB/Columns/ColumnReplicated.h>
#include <DB/Common/UnicodeBar.h> #include <DB/Common/UnicodeBar.h>
#include <DB/Common/HashTable/HashMap.h>
#include <DB/Functions/IFunction.h> #include <DB/Functions/IFunction.h>
#include <DB/Interpreters/ExpressionActions.h>
#include <statdaemons/ext/range.hpp> #include <statdaemons/ext/range.hpp>
@ -54,6 +57,8 @@ namespace DB
* sleep(n) - спит n секунд каждый блок. * sleep(n) - спит n секунд каждый блок.
* *
* bar(x, min, max, width) - рисует полосу из количества символов, пропорционального (x - min) и равного width при x == max. * bar(x, min, max, width) - рисует полосу из количества символов, пропорционального (x - min) и равного width при x == max.
*
* transform(x, from_array, to_array[, default]) - преобразовать x согласно переданному явным образом соответствию.
*/ */
@ -884,4 +889,506 @@ using FunctionIsFinite = FunctionNumericPredicate<IsFiniteImpl>;
using FunctionIsInfinite = FunctionNumericPredicate<IsInfiniteImpl>; using FunctionIsInfinite = FunctionNumericPredicate<IsInfiniteImpl>;
using FunctionIsNaN = FunctionNumericPredicate<IsNaNImpl>; using FunctionIsNaN = FunctionNumericPredicate<IsNaNImpl>;
DataTypePtr getSmallestCommonNumericType(const IDataType & t1, const IDataType & t2);
/** transform(x, [from...], [to...], default)
* - преобразует значения согласно явно указанному отображению.
*
* x - что преобразовывать.
* from - константный массив значений для преобразования.
* to - константный массив значений, в которые должны быть преобразованы значения из from.
* default - константа, какое значение использовать, если x не равен ни одному из значений во from.
* from и to - массивы одинаковых размеров.
*
* Типы:
* transform(T, Array(T), Array(U), U) -> U
*
* transform(x, [from...], [to...])
* - eсли default не указан, то для значений x, для которых нет соответствующего элемента во from, возвращается не изменённое значение x.
*
* Типы:
* transform(T, Array(T), Array(T)) -> T
*
* Замечание: реализация довольно громоздкая.
*/
class FunctionTransform : public IFunction
{
public:
	static constexpr auto name = "transform";
	static IFunction * create(const Context &) { return new FunctionTransform; }

	String getName() const override { return name; }

	/// Signatures: transform(T, Array(T), Array(U), U) -> U  or  transform(T, Array(T), Array(T)) -> T.
	DataTypePtr getReturnType(const DataTypes & arguments) const override
	{
		const auto args_size = arguments.size();
		if (args_size != 3 && args_size != 4)
			throw Exception{
				"Number of arguments for function " + getName() + " doesn't match: passed " +
					toString(args_size) + ", should be 3 or 4",
				ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};

		const IDataType * type_x = arguments[0].get();

		if (!type_x->isNumeric() && !typeid_cast<const DataTypeString *>(type_x))
			throw Exception("Unsupported type " + type_x->getName()
				+ " of first argument of function " + getName()
				+ ", must be numeric type or Date/DateTime or String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

		const DataTypeArray * type_arr_from = typeid_cast<const DataTypeArray *>(arguments[1].get());

		if (!type_arr_from)
			throw Exception("Second argument of function " + getName()
				+ ", must be array of source values to transform from.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

		const auto type_arr_from_nested = type_arr_from->getNestedType();

		/// x and the elements of 'from' must be comparable: both numeric or both strings.
		if ((type_x->isNumeric() != type_arr_from_nested->isNumeric())
			|| (!!typeid_cast<const DataTypeString *>(type_x) != !!typeid_cast<const DataTypeString *>(type_arr_from_nested.get())))
			throw Exception("First argument and elements of array of second argument of function " + getName()
				+ " must have compatible types: both numeric or both strings.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

		const DataTypeArray * type_arr_to = typeid_cast<const DataTypeArray *>(arguments[2].get());

		if (!type_arr_to)
			throw Exception("Third argument of function " + getName()
				+ ", must be array of destination values to transform to.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

		const auto type_arr_to_nested = type_arr_to->getNestedType();

		if (args_size == 3)
		{
			/// Without a default, values not found in 'from' are returned unchanged,
			/// so the result type is the type of x itself.
			if ((type_x->isNumeric() != type_arr_to_nested->isNumeric())
				|| (!!typeid_cast<const DataTypeString *>(type_x) != !!typeid_cast<const DataTypeString *>(type_arr_to_nested.get())))
				throw Exception("Function " + getName()
					+ " have signature: transform(T, Array(T), Array(U), U) -> U; or transform(T, Array(T), Array(T)) -> T; where T and U are types.",
					ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

			return type_x->clone();
		}
		else
		{
			const IDataType * type_default = arguments[3].get();

			if (!type_default->isNumeric() && !typeid_cast<const DataTypeString *>(type_default))
				throw Exception("Unsupported type " + type_default->getName()
					+ " of fourth argument (default value) of function " + getName()
					+ ", must be numeric type or Date/DateTime or String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

			if ((type_default->isNumeric() != type_arr_to_nested->isNumeric())
				|| (!!typeid_cast<const DataTypeString *>(type_default) != !!typeid_cast<const DataTypeString *>(type_arr_to_nested.get())))
				throw Exception("Function " + getName()
					+ " have signature: transform(T, Array(T), Array(U), U) -> U; or transform(T, Array(T), Array(T)) -> T; where T and U are types.",
					ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

			if (type_arr_to_nested->behavesAsNumber() && type_default->behavesAsNumber())
			{
				/// Take the smallest common numeric type for the elements of 'to' and the default.
				return getSmallestCommonNumericType(*type_arr_to_nested, *type_default);
			}

			/// TODO More checks.
			return type_arr_to_nested->clone();
		}
	}

	void execute(Block & block, const ColumnNumbers & arguments, const size_t result) override
	{
		const ColumnConstArray * array_from = typeid_cast<const ColumnConstArray *>(&*block.getByPosition(arguments[1]).column);
		const ColumnConstArray * array_to = typeid_cast<const ColumnConstArray *>(&*block.getByPosition(arguments[2]).column);

		/// Both mapping arrays must be constant. NOTE: this check used to be
		/// `!array_from && !array_to`, which dereferenced a null pointer below
		/// when exactly one of the two arguments was constant.
		if (!array_from || !array_to)
			throw Exception("Second and third arguments of function " + getName() + " must be constant arrays.", ErrorCodes::ILLEGAL_COLUMN);

		prepare(array_from->getData(), array_to->getData(), block, arguments);

		const auto in = block.getByPosition(arguments.front()).column.get();

		if (in->isConst())
		{
			executeConst(block, arguments, result);
			return;
		}

		auto column_result = block.getByPosition(result).type->createColumn();
		auto out = column_result.get();

		if (!executeNum<UInt8>(in, out)
			&& !executeNum<UInt16>(in, out)
			&& !executeNum<UInt32>(in, out)
			&& !executeNum<UInt64>(in, out)
			&& !executeNum<Int8>(in, out)
			&& !executeNum<Int16>(in, out)
			&& !executeNum<Int32>(in, out)
			&& !executeNum<Int64>(in, out)
			&& !executeNum<Float32>(in, out)
			&& !executeNum<Float64>(in, out)
			&& !executeString(in, out))
			throw Exception(
				"Illegal column " + in->getName() + " of first argument of function " + getName(),
				ErrorCodes::ILLEGAL_COLUMN);

		block.getByPosition(result).column = column_result;
	}

private:
	/// Constant input: materialize it as a full column of size 1, evaluate as usual,
	/// and wrap the single result value back into a constant column.
	void executeConst(Block & block, const ColumnNumbers & arguments, const size_t result)
	{
		Block tmp_block;
		ColumnNumbers tmp_arguments;

		tmp_block.insert(block.getByPosition(arguments[0]));
		tmp_block.getByPosition(0).column = static_cast<IColumnConst *>(tmp_block.getByPosition(0).column->cloneResized(1).get())->convertToFullColumn();
		tmp_arguments.push_back(0);

		for (size_t i = 1; i < arguments.size(); ++i)
		{
			tmp_block.insert(block.getByPosition(arguments[i]));
			tmp_arguments.push_back(i);
		}

		tmp_block.insert(block.getByPosition(result));
		size_t tmp_result = arguments.size();

		execute(tmp_block, tmp_arguments, tmp_result);

		block.getByPosition(result).column = block.getByPosition(result).type->createConstColumn(
			block.rowsInFirstColumn(),
			(*tmp_block.getByPosition(tmp_result).column)[0]);
	}

	/// Dispatch by concrete numeric type of the input column.
	template <typename T>
	bool executeNum(const IColumn * in_untyped, IColumn * out_untyped)
	{
		if (const auto in = typeid_cast<const ColumnVector<T> *>(in_untyped))
		{
			if (default_value.isNull())
			{
				/// No default: result type equals input type (see getReturnType).
				auto out = typeid_cast<ColumnVector<T> *>(out_untyped);
				if (!out)
					throw Exception(
						"Illegal column " + out_untyped->getName() + " of elements of array of third argument of function " + getName()
							+ ", must be " + in->getName(),
						ErrorCodes::ILLEGAL_COLUMN);

				executeImplNumToNum<T>(in->getData(), out->getData());
			}
			else
			{
				if (!executeNumToNumWithDefault<T, UInt8>(in, out_untyped)
					&& !executeNumToNumWithDefault<T, UInt16>(in, out_untyped)
					&& !executeNumToNumWithDefault<T, UInt32>(in, out_untyped)
					&& !executeNumToNumWithDefault<T, UInt64>(in, out_untyped)
					&& !executeNumToNumWithDefault<T, Int8>(in, out_untyped)
					&& !executeNumToNumWithDefault<T, Int16>(in, out_untyped)
					&& !executeNumToNumWithDefault<T, Int32>(in, out_untyped)
					&& !executeNumToNumWithDefault<T, Int64>(in, out_untyped)
					&& !executeNumToNumWithDefault<T, Float32>(in, out_untyped)
					&& !executeNumToNumWithDefault<T, Float64>(in, out_untyped)
					&& !executeNumToString<T>(in, out_untyped))
					throw Exception(
						"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(),
						ErrorCodes::ILLEGAL_COLUMN);
			}

			return true;
		}

		return false;
	}

	bool executeString(const IColumn * in_untyped, IColumn * out_untyped)
	{
		if (const auto in = typeid_cast<const ColumnString *>(in_untyped))
		{
			if (!executeStringToNum<UInt8>(in, out_untyped)
				&& !executeStringToNum<UInt16>(in, out_untyped)
				&& !executeStringToNum<UInt32>(in, out_untyped)
				&& !executeStringToNum<UInt64>(in, out_untyped)
				&& !executeStringToNum<Int8>(in, out_untyped)
				&& !executeStringToNum<Int16>(in, out_untyped)
				&& !executeStringToNum<Int32>(in, out_untyped)
				&& !executeStringToNum<Int64>(in, out_untyped)
				&& !executeStringToNum<Float32>(in, out_untyped)
				&& !executeStringToNum<Float64>(in, out_untyped)
				&& !executeStringToString(in, out_untyped))
				throw Exception(
					"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(),
					ErrorCodes::ILLEGAL_COLUMN);

			return true;
		}

		return false;
	}

	template <typename T, typename U>
	bool executeNumToNumWithDefault(const ColumnVector<T> * in, IColumn * out_untyped)
	{
		auto out = typeid_cast<ColumnVector<U> *>(out_untyped);
		if (!out)
			return false;

		executeImplNumToNumWithDefault<T, U>(in->getData(), out->getData(), default_value.get<U>());
		return true;
	}

	template <typename T>
	bool executeNumToString(const ColumnVector<T> * in, IColumn * out_untyped)
	{
		auto out = typeid_cast<ColumnString *>(out_untyped);
		if (!out)
			return false;

		/// StringRefs in the tables include the terminating zero, hence size() + 1.
		const String & default_str = default_value.get<const String &>();
		StringRef default_string_ref{default_str.data(), default_str.size() + 1};
		executeImplNumToStringWithDefault<T>(in->getData(), out->getChars(), out->getOffsets(), default_string_ref);
		return true;
	}

	template <typename U>
	bool executeStringToNum(const ColumnString * in, IColumn * out_untyped)
	{
		auto out = typeid_cast<ColumnVector<U> *>(out_untyped);
		if (!out)
			return false;

		executeImplStringToNumWithDefault<U>(in->getChars(), in->getOffsets(), out->getData(), default_value.get<U>());
		return true;
	}

	bool executeStringToString(const ColumnString * in, IColumn * out_untyped)
	{
		auto out = typeid_cast<ColumnString *>(out_untyped);
		if (!out)
			return false;

		if (default_value.isNull())
			executeImplStringToString<false>(in->getChars(), in->getOffsets(), out->getChars(), out->getOffsets(), {});
		else
		{
			const String & default_str = default_value.get<const String &>();
			StringRef default_string_ref{default_str.data(), default_str.size() + 1};
			executeImplStringToString<true>(in->getChars(), in->getOffsets(), out->getChars(), out->getOffsets(), default_string_ref);
		}

		return true;
	}

	template <typename T, typename U>
	void executeImplNumToNumWithDefault(const PODArray<T> & src, PODArray<U> & dst, U dst_default)
	{
		const auto & table = *table_num_to_num;
		size_t size = src.size();
		dst.resize(size);
		for (size_t i = 0; i < size; ++i)
		{
			auto it = table.find(src[i]);
			if (it != table.end())
				memcpy(&dst[i], &it->second, sizeof(dst[i]));	/// little endian.
			else
				dst[i] = dst_default;
		}
	}

	template <typename T>
	void executeImplNumToNum(const PODArray<T> & src, PODArray<T> & dst)
	{
		const auto & table = *table_num_to_num;
		size_t size = src.size();
		dst.resize(size);
		for (size_t i = 0; i < size; ++i)
		{
			auto it = table.find(src[i]);
			if (it != table.end())
				memcpy(&dst[i], &it->second, sizeof(dst[i]));
			else
				dst[i] = src[i];
		}
	}

	template <typename T>
	void executeImplNumToStringWithDefault(const PODArray<T> & src,
		ColumnString::Chars_t & dst_data, ColumnString::Offsets_t & dst_offsets, StringRef dst_default)
	{
		const auto & table = *table_num_to_string;
		size_t size = src.size();
		dst_offsets.resize(size);
		ColumnString::Offset_t current_offset = 0;
		for (size_t i = 0; i < size; ++i)
		{
			auto it = table.find(src[i]);
			StringRef ref = it != table.end() ? it->second : dst_default;
			dst_data.resize(current_offset + ref.size);
			memcpy(&dst_data[current_offset], ref.data, ref.size);
			current_offset += ref.size;
			dst_offsets[i] = current_offset;
		}
	}

	template <typename U>
	void executeImplStringToNumWithDefault(
		const ColumnString::Chars_t & src_data, const ColumnString::Offsets_t & src_offsets,
		PODArray<U> & dst, U dst_default)
	{
		const auto & table = *table_string_to_num;
		size_t size = src_offsets.size();
		dst.resize(size);
		ColumnString::Offset_t current_offset = 0;
		for (size_t i = 0; i < size; ++i)
		{
			StringRef ref{&src_data[current_offset], src_offsets[i] - current_offset};
			current_offset = src_offsets[i];
			auto it = table.find(ref);
			if (it != table.end())
				memcpy(&dst[i], &it->second, sizeof(dst[i]));
			else
				dst[i] = dst_default;
		}
	}

	template <bool with_default>
	void executeImplStringToString(
		const ColumnString::Chars_t & src_data, const ColumnString::Offsets_t & src_offsets,
		ColumnString::Chars_t & dst_data, ColumnString::Offsets_t & dst_offsets, StringRef dst_default)
	{
		const auto & table = *table_string_to_string;
		size_t size = src_offsets.size();
		dst_offsets.resize(size);
		ColumnString::Offset_t current_src_offset = 0;
		ColumnString::Offset_t current_dst_offset = 0;
		for (size_t i = 0; i < size; ++i)
		{
			StringRef src_ref{&src_data[current_src_offset], src_offsets[i] - current_src_offset};
			current_src_offset = src_offsets[i];

			auto it = table.find(src_ref);

			StringRef dst_ref = it != table.end() ? it->second : (with_default ? dst_default : src_ref);
			dst_data.resize(current_dst_offset + dst_ref.size);
			memcpy(&dst_data[current_dst_offset], dst_ref.data, dst_ref.size);
			current_dst_offset += dst_ref.size;
			dst_offsets[i] = current_dst_offset;
		}
	}

	/// Different hash table flavours implementing the mapping.
	using NumToNum = HashMap<UInt64, UInt64, HashCRC32<UInt64>>;
	using NumToString = HashMap<UInt64, StringRef, HashCRC32<UInt64>>;	/// StringRefs everywhere include the terminating zero.
	using StringToNum = HashMap<StringRef, UInt64>;
	using StringToString = HashMap<StringRef, StringRef>;

	std::unique_ptr<NumToNum> table_num_to_num;
	std::unique_ptr<NumToString> table_num_to_string;
	std::unique_ptr<StringToNum> table_string_to_num;
	std::unique_ptr<StringToString> table_string_to_string;

	Arena string_pool;

	Field default_value;	/// Null if not specified.

	bool prepared = false;
	std::mutex mutex;

	/// May be called from different threads. Takes effect only on the first call.
	void prepare(const Array & from, const Array & to, Block & block, const ColumnNumbers & arguments)
	{
		/// All reads of `prepared` happen under the mutex: an unsynchronized
		/// read racing the write below would be a data race.
		std::lock_guard<std::mutex> lock(mutex);

		if (prepared)
			return;

		const size_t size = from.size();
		if (0 == size)
			throw Exception("Empty arrays are illegal in function " + getName(), ErrorCodes::BAD_ARGUMENTS);

		if (from.size() != to.size())
			throw Exception("Second and third arguments of function " + getName() + " must be arrays of same size", ErrorCodes::BAD_ARGUMENTS);

		Array converted_to;
		const Array * used_to = &to;

		/// Is a default value specified?
		if (arguments.size() == 4)
		{
			const IColumnConst * default_col = dynamic_cast<const IColumnConst *>(&*block.getByPosition(arguments[3]).column);

			if (!default_col)
				throw Exception("Fourth argument of function " + getName() + " (default value) must be constant", ErrorCodes::ILLEGAL_COLUMN);

			default_value = (*default_col)[0];

			/// Do the elements of 'to' and the default need to be converted to the smallest common type, Float64?
			if (default_value.getType() == Field::Types::Float64 && to[0].getType() != Field::Types::Float64)
			{
				converted_to.resize(to.size());
				for (size_t i = 0, size = to.size(); i < size; ++i)
					converted_to[i] = apply_visitor(FieldVisitorConvertToNumber<Float64>(), to[i]);
				used_to = &converted_to;
			}
			else if (default_value.getType() != Field::Types::Float64 && to[0].getType() == Field::Types::Float64)
			{
				default_value = apply_visitor(FieldVisitorConvertToNumber<Float64>(), default_value);
			}
		}

		/// Note: no check for duplicates in the 'from' array is performed.

		if (from[0].getType() != Field::Types::String && to[0].getType() != Field::Types::String)
		{
			table_num_to_num.reset(new NumToNum);
			auto & table = *table_num_to_num;
			for (size_t i = 0; i < size; ++i)
				table[from[i].get<UInt64>()] = (*used_to)[i].get<UInt64>();
		}
		else if (from[0].getType() != Field::Types::String && to[0].getType() == Field::Types::String)
		{
			table_num_to_string.reset(new NumToString);
			auto & table = *table_num_to_string;
			for (size_t i = 0; i < size; ++i)
			{
				const String & str_to = to[i].get<const String &>();
				StringRef ref{string_pool.insert(str_to.data(), str_to.size() + 1), str_to.size() + 1};
				table[from[i].get<UInt64>()] = ref;
			}
		}
		else if (from[0].getType() == Field::Types::String && to[0].getType() != Field::Types::String)
		{
			table_string_to_num.reset(new StringToNum);
			auto & table = *table_string_to_num;
			for (size_t i = 0; i < size; ++i)
			{
				const String & str_from = from[i].get<const String &>();
				StringRef ref{string_pool.insert(str_from.data(), str_from.size() + 1), str_from.size() + 1};
				table[ref] = (*used_to)[i].get<UInt64>();
			}
		}
		else if (from[0].getType() == Field::Types::String && to[0].getType() == Field::Types::String)
		{
			table_string_to_string.reset(new StringToString);
			auto & table = *table_string_to_string;
			for (size_t i = 0; i < size; ++i)
			{
				const String & str_from = from[i].get<const String &>();
				const String & str_to = to[i].get<const String &>();
				StringRef ref_from{string_pool.insert(str_from.data(), str_from.size() + 1), str_from.size() + 1};
				StringRef ref_to{string_pool.insert(str_to.data(), str_to.size() + 1), str_to.size() + 1};
				table[ref_from] = ref_to;
			}
		}

		prepared = true;
	}
};
} }

View File

@ -1,7 +1,9 @@
#pragma once #pragma once
#include <DB/Functions/FunctionsArithmetic.h> #include <DB/Functions/FunctionsArithmetic.h>
#include <cmath> // log2() #include <cmath>
#include <type_traits>
#include <array>
namespace DB namespace DB
@ -11,6 +13,9 @@ namespace DB
* roundToExp2 - вниз до ближайшей степени двойки; * roundToExp2 - вниз до ближайшей степени двойки;
* roundDuration - вниз до ближайшего из: 0, 1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000; * roundDuration - вниз до ближайшего из: 0, 1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000;
* roundAge - вниз до ближайшего из: 0, 18, 25, 35, 45. * roundAge - вниз до ближайшего из: 0, 18, 25, 35, 45.
* round(x, N) - арифметическое округление (N - сколько знаков после запятой оставить; 0 по умолчанию).
* ceil(x, N) - наименьшее число, которое не меньше x (N - сколько знаков после запятой оставить; 0 по умолчанию).
* floor(x, N) - наибольшее число, которое не больше x (N - сколько знаков после запятой оставить; 0 по умолчанию).
*/ */
template<typename A> template<typename A>
@ -87,13 +92,439 @@ namespace DB
} }
}; };
/// Low-level implementation of the rounding functions.
/// The primary template is intentionally empty: only the Float32/Float64
/// specializations below are ever used (integral types short-circuit in
/// FunctionRoundingImpl without instantiating this).
template<typename T, int rounding_mode, bool with_scale>
struct RoundingComputation
{
};
/// Float32: rounds 4 values per call using SSE4.1 (_mm_round_ps).
template<int rounding_mode, bool with_scale>
struct RoundingComputation<Float32, rounding_mode, with_scale>
{
	/// One SIMD batch worth of input/output values.
	using Data = std::array<Float32, 4>;
	using Scale = __m128;

	/// Broadcast the scale into all 4 lanes of mm_scale.
	template<bool with_scale2 = with_scale>
	static inline void prepareScale(size_t scale, Scale & mm_scale,
		typename std::enable_if<with_scale2>::type * = nullptr)
	{
		Float32 fscale = static_cast<Float32>(scale);
		mm_scale = _mm_load1_ps(&fscale);
	}

	/// No scale used: mm_scale is left untouched.
	template<bool with_scale2 = with_scale>
	static inline void prepareScale(size_t scale, Scale & mm_scale,
		typename std::enable_if<!with_scale2>::type * = nullptr)
	{
	}

	/// out = round(in * scale) / scale, element-wise.
	template<bool with_scale2 = with_scale>
	static inline void compute(const Data & in, const Scale & mm_scale, Data & out,
		typename std::enable_if<with_scale2>::type * = nullptr)
	{
		__m128 mm_value = _mm_loadu_ps(reinterpret_cast<const Float32 *>(&in));
		mm_value = _mm_mul_ps(mm_value, mm_scale);
		mm_value = _mm_round_ps(mm_value, rounding_mode);
		mm_value = _mm_div_ps(mm_value, mm_scale);
		_mm_storeu_ps(reinterpret_cast<Float32 *>(&out), mm_value);
	}

	/// out = round(in), element-wise, without scaling.
	template<bool with_scale2 = with_scale>
	static inline void compute(const Data & in, const Scale & mm_scale, Data & out,
		typename std::enable_if<!with_scale2>::type * = nullptr)
	{
		__m128 mm_value = _mm_loadu_ps(reinterpret_cast<const Float32 *>(&in));
		mm_value = _mm_round_ps(mm_value, rounding_mode);
		_mm_storeu_ps(reinterpret_cast<Float32 *>(&out), mm_value);
	}
};
/// Float64: rounds 2 values per call using SSE4.1 (_mm_round_pd).
template<int rounding_mode, bool with_scale>
struct RoundingComputation<Float64, rounding_mode, with_scale>
{
	/// One SIMD batch worth of input/output values.
	using Data = std::array<Float64, 2>;
	using Scale = __m128d;

	/// Broadcast the scale into both lanes of mm_scale.
	template<bool with_scale2 = with_scale>
	static inline void prepareScale(size_t scale, Scale & mm_scale,
		typename std::enable_if<with_scale2>::type * = nullptr)
	{
		Float64 fscale = static_cast<Float64>(scale);
		mm_scale = _mm_load1_pd(&fscale);
	}

	/// No scale used: mm_scale is left untouched.
	template<bool with_scale2 = with_scale>
	static inline void prepareScale(size_t scale, Scale & mm_scale,
		typename std::enable_if<!with_scale2>::type * = nullptr)
	{
	}

	/// out = round(in * scale) / scale, element-wise.
	template<bool with_scale2 = with_scale>
	static inline void compute(const Data & in, const Scale & mm_scale, Data & out,
		typename std::enable_if<with_scale2>::type * = nullptr)
	{
		__m128d mm_value = _mm_loadu_pd(reinterpret_cast<const Float64 *>(&in));
		mm_value = _mm_mul_pd(mm_value, mm_scale);
		mm_value = _mm_round_pd(mm_value, rounding_mode);
		mm_value = _mm_div_pd(mm_value, mm_scale);
		_mm_storeu_pd(reinterpret_cast<Float64 *>(&out), mm_value);
	}

	/// out = round(in), element-wise, without scaling.
	template<bool with_scale2 = with_scale>
	static inline void compute(const Data & in, const Scale & mm_scale, Data & out,
		typename std::enable_if<!with_scale2>::type * = nullptr)
	{
		__m128d mm_value = _mm_loadu_pd(reinterpret_cast<const Float64 *>(&in));
		mm_value = _mm_round_pd(mm_value, rounding_mode);
		_mm_storeu_pd(reinterpret_cast<Float64 *>(&out), mm_value);
	}
};
/// High-level implementation of the rounding functions.
/// Dispatches on the value type: the partial specializations below handle
/// integral types (no-op) and floating-point types (SIMD rounding).
template<typename T, int rounding_mode, bool with_scale, typename Enable = void>
struct FunctionRoundingImpl
{
};
/// Integral values are never rounded: both overloads pass the input through
/// unchanged and ignore the scale entirely.
template<typename T, int rounding_mode, bool with_scale>
struct FunctionRoundingImpl<T, rounding_mode, with_scale, typename std::enable_if<std::is_integral<T>::value>::type>
{
	/// Copy the column verbatim; `out` must already be sized to in.size().
	static inline void apply(const PODArray<T> & in, size_t scale, typename ColumnVector<T>::Container_t & out)
	{
		const size_t count = in.size();
		for (size_t pos = 0; pos != count; ++pos)
			out[pos] = in[pos];
	}

	/// Single (constant) value: identity.
	static inline T apply(T val, size_t scale)
	{
		return val;
	}
};
/// Floating-point values: round in SIMD batches via RoundingComputation.
template<typename T, int rounding_mode, bool with_scale>
struct FunctionRoundingImpl<T, rounding_mode, with_scale, typename std::enable_if<std::is_floating_point<T>::value>::type>
{
private:
	using Op = RoundingComputation<T, rounding_mode, with_scale>;
	using Data = typename Op::Data;
	using Scale = typename Op::Scale;

public:
	/// Round a whole column; `out` must already be sized to in.size().
	static inline void apply(const PODArray<T> & in, size_t scale, typename ColumnVector<T>::Container_t & out)
	{
		Scale mm_scale;
		Op::prepareScale(scale, mm_scale);

		const size_t size = in.size();
		const size_t data_size = std::tuple_size<Data>();

		/// Whole SIMD batches. NOTE: the bound is written as
		/// `i + data_size <= size` instead of `i < size - data_size + 1`:
		/// the latter underflows (size_t) whenever size < data_size and
		/// would read far out of bounds.
		size_t i = 0;
		for (; i + data_size <= size; i += data_size)
		{
			Data tmp;
			for (size_t j = 0; j < data_size; ++j)
				tmp[j] = in[i + j];

			Data res;
			Op::compute(tmp, mm_scale, res);
			for (size_t j = 0; j < data_size; ++j)
				out[i + j] = res[j];
		}

		/// Tail: pad the last partial batch with zeroes.
		if (i < size)
		{
			Data tmp{0};
			for (size_t j = 0; (j < data_size) && ((i + j) < size); ++j)
				tmp[j] = in[i + j];

			Data res;
			Op::compute(tmp, mm_scale, res);
			for (size_t j = 0; (j < data_size) && ((i + j) < size); ++j)
				out[i + j] = res[j];
		}
	}

	/// Round a single (constant) value.
	static inline T apply(T val, size_t scale)
	{
		if (val == 0)
			return val;
		else
		{
			Scale mm_scale;
			Op::prepareScale(scale, mm_scale);

			Data tmp{0};
			tmp[0] = val;

			Data res;
			Op::compute(tmp, mm_scale, res);
			return res[0];
		}
	}
};
template<typename T, typename U>
struct PrecisionForType
{
template<typename L = T>
static inline bool apply(const ColumnPtr & column, UInt8 & precision,
typename std::enable_if<std::is_floating_point<L>::value>::type * = nullptr)
{
using ColumnType = ColumnConst<U>;
const ColumnType * precision_col = typeid_cast<const ColumnType *>(&*column);
if (precision_col == nullptr)
return false;
U val = precision_col->getData();
if (val < 0)
val = 0;
else if (val >= static_cast<U>(std::numeric_limits<L>::digits10))
val = static_cast<U>(std::numeric_limits<L>::digits10);
precision = static_cast<UInt8>(val);
return true;
}
/// Для целых чисел точность не имеет значения.
template<typename L = T>
static inline bool apply(const ColumnPtr & column, UInt8 & precision,
typename std::enable_if<std::is_integral<L>::value>::type * = nullptr)
{
using ColumnType = ColumnConst<U>;
const ColumnType * precision_col = typeid_cast<const ColumnType *>(&*column);
if (precision_col == nullptr)
return false;
precision = 0;
return true;
}
};
/// The following code generates a table of powers of 10 at build time.

/// Individual powers of the number 10.
/// NOTE: these templates used to sit in an anonymous namespace; in a header
/// that gives every translation unit its own internal-linkage copy (and they
/// are referenced from external-linkage templates below), so they are declared
/// at ordinary namespace scope instead.
template<size_t N>
struct PowerOf10
{
	static const size_t value = 10 * PowerOf10<N - 1>::value;
};

template<>
struct PowerOf10<0>
{
	static const size_t value = 1;
};
/// Declaration and definition of a container holding a compile-time table
/// of values (used for the powers-of-10 table).
template<size_t... Values>
struct TableContainer
{
	static const std::array<size_t, sizeof...(Values)> values;
};

template<size_t... Values>
const std::array<size_t, sizeof...(Values)> TableContainer<Values...>::values = { Values... };
/// Generator of the first N powers of 10.
/// Recursively prepends PowerOf10<N>::value to the accumulated pack until
/// N reaches 0, then materializes the pack as a TableContainer.
template<size_t N, size_t... Accumulated>
struct FillArrayImpl
{
	using result = typename FillArrayImpl<N - 1, PowerOf10<N>::value, Accumulated...>::result;
};

/// Base case: prepend 10^0 and produce the final table type.
template<size_t... Accumulated>
struct FillArrayImpl<0, Accumulated...>
{
	using result = TableContainer<PowerOf10<0>::value, Accumulated...>;
};

/// Public entry point: FillArray<N>::result holds { 10^0, 10^1, ..., 10^(N-1) }.
template<size_t N>
struct FillArray
{
	using result = typename FillArrayImpl<N - 1>::result;
};
/** Template for functions that compute an approximate value of an input
  * parameter of type (U)Int8/16/32/64 or Float32/64 and accept an optional
  * second parameter specifying how many decimal places to keep (default: 0).
  * Op - the rounding function (round/floor/ceil), selected via rounding_mode.
  */
template<typename Name, int rounding_mode>
class FunctionRounding : public IFunction
{
public:
	static constexpr auto name = Name::name;
	static IFunction * create(const Context & context) { return new FunctionRounding; }

private:
	/// Precomputed powers of 10; the index is the requested precision.
	/// digits10 + 1 entries cover every precision representable in a Float64.
	using PowersOf10 = FillArray<std::numeric_limits<Float64>::digits10 + 1>::result;

private:
	/// Whether `type` is exactly the data type T.
	template<typename T>
	bool checkType(const IDataType * type) const
	{
		return typeid_cast<const T *>(type) != nullptr;
	}

	/// Execute the function over the first argument if it is a (vector or
	/// constant) column of type T. Returns false if the column has another type.
	template<typename T>
	bool executeForType(Block & block, const ColumnNumbers & arguments, size_t result)
	{
		using OpWithScale = FunctionRoundingImpl<T, rounding_mode, true>;
		using OpWithoutScale = FunctionRoundingImpl<T, rounding_mode, false>;

		if (ColumnVector<T> * col = typeid_cast<ColumnVector<T> *>(&*block.getByPosition(arguments[0]).column))
		{
			ColumnVector<T> * col_res = new ColumnVector<T>;
			block.getByPosition(result).column = col_res;

			typename ColumnVector<T>::Container_t & vec_res = col_res->getData();
			vec_res.resize(col->getData().size());

			UInt8 precision = 0;
			if (arguments.size() == 2)
				precision = getPrecision<T>(block.getByPosition(arguments[1]).column);

			/// Scaling is only needed for a non-zero precision.
			if (precision > 0)
				OpWithScale::apply(col->getData(), PowersOf10::values[precision], vec_res);
			else
				OpWithoutScale::apply(col->getData(), 0, vec_res);

			return true;
		}
		else if (ColumnConst<T> * col = typeid_cast<ColumnConst<T> *>(&*block.getByPosition(arguments[0]).column))
		{
			UInt8 precision = 0;
			if (arguments.size() == 2)
				precision = getPrecision<T>(block.getByPosition(arguments[1]).column);

			T res;
			if (precision > 0)
				res = OpWithScale::apply(col->getData(), PowersOf10::values[precision]);
			else
				res = OpWithoutScale::apply(col->getData(), 0);

			ColumnConst<T> * col_res = new ColumnConst<T>(col->size(), res);
			block.getByPosition(result).column = col_res;

			return true;
		}

		return false;
	}

	/// Determine, from the second ('precision') argument, the precision needed
	/// for the result. Throws if the column is of an unsupported integer type.
	template<typename T>
	UInt8 getPrecision(const ColumnPtr & column)
	{
		UInt8 precision = 0;

		/// Try every supported integer type exactly once.
		/// (Was: UInt16 tested twice due to a copy-paste slip.)
		if (!(	   PrecisionForType<T, UInt8>::apply(column, precision)
				|| PrecisionForType<T, UInt16>::apply(column, precision)
				|| PrecisionForType<T, UInt32>::apply(column, precision)
				|| PrecisionForType<T, UInt64>::apply(column, precision)
				|| PrecisionForType<T, Int8>::apply(column, precision)
				|| PrecisionForType<T, Int16>::apply(column, precision)
				|| PrecisionForType<T, Int32>::apply(column, precision)
				|| PrecisionForType<T, Int64>::apply(column, precision)))
		{
			throw Exception("Illegal column " + column->getName()
					+ " of second ('precision') argument of function " + getName(),
					ErrorCodes::ILLEGAL_COLUMN);
		}

		return precision;
	}

public:
	/// Get the function name.
	String getName() const override
	{
		return name;
	}

	/// Get the result type from the argument types.
	/// Throws if the function is not applicable to these arguments.
	DataTypePtr getReturnType(const DataTypes & arguments) const override
	{
		if ((arguments.size() < 1) || (arguments.size() > 2))
			throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
				+ toString(arguments.size()) + ", should be 1 or 2.",
				ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

		/// The optional precision argument must be an integer of any width/sign.
		if (arguments.size() == 2)
		{
			const IDataType * type = &*arguments[1];
			if (!(	   checkType<DataTypeUInt8>(type)
					|| checkType<DataTypeUInt16>(type)
					|| checkType<DataTypeUInt32>(type)
					|| checkType<DataTypeUInt64>(type)
					|| checkType<DataTypeInt8>(type)
					|| checkType<DataTypeInt16>(type)
					|| checkType<DataTypeInt32>(type)
					|| checkType<DataTypeInt64>(type)))
			{
				throw Exception("Illegal type in second argument of function " + getName(),
						ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
			}
		}

		const IDataType * type = &*arguments[0];
		if (!type->behavesAsNumber())
			throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
					ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

		/// Rounding preserves the type of its first argument.
		return arguments[0];
	}

	/// Execute the function over the block.
	void execute(Block & block, const ColumnNumbers & arguments, size_t result) override
	{
		if (!(	   executeForType<UInt8>(block, arguments, result)
				|| executeForType<UInt16>(block, arguments, result)
				|| executeForType<UInt32>(block, arguments, result)
				|| executeForType<UInt64>(block, arguments, result)
				|| executeForType<Int8>(block, arguments, result)
				|| executeForType<Int16>(block, arguments, result)
				|| executeForType<Int32>(block, arguments, result)
				|| executeForType<Int64>(block, arguments, result)
				|| executeForType<Float32>(block, arguments, result)
				|| executeForType<Float64>(block, arguments, result)))
		{
			throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
					+ " of argument of function " + getName(),
					ErrorCodes::ILLEGAL_COLUMN);
		}
	}
};
/// Names of the rounding functions as exposed in SQL.
/// (De-duplicated: side-by-side diff rendering had repeated every declaration
/// twice on one line, which is an ODR/redefinition error.)
struct NameRoundToExp2		{ static constexpr auto name = "roundToExp2"; };
struct NameRoundDuration	{ static constexpr auto name = "roundDuration"; };
struct NameRoundAge			{ static constexpr auto name = "roundAge"; };
struct NameRound			{ static constexpr auto name = "round"; };
struct NameCeil				{ static constexpr auto name = "ceil"; };
struct NameFloor			{ static constexpr auto name = "floor"; };

/// Concrete function types: unary rounding helpers and the three
/// precision-aware rounding functions (rounding mode is an SSE4.1
/// _MM_FROUND_* constant).
typedef FunctionUnaryArithmetic<RoundToExp2Impl, NameRoundToExp2>		FunctionRoundToExp2;
typedef FunctionUnaryArithmetic<RoundDurationImpl, NameRoundDuration>	FunctionRoundDuration;
typedef FunctionUnaryArithmetic<RoundAgeImpl, NameRoundAge>				FunctionRoundAge;

typedef FunctionRounding<NameRound, _MM_FROUND_NINT>	FunctionRound;
typedef FunctionRounding<NameCeil, _MM_FROUND_CEIL>		FunctionCeil;
typedef FunctionRounding<NameFloor, _MM_FROUND_FLOOR>	FunctionFloor;

}

View File

@ -279,7 +279,6 @@ struct ExtractPathFull
if (nullptr != (pos = strchr(data, '/')) && pos[1] == '/' && nullptr != (pos = strchr(pos + 2, '/'))) if (nullptr != (pos = strchr(data, '/')) && pos[1] == '/' && nullptr != (pos = strchr(pos + 2, '/')))
{ {
/// no leading slash
res_data = pos; res_data = pos;
res_size = end - res_data; res_size = end - res_data;
} }

View File

@ -7,12 +7,14 @@
#include <DB/Core/ColumnNumbers.h> #include <DB/Core/ColumnNumbers.h>
#include <DB/Core/ColumnsWithNameAndType.h> #include <DB/Core/ColumnsWithNameAndType.h>
#include <DB/DataTypes/IDataType.h> #include <DB/DataTypes/IDataType.h>
#include <DB/Interpreters/ExpressionActions.h>
namespace DB namespace DB
{ {
class ExpressionAction;
/** Интерфейс для обычных функций. /** Интерфейс для обычных функций.
* Обычные функции - это функции, которые не меняют количество строк в таблице, * Обычные функции - это функции, которые не меняют количество строк в таблице,
* и результат работы которых для каждой строчки не зависит от других строк. * и результат работы которых для каждой строчки не зависит от других строк.
@ -54,7 +56,7 @@ public:
virtual void getReturnTypeAndPrerequisites( virtual void getReturnTypeAndPrerequisites(
const ColumnsWithNameAndType & arguments, const ColumnsWithNameAndType & arguments,
DataTypePtr & out_return_type, DataTypePtr & out_return_type,
ExpressionActions::Actions & out_prerequisites) std::vector<ExpressionAction> & out_prerequisites)
{ {
DataTypes types(arguments.size()); DataTypes types(arguments.size());
for (size_t i = 0; i < arguments.size(); ++i) for (size_t i = 0; i < arguments.size(); ++i)

View File

@ -55,7 +55,7 @@ private:
BufferWithOwnMemory<ReadBuffer> fill_buffer; BufferWithOwnMemory<ReadBuffer> fill_buffer;
/// Описание асинхронного запроса на чтение. /// Описание асинхронного запроса на чтение.
iocb request; iocb request = { 0 };
std::vector<iocb *> request_ptrs{&request}; std::vector<iocb *> request_ptrs{&request};
std::vector<io_event> events{1}; std::vector<io_event> events{1};

View File

@ -26,7 +26,6 @@ public:
WriteBufferAIO & operator=(const WriteBufferAIO &) = delete; WriteBufferAIO & operator=(const WriteBufferAIO &) = delete;
off_t getPositionInFile() override; off_t getPositionInFile() override;
void truncate(off_t length = 0) override;
void sync() override; void sync() override;
std::string getFileName() const override { return filename; } std::string getFileName() const override { return filename; }
int getFD() const override { return fd; } int getFD() const override { return fd; }
@ -36,6 +35,8 @@ private:
void nextImpl() override; void nextImpl() override;
/// ///
off_t doSeek(off_t off, int whence) override; off_t doSeek(off_t off, int whence) override;
///
void doTruncate(off_t length) override;
/// Если в буфере ещё остались данные - запишем их. /// Если в буфере ещё остались данные - запишем их.
void flush(); void flush();
/// Ждать окончания текущей асинхронной задачи. /// Ждать окончания текущей асинхронной задачи.
@ -50,7 +51,7 @@ private:
BufferWithOwnMemory<WriteBuffer> flush_buffer; BufferWithOwnMemory<WriteBuffer> flush_buffer;
/// Описание асинхронного запроса на запись. /// Описание асинхронного запроса на запись.
iocb request; iocb request = { 0 };
std::vector<iocb *> request_ptrs{&request}; std::vector<iocb *> request_ptrs{&request};
std::vector<io_event> events{1}; std::vector<io_event> events{1};

View File

@ -14,15 +14,17 @@ class WriteBufferFromFileBase : public BufferWithOwnMemory<WriteBuffer>
public: public:
WriteBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment); WriteBufferFromFileBase(size_t buf_size, char * existing_memory, size_t alignment);
virtual ~WriteBufferFromFileBase(); virtual ~WriteBufferFromFileBase();
off_t seek(off_t off, int whence = SEEK_SET); off_t seek(off_t off, int whence = SEEK_SET);
void truncate(off_t length = 0);
virtual off_t getPositionInFile() = 0; virtual off_t getPositionInFile() = 0;
virtual void truncate(off_t length) = 0;
virtual void sync() = 0; virtual void sync() = 0;
virtual std::string getFileName() const = 0; virtual std::string getFileName() const = 0;
virtual int getFD() const = 0; virtual int getFD() const = 0;
protected: protected:
virtual off_t doSeek(off_t off, int whence) = 0; virtual off_t doSeek(off_t off, int whence) = 0;
virtual void doTruncate(off_t length) = 0;
}; };
} }

View File

@ -85,13 +85,6 @@ public:
return seek(0, SEEK_CUR); return seek(0, SEEK_CUR);
} }
void truncate(off_t length = 0) override
{
int res = ftruncate(fd, length);
if (-1 == res)
throwFromErrno("Cannot truncate file " + getFileName(), ErrorCodes::CANNOT_TRUNCATE_FILE);
}
void sync() override void sync() override
{ {
/// Если в буфере ещё остались данные - запишем их. /// Если в буфере ещё остались данные - запишем их.
@ -111,6 +104,13 @@ private:
throwFromErrno("Cannot seek through file " + getFileName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE); throwFromErrno("Cannot seek through file " + getFileName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
return res; return res;
} }
void doTruncate(off_t length) override
{
int res = ftruncate(fd, length);
if (-1 == res)
throwFromErrno("Cannot truncate file " + getFileName(), ErrorCodes::CANNOT_TRUNCATE_FILE);
}
}; };
} }

View File

@ -7,6 +7,7 @@
#include <DB/IO/WriteBuffer.h> #include <DB/IO/WriteBuffer.h>
#include <DB/IO/BufferWithOwnMemory.h> #include <DB/IO/BufferWithOwnMemory.h>
#include <statdaemons/NetException.h>
namespace DB namespace DB
@ -38,7 +39,7 @@ private:
ostr->flush(); ostr->flush();
if (!ostr->good()) if (!ostr->good())
throw Exception("Cannot write to ostream", ErrorCodes::CANNOT_WRITE_TO_OSTREAM); throw NetException("Cannot write to ostream", ErrorCodes::CANNOT_WRITE_TO_OSTREAM);
} }
public: public:

View File

@ -26,6 +26,10 @@ public:
/// к локальным узлам обращаемся напрямую /// к локальным узлам обращаемся напрямую
size_t getLocalNodesNum() const { return local_nodes_num; } size_t getLocalNodesNum() const { return local_nodes_num; }
/// используеться для выставления ограничения на размер таймаута
static Poco::Timespan saturate(const Poco::Timespan & v, const Poco::Timespan & limit);
public:
/// Соединения с удалёнными серверами. /// Соединения с удалёнными серверами.
ConnectionPools pools; ConnectionPools pools;
@ -33,15 +37,13 @@ public:
{ {
/// contains names of directories for asynchronous write to StorageDistributed /// contains names of directories for asynchronous write to StorageDistributed
std::vector<std::string> dir_names; std::vector<std::string> dir_names;
UInt32 shard_num;
int weight; int weight;
size_t num_local_nodes; size_t num_local_nodes;
}; };
std::vector<ShardInfo> shard_info_vec; std::vector<ShardInfo> shard_info_vec;
std::vector<size_t> slot_to_shard; std::vector<size_t> slot_to_shard;
/// используеться для выставления ограничения на размер таймаута
static Poco::Timespan saturate(const Poco::Timespan & v, const Poco::Timespan & limit);
struct Address struct Address
{ {
/** В конфиге адреса либо находятся в узлах <node>: /** В конфиге адреса либо находятся в узлах <node>:
@ -61,8 +63,10 @@ public:
* </shard> * </shard>
*/ */
Poco::Net::SocketAddress host_port; Poco::Net::SocketAddress host_port;
String host_name;
String user; String user;
String password; String password;
UInt32 replica_num;
Address(const String & config_prefix); Address(const String & config_prefix);
Address(const String & host_port_, const String & user_, const String & password_); Address(const String & host_port_, const String & user_, const String & password_);
@ -71,12 +75,18 @@ public:
private: private:
static bool isLocal(const Address & address); static bool isLocal(const Address & address);
public:
/// Массив шардов. Каждый шард - адреса одного сервера. /// Массив шардов. Каждый шард - адреса одного сервера.
typedef std::vector<Address> Addresses; typedef std::vector<Address> Addresses;
/// Массив шардов. Для каждого шарда - массив адресов реплик (серверов, считающихся идентичными). /// Массив шардов. Для каждого шарда - массив адресов реплик (серверов, считающихся идентичными).
typedef std::vector<Addresses> AddressesWithFailover; typedef std::vector<Addresses> AddressesWithFailover;
public:
const Addresses & getShardsInfo() const { return addresses; }
const AddressesWithFailover & getShardsWithFailoverInfo() const { return addresses_with_failover; }
private:
Addresses addresses; Addresses addresses;
AddressesWithFailover addresses_with_failover; AddressesWithFailover addresses_with_failover;

View File

@ -39,6 +39,7 @@ class ProcessList;
class ProcessListElement; class ProcessListElement;
class Macros; class Macros;
class Progress; class Progress;
class Clusters;
/// имя таблицы -> таблица /// имя таблицы -> таблица
@ -238,6 +239,7 @@ public:
void initClusters(); void initClusters();
Cluster & getCluster(const std::string & cluster_name); Cluster & getCluster(const std::string & cluster_name);
Poco::SharedPtr<Clusters> getClusters() const;
Compiler & getCompiler(); Compiler & getCompiler();

View File

@ -134,13 +134,19 @@ private:
public: public:
/// Справочники будут обновляться в отдельном потоке, каждые reload_period секунд. /// Справочники будут обновляться в отдельном потоке, каждые reload_period секунд.
Dictionaries(const bool throw_on_error, const int reload_period_ = 3600) Dictionaries(const bool throw_on_error, const int reload_period_)
: reload_period(reload_period_), log(&Logger::get("Dictionaries")) : reload_period(reload_period_), log(&Logger::get("Dictionaries"))
{ {
reloadImpl(throw_on_error); reloadImpl(throw_on_error);
reloading_thread = std::thread([this] { reloadPeriodically(); }); reloading_thread = std::thread([this] { reloadPeriodically(); });
} }
Dictionaries(const bool throw_on_error)
: Dictionaries(throw_on_error,
Application::instance().config()
.getInt("builtin_dictionaries_reload_interval", 3600))
{}
~Dictionaries() ~Dictionaries()
{ {
destroy.set(); destroy.set();

View File

@ -2,14 +2,17 @@
#include <DB/Core/QueryProcessingStage.h> #include <DB/Core/QueryProcessingStage.h>
#include <DB/Interpreters/Context.h> #include <DB/Interpreters/Context.h>
#include <DB/Interpreters/ExpressionAnalyzer.h> #include <DB/Interpreters/ExpressionActions.h>
#include <DB/DataStreams/IBlockInputStream.h> #include <DB/DataStreams/IBlockInputStream.h>
#include <DB/Parsers/ASTSelectQuery.h>
#include <DB/TableFunctions/ITableFunction.h>
namespace DB namespace DB
{ {
class ExpressionAnalyzer;
class ASTSelectQuery;
class SubqueryForSet;
/** Интерпретирует запрос SELECT. Возвращает поток блоков с результатами выполнения запроса до стадии to_stage. /** Интерпретирует запрос SELECT. Возвращает поток блоков с результатами выполнения запроса до стадии to_stage.
*/ */
class InterpreterSelectQuery class InterpreterSelectQuery
@ -58,6 +61,8 @@ public:
size_t subquery_depth_ = 0, size_t subquery_depth_ = 0,
BlockInputStreamPtr input = nullptr); BlockInputStreamPtr input = nullptr);
~InterpreterSelectQuery();
/** Выполнить запрос, возможно являющиийся цепочкой UNION ALL. /** Выполнить запрос, возможно являющиийся цепочкой UNION ALL.
* Получить поток блоков для чтения * Получить поток блоков для чтения
*/ */
@ -121,9 +126,9 @@ private:
void executeLimit( BlockInputStreams & streams); void executeLimit( BlockInputStreams & streams);
void executeProjection( BlockInputStreams & streams, ExpressionActionsPtr expression); void executeProjection( BlockInputStreams & streams, ExpressionActionsPtr expression);
void executeDistinct( BlockInputStreams & streams, bool before_order, Names columns); void executeDistinct( BlockInputStreams & streams, bool before_order, Names columns);
void executeSubqueriesInSetsAndJoins(BlockInputStreams & streams, SubqueriesForSets & subqueries_for_sets); void executeSubqueriesInSetsAndJoins(BlockInputStreams & streams, std::unordered_map<String, SubqueryForSet> & subqueries_for_sets);
void ignoreWithTotals() { query.group_by_with_totals = false; } void ignoreWithTotals();
ASTPtr query_ptr; ASTPtr query_ptr;
ASTSelectQuery & query; ASTSelectQuery & query;

View File

@ -61,7 +61,7 @@ struct Limits
M(SettingUInt64, max_subquery_depth, 100) \ M(SettingUInt64, max_subquery_depth, 100) \
M(SettingUInt64, max_pipeline_depth, 1000) \ M(SettingUInt64, max_pipeline_depth, 1000) \
M(SettingUInt64, max_ast_depth, 1000) /** Проверяются не во время парсинга, */ \ M(SettingUInt64, max_ast_depth, 1000) /** Проверяются не во время парсинга, */ \
M(SettingUInt64, max_ast_elements, 10000) /** а уже после парсинга запроса. */ \ M(SettingUInt64, max_ast_elements, 50000) /** а уже после парсинга запроса. */ \
\ \
/** 0 - можно всё. 1 - только запросы на чтение. 2 - только запросы на чтение, а также изменение настроек, кроме настройки readonly. */ \ /** 0 - можно всё. 1 - только запросы на чтение. 2 - только запросы на чтение, а также изменение настроек, кроме настройки readonly. */ \
M(SettingUInt64, readonly, 0) \ M(SettingUInt64, readonly, 0) \

View File

@ -120,6 +120,15 @@ struct Settings
\ \
/** В запросе INSERT с указанием столбцов, заполнять значения по-умолчанию только для столбцов с явными DEFAULT-ами. */ \ /** В запросе INSERT с указанием столбцов, заполнять значения по-умолчанию только для столбцов с явными DEFAULT-ами. */ \
M(SettingBool, strict_insert_defaults, 0) \ M(SettingBool, strict_insert_defaults, 0) \
\
/** В случае превышения максимального размера mark_cache, удалять только записи, старше чем mark_cache_min_lifetime секунд. */ \
M(SettingUInt64, mark_cache_min_lifetime, 10000) \
\
/** Позволяет использовать больше источников, чем количество потоков - для более равномерного распределения работы по потокам. \
* Предполагается, что это временное решение, так как можно будет в будущем сделать количество источников равное количеству потоков, \
* но чтобы каждый источник динамически выбирал себе доступную работу. \
*/ \
M(SettingFloat, max_streams_to_max_threads_ratio, 1) \
/// Всевозможные ограничения на выполнение запроса. /// Всевозможные ограничения на выполнение запроса.
Limits limits; Limits limits;

View File

@ -53,8 +53,8 @@ private:
typedef LRUCache<UInt128, MarksInCompressedFile, UInt128TrivialHash, MarksWeightFunction> Base; typedef LRUCache<UInt128, MarksInCompressedFile, UInt128TrivialHash, MarksWeightFunction> Base;
public: public:
MarkCache(size_t max_size_in_bytes) MarkCache(size_t max_size_in_bytes, const Delay & expiration_delay)
: Base(max_size_in_bytes) {} : Base(max_size_in_bytes, expiration_delay) {}
/// Посчитать ключ от пути к файлу и смещения. /// Посчитать ключ от пути к файлу и смещения.
static UInt128 hash(const String & path_to_file) static UInt128 hash(const String & path_to_file)

View File

@ -12,6 +12,7 @@
#include <DB/Core/Exception.h> #include <DB/Core/Exception.h>
#include <DB/Core/ErrorCodes.h> #include <DB/Core/ErrorCodes.h>
#include <DB/IO/WriteHelpers.h> #include <DB/IO/WriteHelpers.h>
#include <Yandex/logger_useful.h>
namespace DB namespace DB
{ {
@ -95,6 +96,8 @@ public:
BackgroundProcessingPool(int size_) : size(size_) BackgroundProcessingPool(int size_) : size(size_)
{ {
LOG_INFO(&Logger::get("BackgroundProcessingPool"), "Create BackgroundProcessingPool with " << size << " threads");
threads.resize(size); threads.resize(size);
for (auto & thread : threads) for (auto & thread : threads)
thread = std::thread([this] { threadFunction(); }); thread = std::thread([this] { threadFunction(); });

View File

@ -75,8 +75,10 @@ public:
void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override { name = new_table_name; } void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override { name = new_table_name; }
bool supportsSampling() const override { return true; } bool supportsSampling() const override { return true; }
bool supportsFinal() const override { return true; }
bool supportsPrewhere() const override { return true; } bool supportsPrewhere() const override { return true; }
bool supportsFinal() const override { return true; }
bool supportsIndexForIn() const override { return true; }
bool supportsParallelReplicas() const override { return true; }
/// Структура подчинённой таблицы не проверяется и не изменяется. /// Структура подчинённой таблицы не проверяется и не изменяется.
void alter(const AlterCommands & params, const String & database_name, const String & table_name, Context & context) override; void alter(const AlterCommands & params, const String & database_name, const String & table_name, Context & context) override;

View File

@ -26,9 +26,11 @@ public:
NameAndTypePair getColumn(const String & column_name) const override; NameAndTypePair getColumn(const String & column_name) const override;
bool hasColumn(const String & column_name) const override; bool hasColumn(const String & column_name) const override;
bool supportsSampling() const override { return data->supportsSampling(); } bool supportsSampling() const override { return data->supportsSampling(); }
bool supportsFinal() const override { return data->supportsFinal(); } bool supportsPrewhere() const override { return data->supportsPrewhere(); }
bool supportsPrewhere() const override { return data->supportsPrewhere(); } bool supportsFinal() const override { return data->supportsFinal(); }
bool supportsIndexForIn() const override { return data->supportsIndexForIn(); }
bool supportsParallelReplicas() const override { return data->supportsParallelReplicas(); }
BlockOutputStreamPtr write(ASTPtr query) override; BlockOutputStreamPtr write(ASTPtr query) override;
void drop() override; void drop() override;

View File

@ -37,12 +37,12 @@ public:
std::string getName() const override { return "Merge"; } std::string getName() const override { return "Merge"; }
std::string getTableName() const override { return name; } std::string getTableName() const override { return name; }
bool supportsSampling() const override { return true; }
/// Проверка откладывается до метода read. Там проверяется поддержка PREWHERE у использующихся таблиц. /// Проверка откладывается до метода read. Там проверяется поддержка PREWHERE у использующихся таблиц.
bool supportsSampling() const override { return true; }
bool supportsPrewhere() const override { return true; } bool supportsPrewhere() const override { return true; }
bool supportsFinal() const override { return true; }
bool supportsParallelReplicas() const override { return true; } bool supportsIndexForIn() const override { return true; }
const NamesAndTypesList & getColumnsListImpl() const override { return *columns; } const NamesAndTypesList & getColumnsListImpl() const override { return *columns; }
NameAndTypePair getColumn(const String &column_name) const override; NameAndTypePair getColumn(const String &column_name) const override;

View File

@ -0,0 +1,41 @@
#pragma once
#include <DB/Storages/IStorage.h>
namespace DB
{
class Context;
/** Реализует системную таблицу columns, которая позволяет получить информацию
* о столбцах каждой таблицы для всех баз данных.
*/
class StorageSystemClusters : public IStorage
{
public:
StorageSystemClusters(const std::string & name_, Context & context_);
static StoragePtr create(const std::string & name_, Context & context_);
std::string getName() const override { return "SystemColumns"; }
std::string getTableName() const override { return name; }
const NamesAndTypesList & getColumnsListImpl() const override { return columns; }
BlockInputStreams read(
const Names & column_names,
ASTPtr query,
const Context & context,
const Settings & settings,
QueryProcessingStage::Enum & processed_stage,
size_t max_block_size = DEFAULT_BLOCK_SIZE,
unsigned threads = 1) override;
private:
StorageSystemClusters(const std::string & name_);
private:
const std::string name;
NamesAndTypesList columns;
Context & context;
};
}

View File

@ -24,7 +24,7 @@ class TableFunctionRemote : public ITableFunction
{ {
public: public:
/// Максимальное количество различных шардов и максимальное количество реплик одного шарда /// Максимальное количество различных шардов и максимальное количество реплик одного шарда
const size_t MAX_ADDRESSES = 200; /// TODO Перенести в Settings. const size_t MAX_ADDRESSES = 1000; /// TODO Перенести в Settings.
std::string getName() const override { return "remote"; } std::string getName() const override { return "remote"; }

View File

@ -15,6 +15,8 @@
#include <DB/AggregateFunctions/AggregateFunctionState.h> #include <DB/AggregateFunctions/AggregateFunctionState.h>
#include <DB/AggregateFunctions/AggregateFunctionMerge.h> #include <DB/AggregateFunctions/AggregateFunctionMerge.h>
#include <DB/AggregateFunctions/AggregateFunctionDebug.h> #include <DB/AggregateFunctions/AggregateFunctionDebug.h>
#include <DB/AggregateFunctions/AggregateFunctionSequenceMatch.h>
#include <DB/AggregateFunctions/AggregateFunctionsStatistics.h>
#include <DB/AggregateFunctions/AggregateFunctionFactory.h> #include <DB/AggregateFunctions/AggregateFunctionFactory.h>
@ -536,6 +538,97 @@ AggregateFunctionPtr AggregateFunctionFactory::get(const String & name, const Da
else else
throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
} }
else if (name == "sequenceMatch")
{
if (!AggregateFunctionSequenceMatch::sufficientArgs(argument_types.size()))
throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
return new AggregateFunctionSequenceMatch;
}
else if (name == "varSamp")
{
if (argument_types.size() != 1)
throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
AggregateFunctionPtr res = createWithNumericType<AggregateFunctionVarSamp>(*argument_types[0]);
if (!res)
throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return res;
}
else if (name == "varPop")
{
if (argument_types.size() != 1)
throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
AggregateFunctionPtr res = createWithNumericType<AggregateFunctionVarPop>(*argument_types[0]);
if (!res)
throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return res;
}
else if (name == "stddevSamp")
{
if (argument_types.size() != 1)
throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
AggregateFunctionPtr res = createWithNumericType<AggregateFunctionStdDevSamp>(*argument_types[0]);
if (!res)
throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return res;
}
else if (name == "stddevPop")
{
if (argument_types.size() != 1)
throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
AggregateFunctionPtr res = createWithNumericType<AggregateFunctionStdDevPop>(*argument_types[0]);
if (!res)
throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return res;
}
else if (name == "covarSamp")
{
if (argument_types.size() != 2)
throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
AggregateFunctionPtr res = createWithTwoNumericTypes<AggregateFunctionCovarSamp>(*argument_types[0], *argument_types[1]);
if (!res)
throw Exception("Illegal types " + argument_types[0]->getName() + " and " + argument_types[1]->getName()
+ " of arguments for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return res;
}
else if (name == "covarPop")
{
if (argument_types.size() != 2)
throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
AggregateFunctionPtr res = createWithTwoNumericTypes<AggregateFunctionCovarPop>(*argument_types[0], *argument_types[1]);
if (!res)
throw Exception("Illegal types " + argument_types[0]->getName() + " and " + argument_types[1]->getName()
+ " of arguments for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return res;
}
else if (name == "corr")
{
if (argument_types.size() != 2)
throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
AggregateFunctionPtr res = createWithTwoNumericTypes<AggregateFunctionCorr>(*argument_types[0], *argument_types[1]);
if (!res)
throw Exception("Illegal types " + argument_types[0]->getName() + " and " + argument_types[1]->getName()
+ " of arguments for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return res;
}
else if (recursion_level == 0 && name.size() > strlen("State") && !(strcmp(name.data() + name.size() - strlen("State"), "State"))) else if (recursion_level == 0 && name.size() > strlen("State") && !(strcmp(name.data() + name.size() - strlen("State"), "State")))
{ {
/// Для агрегатных функций вида aggState, где agg - имя другой агрегатной функции. /// Для агрегатных функций вида aggState, где agg - имя другой агрегатной функции.
@ -630,7 +723,15 @@ const AggregateFunctionFactory::FunctionNames & AggregateFunctionFactory::getFun
"quantilesTimingWeighted", "quantilesTimingWeighted",
"medianTimingWeighted", "medianTimingWeighted",
"quantileDeterministic", "quantileDeterministic",
"quantilesDeterministic" "quantilesDeterministic",
"sequenceMatch",
"varSamp",
"varPop",
"stddevSamp",
"stddevPop",
"covarSamp",
"covarPop",
"corr"
}; };
return names; return names;

View File

@ -60,6 +60,7 @@
#include <DB/Common/formatReadable.h> #include <DB/Common/formatReadable.h>
#include <DB/Columns/ColumnString.h> #include <DB/Columns/ColumnString.h>
#include <statdaemons/NetException.h>
/// http://en.wikipedia.org/wiki/ANSI_escape_code /// http://en.wikipedia.org/wiki/ANSI_escape_code
#define SAVE_CURSOR_POSITION "\033[s" #define SAVE_CURSOR_POSITION "\033[s"
@ -827,7 +828,7 @@ private:
return false; return false;
default: default:
throw Exception("Unexpected packet from server (expected Data, got " throw NetException("Unexpected packet from server (expected Data, got "
+ String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); + String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER);
} }
} }

View File

@ -21,6 +21,8 @@
#include <DB/Client/Connection.h> #include <DB/Client/Connection.h>
#include <statdaemons/NetException.h>
namespace DB namespace DB
{ {
@ -58,14 +60,14 @@ void Connection::connect()
disconnect(); disconnect();
/// Добавляем в сообщение адрес сервера. Также объект Exception запомнит stack trace. Жаль, что более точный тип исключения теряется. /// Добавляем в сообщение адрес сервера. Также объект Exception запомнит stack trace. Жаль, что более точный тип исключения теряется.
throw Exception(e.displayText(), "(" + getServerAddress() + ")", ErrorCodes::NETWORK_ERROR); throw NetException(e.displayText(), "(" + getServerAddress() + ")", ErrorCodes::NETWORK_ERROR);
} }
catch (Poco::TimeoutException & e) catch (Poco::TimeoutException & e)
{ {
disconnect(); disconnect();
/// Добавляем в сообщение адрес сервера. Также объект Exception запомнит stack trace. Жаль, что более точный тип исключения теряется. /// Добавляем в сообщение адрес сервера. Также объект Exception запомнит stack trace. Жаль, что более точный тип исключения теряется.
throw Exception(e.displayText(), "(" + getServerAddress() + ")", ErrorCodes::SOCKET_TIMEOUT); throw NetException(e.displayText(), "(" + getServerAddress() + ")", ErrorCodes::SOCKET_TIMEOUT);
} }
} }
@ -125,7 +127,7 @@ void Connection::receiveHello()
/// Закроем соединение, чтобы не было рассинхронизации. /// Закроем соединение, чтобы не было рассинхронизации.
disconnect(); disconnect();
throw Exception("Unexpected packet from server " + getServerAddress() + " (expected Hello or Exception, got " throw NetException("Unexpected packet from server " + getServerAddress() + " (expected Hello or Exception, got "
+ String(Protocol::Server::toString(packet_type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER); + String(Protocol::Server::toString(packet_type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER);
} }
} }

View File

@ -1,8 +1,75 @@
#include <DB/Common/LRUCache.h> #include <DB/Common/LRUCache.h>
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <thread>
#include <chrono>
using namespace DB; namespace
{
void run();
void runTest(unsigned int num, const std::function<bool()> func);
bool test1();
bool test2();
#define ASSERT_CHECK(cond, res) \
do \
{ \
if (!(cond)) \
{ \
std::cout << __FILE__ << ":" << __LINE__ << ":" \
<< "Assertion " << #cond << " failed.\n"; \
if ((res)) { (res) = false; } \
} \
} \
while (0)
void run()
{
const std::vector<std::function<bool()> > tests =
{
test1,
test2
};
unsigned int num = 0;
for (const auto & test : tests)
{
++num;
runTest(num, test);
}
}
void runTest(unsigned int num, const std::function<bool()> func)
{
bool ok;
try
{
ok = func();
}
catch (const DB::Exception & ex)
{
ok = false;
std::cout << "Caught exception " << ex.displayText() << "\n";
}
catch (const std::exception & ex)
{
ok = false;
std::cout << "Caught exception " << ex.what() << "\n";
}
catch (...)
{
ok = false;
std::cout << "Caught unhandled exception\n";
}
if (ok)
std::cout << "Test " << num << " passed\n";
else
std::cout << "Test " << num << " failed\n";
}
struct Weight struct Weight
{ {
@ -12,52 +79,108 @@ struct Weight
} }
}; };
void fail() bool test1()
{ {
std::cout << "failed" << std::endl; using Cache = DB::LRUCache<std::string, std::string, std::hash<std::string>, Weight>;
exit(1); using MappedPtr = Cache::MappedPtr;
auto ptr = [](const std::string & s)
{
return MappedPtr(new std::string(s));
};
Cache cache(10);
bool res = true;
ASSERT_CHECK(!cache.get("asd"), res);
cache.set("asd", ptr("qwe"));
ASSERT_CHECK((*cache.get("asd") == "qwe"), res);
cache.set("zxcv", ptr("12345"));
cache.set("01234567891234567", ptr("--"));
ASSERT_CHECK((*cache.get("zxcv") == "12345"), res);
ASSERT_CHECK((*cache.get("asd") == "qwe"), res);
ASSERT_CHECK((*cache.get("01234567891234567") == "--"), res);
ASSERT_CHECK(!cache.get("123x"), res);
cache.set("321x", ptr("+"));
ASSERT_CHECK(!cache.get("zxcv"), res);
ASSERT_CHECK((*cache.get("asd") == "qwe"), res);
ASSERT_CHECK((*cache.get("01234567891234567") == "--"), res);
ASSERT_CHECK(!cache.get("123x"), res);
ASSERT_CHECK((*cache.get("321x") == "+"), res);
ASSERT_CHECK((cache.weight() == 6), res);
ASSERT_CHECK((cache.count() == 3), res);
return res;
} }
typedef LRUCache<std::string, std::string, std::hash<std::string>, Weight> Cache; bool test2()
typedef Cache::MappedPtr MappedPtr;
MappedPtr ptr(const std::string & s)
{ {
return MappedPtr(new std::string(s)); using namespace std::literals;
using Cache = DB::LRUCache<std::string, std::string, std::hash<std::string>, Weight>;
using MappedPtr = Cache::MappedPtr;
auto ptr = [](const std::string & s)
{
return MappedPtr(new std::string(s));
};
Cache cache(10, 3s);
bool res = true;
ASSERT_CHECK(!cache.get("asd"), res);
cache.set("asd", ptr("qwe"));
ASSERT_CHECK((*cache.get("asd") == "qwe"), res);
cache.set("zxcv", ptr("12345"));
cache.set("01234567891234567", ptr("--"));
ASSERT_CHECK((*cache.get("zxcv") == "12345"), res);
ASSERT_CHECK((*cache.get("asd") == "qwe"), res);
ASSERT_CHECK((*cache.get("01234567891234567") == "--"), res);
ASSERT_CHECK(!cache.get("123x"), res);
cache.set("321x", ptr("+"));
ASSERT_CHECK((cache.get("zxcv")), res);
ASSERT_CHECK((*cache.get("asd") == "qwe"), res);
ASSERT_CHECK((*cache.get("01234567891234567") == "--"), res);
ASSERT_CHECK(!cache.get("123x"), res);
ASSERT_CHECK((*cache.get("321x") == "+"), res);
ASSERT_CHECK((cache.weight() == 11), res);
ASSERT_CHECK((cache.count() == 4), res);
std::this_thread::sleep_for(5s);
cache.set("123x", ptr("2769"));
ASSERT_CHECK(!cache.get("zxcv"), res);
ASSERT_CHECK((*cache.get("asd") == "qwe"), res);
ASSERT_CHECK((*cache.get("01234567891234567") == "--"), res);
ASSERT_CHECK((*cache.get("321x") == "+"), res);
ASSERT_CHECK((cache.weight() == 10), res);
ASSERT_CHECK((cache.count() == 4), res);
return res;
}
} }
int main() int main()
{ {
try run();
{
Cache cache(10);
if (cache.get("asd")) fail();
cache.set("asd", ptr("qwe"));
if (*cache.get("asd") != "qwe") fail();
cache.set("zxcv", ptr("12345"));
cache.set("01234567891234567", ptr("--"));
if (*cache.get("zxcv") != "12345") fail();
if (*cache.get("asd") != "qwe") fail();
if (*cache.get("01234567891234567") != "--") fail();
if (cache.get("123x")) fail();
cache.set("321x", ptr("+"));
if (cache.get("zxcv")) fail();
if (*cache.get("asd") != "qwe") fail();
if (*cache.get("01234567891234567") != "--") fail();
if (cache.get("123x")) fail();
if (*cache.get("321x") != "+") fail();
if (cache.weight() != 6) fail();
if (cache.count() != 3) fail();
std::cout << "passed" << std::endl;
}
catch (...)
{
fail();
}
return 0; return 0;
} }

View File

@ -61,16 +61,16 @@ Block AggregatingSortedBlockInputStream::readImpl()
columns_to_aggregate[i] = typeid_cast<ColumnAggregateFunction *>(merged_columns[column_numbers_to_aggregate[i]]); columns_to_aggregate[i] = typeid_cast<ColumnAggregateFunction *>(merged_columns[column_numbers_to_aggregate[i]]);
if (has_collation) if (has_collation)
merge(merged_block, merged_columns, queue_with_collation); merge(merged_columns, queue_with_collation);
else else
merge(merged_block, merged_columns, queue); merge(merged_columns, queue);
return merged_block; return merged_block;
} }
template<class TSortCursor> template<class TSortCursor>
void AggregatingSortedBlockInputStream::merge(Block & merged_block, ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue) void AggregatingSortedBlockInputStream::merge(ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue)
{ {
size_t merged_rows = 0; size_t merged_rows = 0;
@ -81,13 +81,15 @@ void AggregatingSortedBlockInputStream::merge(Block & merged_block, ColumnPlainP
setPrimaryKey(next_key, current); setPrimaryKey(next_key, current);
bool key_differs = next_key != current_key;
/// если накопилось достаточно строк и последняя посчитана полностью /// если накопилось достаточно строк и последняя посчитана полностью
if (next_key != current_key && merged_rows >= max_block_size) if (key_differs && merged_rows >= max_block_size)
return; return;
queue.pop(); queue.pop();
if (next_key != current_key) if (key_differs)
{ {
current_key = std::move(next_key); current_key = std::move(next_key);
next_key.resize(description.size()); next_key.resize(description.size());

View File

@ -104,15 +104,15 @@ Block CollapsingSortedBlockInputStream::readImpl()
} }
if (has_collation) if (has_collation)
merge(merged_block, merged_columns, queue_with_collation); merge(merged_columns, queue_with_collation);
else else
merge(merged_block, merged_columns, queue); merge(merged_columns, queue);
return merged_block; return merged_block;
} }
template<class TSortCursor> template<class TSortCursor>
void CollapsingSortedBlockInputStream::merge(Block & merged_block, ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue) void CollapsingSortedBlockInputStream::merge(ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue)
{ {
size_t merged_rows = 0; size_t merged_rows = 0;
@ -120,12 +120,22 @@ void CollapsingSortedBlockInputStream::merge(Block & merged_block, ColumnPlainPt
while (!queue.empty()) while (!queue.empty())
{ {
TSortCursor current = queue.top(); TSortCursor current = queue.top();
queue.pop();
Int8 sign = get<Int64>((*current->all_columns[sign_column_number])[current->pos]); Int8 sign = get<Int64>((*current->all_columns[sign_column_number])[current->pos]);
setPrimaryKey(next_key, current); setPrimaryKey(next_key, current);
if (next_key != current_key) bool key_differs = next_key != current_key;
/// если накопилось достаточно строк и последняя посчитана полностью
if (key_differs && merged_rows >= max_block_size)
{
++blocks_written;
return;
}
queue.pop();
if (key_differs)
{ {
/// Запишем данные для предыдущего визита. /// Запишем данные для предыдущего визита.
insertRows(merged_columns, merged_rows); insertRows(merged_columns, merged_rows);
@ -168,12 +178,6 @@ void CollapsingSortedBlockInputStream::merge(Block & merged_block, ColumnPlainPt
/// Достаём из соответствующего источника следующий блок, если есть. /// Достаём из соответствующего источника следующий блок, если есть.
fetchNextBlock(current, queue); fetchNextBlock(current, queue);
} }
if (merged_rows >= max_block_size)
{
++blocks_written;
return;
}
} }
/// Запишем данные для последнего визита. /// Запишем данные для последнего визита.

View File

@ -32,16 +32,22 @@ Block DistinctBlockInputStream::readImpl()
size_t rows = block.rows(); size_t rows = block.rows();
size_t columns = columns_names.empty() ? block.columns() : columns_names.size(); size_t columns = columns_names.empty() ? block.columns() : columns_names.size();
ConstColumnPlainPtrs column_ptrs(columns); ConstColumnPlainPtrs column_ptrs;
column_ptrs.reserve(columns);
for (size_t i = 0; i < columns; ++i) for (size_t i = 0; i < columns; ++i)
{ {
if (columns_names.empty()) auto & column = columns_names.empty()
column_ptrs[i] = block.getByPosition(i).column; ? block.getByPosition(i).column
else : block.getByName(columns_names[i]).column;
column_ptrs[i] = block.getByName(columns_names[i]).column;
/// Игнорируем все константные столбцы.
if (!column->isConst())
column_ptrs.emplace_back(column.get());
} }
columns = column_ptrs.size();
/// Будем фильтровать блок, оставляя там только строки, которых мы ещё не видели. /// Будем фильтровать блок, оставляя там только строки, которых мы ещё не видели.
IColumn::Filter filter(rows); IColumn::Filter filter(rows);

View File

@ -248,7 +248,14 @@ void IProfilingBlockInputStream::progressImpl(const Progress & value)
ErrorCodes::TOO_MUCH_ROWS); ErrorCodes::TOO_MUCH_ROWS);
} }
else if (limits.read_overflow_mode == OverflowMode::BREAK) else if (limits.read_overflow_mode == OverflowMode::BREAK)
cancel(); {
/// Для break будем останавливаться только если действительно было прочитано столько строк, а не только предполагается к чтению.
if ((limits.max_rows_to_read && rows_processed > limits.max_rows_to_read)
|| (limits.max_bytes_to_read && bytes_processed > limits.max_bytes_to_read))
{
cancel();
}
}
else else
throw Exception("Logical error: unknown overflow mode", ErrorCodes::LOGICAL_ERROR); throw Exception("Logical error: unknown overflow mode", ErrorCodes::LOGICAL_ERROR);
} }

View File

@ -111,15 +111,15 @@ Block MergingSortedBlockInputStream::readImpl()
return Block(); return Block();
if (has_collation) if (has_collation)
merge(merged_block, merged_columns, queue_with_collation); merge(merged_columns, queue_with_collation);
else else
merge(merged_block, merged_columns, queue); merge(merged_columns, queue);
return merged_block; return merged_block;
} }
template <typename TSortCursor> template <typename TSortCursor>
void MergingSortedBlockInputStream::merge(Block & merged_block, ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue) void MergingSortedBlockInputStream::merge(ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue)
{ {
size_t merged_rows = 0; size_t merged_rows = 0;

View File

@ -142,16 +142,16 @@ Block SummingSortedBlockInputStream::readImpl()
} }
if (has_collation) if (has_collation)
merge(merged_block, merged_columns, queue_with_collation); merge(merged_columns, queue_with_collation);
else else
merge(merged_block, merged_columns, queue); merge(merged_columns, queue);
return merged_block; return merged_block;
} }
template<class TSortCursor> template<class TSortCursor>
void SummingSortedBlockInputStream::merge(Block & merged_block, ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue) void SummingSortedBlockInputStream::merge(ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue)
{ {
size_t merged_rows = 0; size_t merged_rows = 0;
@ -159,11 +159,18 @@ void SummingSortedBlockInputStream::merge(Block & merged_block, ColumnPlainPtrs
while (!queue.empty()) while (!queue.empty())
{ {
TSortCursor current = queue.top(); TSortCursor current = queue.top();
queue.pop();
setPrimaryKey(next_key, current); setPrimaryKey(next_key, current);
if (next_key != current_key) bool key_differs = next_key != current_key;
/// если накопилось достаточно строк и последняя посчитана полностью
if (key_differs && merged_rows >= max_block_size)
return;
queue.pop();
if (key_differs)
{ {
/// Запишем данные для предыдущей группы. /// Запишем данные для предыдущей группы.
if (!current_key[0].isNull() && !current_row_is_zero) if (!current_key[0].isNull() && !current_row_is_zero)
@ -194,9 +201,6 @@ void SummingSortedBlockInputStream::merge(Block & merged_block, ColumnPlainPtrs
/// Достаём из соответствующего источника следующий блок, если есть. /// Достаём из соответствующего источника следующий блок, если есть.
fetchNextBlock(current, queue); fetchNextBlock(current, queue);
} }
if (merged_rows >= max_block_size)
return;
} }
/// Запишем данные для последней группы, если она ненулевая. /// Запишем данные для последней группы, если она ненулевая.

View File

@ -1,5 +1,6 @@
#include <math.h> #include <math.h>
#include <DB/Functions/NumberTraits.h>
#include <DB/Functions/FunctionFactory.h> #include <DB/Functions/FunctionFactory.h>
#include <DB/Functions/FunctionsArithmetic.h> #include <DB/Functions/FunctionsArithmetic.h>
#include <DB/Functions/FunctionsMiscellaneous.h> #include <DB/Functions/FunctionsMiscellaneous.h>
@ -308,6 +309,71 @@ void FunctionVisibleWidth::execute(Block & block, const ColumnNumbers & argument
ErrorCodes::ILLEGAL_COLUMN); ErrorCodes::ILLEGAL_COLUMN);
} }
/// TODO: Убрать copy-paste из FunctionsConditional.h
template <typename T>
struct DataTypeFromFieldTypeOrError
{
static DataTypePtr getDataType()
{
return new typename DataTypeFromFieldType<T>::Type;
}
};
template <>
struct DataTypeFromFieldTypeOrError<NumberTraits::Error>
{
static DataTypePtr getDataType()
{
return nullptr;
}
};
template <typename T1, typename T2>
DataTypePtr getSmallestCommonNumericTypeImpl()
{
using ResultType = typename NumberTraits::ResultOfIf<T1, T2>::Type;
auto type_res = DataTypeFromFieldTypeOrError<ResultType>::getDataType();
if (!type_res)
throw Exception("Types " + TypeName<T1>::get() + " and " + TypeName<T2>::get()
+ " are not upscalable to a common type without loss of precision", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return type_res;
}
template <typename T1>
DataTypePtr getSmallestCommonNumericTypeLeft(const IDataType & t2)
{
if (typeid_cast<const DataTypeUInt8 *>(&t2)) return getSmallestCommonNumericTypeImpl<T1, UInt8>();
if (typeid_cast<const DataTypeUInt16 *>(&t2)) return getSmallestCommonNumericTypeImpl<T1, UInt16>();
if (typeid_cast<const DataTypeUInt32 *>(&t2)) return getSmallestCommonNumericTypeImpl<T1, UInt32>();
if (typeid_cast<const DataTypeUInt64 *>(&t2)) return getSmallestCommonNumericTypeImpl<T1, UInt64>();
if (typeid_cast<const DataTypeInt8 *>(&t2)) return getSmallestCommonNumericTypeImpl<T1, Int8>();
if (typeid_cast<const DataTypeInt16 *>(&t2)) return getSmallestCommonNumericTypeImpl<T1, Int16>();
if (typeid_cast<const DataTypeInt32 *>(&t2)) return getSmallestCommonNumericTypeImpl<T1, Int32>();
if (typeid_cast<const DataTypeInt64 *>(&t2)) return getSmallestCommonNumericTypeImpl<T1, Int64>();
if (typeid_cast<const DataTypeFloat32 *>(&t2)) return getSmallestCommonNumericTypeImpl<T1, Float32>();
if (typeid_cast<const DataTypeFloat64 *>(&t2)) return getSmallestCommonNumericTypeImpl<T1, Float64>();
throw Exception("Logical error: not a numeric type passed to function getSmallestCommonNumericType", ErrorCodes::LOGICAL_ERROR);
}
DataTypePtr getSmallestCommonNumericType(const IDataType & t1, const IDataType & t2)
{
if (typeid_cast<const DataTypeUInt8 *>(&t1)) return getSmallestCommonNumericTypeLeft<UInt8>(t2);
if (typeid_cast<const DataTypeUInt16 *>(&t1)) return getSmallestCommonNumericTypeLeft<UInt16>(t2);
if (typeid_cast<const DataTypeUInt32 *>(&t1)) return getSmallestCommonNumericTypeLeft<UInt32>(t2);
if (typeid_cast<const DataTypeUInt64 *>(&t1)) return getSmallestCommonNumericTypeLeft<UInt64>(t2);
if (typeid_cast<const DataTypeInt8 *>(&t1)) return getSmallestCommonNumericTypeLeft<Int8>(t2);
if (typeid_cast<const DataTypeInt16 *>(&t1)) return getSmallestCommonNumericTypeLeft<Int16>(t2);
if (typeid_cast<const DataTypeInt32 *>(&t1)) return getSmallestCommonNumericTypeLeft<Int32>(t2);
if (typeid_cast<const DataTypeInt64 *>(&t1)) return getSmallestCommonNumericTypeLeft<Int64>(t2);
if (typeid_cast<const DataTypeFloat32 *>(&t1)) return getSmallestCommonNumericTypeLeft<Float32>(t2);
if (typeid_cast<const DataTypeFloat64 *>(&t1)) return getSmallestCommonNumericTypeLeft<Float64>(t2);
throw Exception("Logical error: not a numeric type passed to function getSmallestCommonNumericType", ErrorCodes::LOGICAL_ERROR);
}
} }
@ -337,6 +403,8 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
factory.registerFunction<FunctionIsFinite>(); factory.registerFunction<FunctionIsFinite>();
factory.registerFunction<FunctionIsInfinite>(); factory.registerFunction<FunctionIsInfinite>();
factory.registerFunction<FunctionIsNaN>(); factory.registerFunction<FunctionIsNaN>();
factory.registerFunction<FunctionTransform>();
} }
} }

View File

@ -9,6 +9,9 @@ void registerFunctionsRound(FunctionFactory & factory)
factory.registerFunction<FunctionRoundToExp2>(); factory.registerFunction<FunctionRoundToExp2>();
factory.registerFunction<FunctionRoundDuration>(); factory.registerFunction<FunctionRoundDuration>();
factory.registerFunction<FunctionRoundAge>(); factory.registerFunction<FunctionRoundAge>();
factory.registerFunction<FunctionRound>();
factory.registerFunction<FunctionCeil>();
factory.registerFunction<FunctionFloor>();
} }
} }

View File

@ -28,8 +28,6 @@ ReadBufferAIO::ReadBufferAIO(const std::string & filename_, size_t buffer_size_,
auto error_code = (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE; auto error_code = (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE;
throwFromErrno("Cannot open file " + filename, error_code); throwFromErrno("Cannot open file " + filename, error_code);
} }
::memset(&request, 0, sizeof(request));
} }
ReadBufferAIO::~ReadBufferAIO() ReadBufferAIO::~ReadBufferAIO()

View File

@ -4,6 +4,7 @@
#include <DB/Core/ErrorCodes.h> #include <DB/Core/ErrorCodes.h>
#include <DB/IO/ReadBufferFromPocoSocket.h> #include <DB/IO/ReadBufferFromPocoSocket.h>
#include <statdaemons/NetException.h>
namespace DB namespace DB
@ -20,19 +21,19 @@ bool ReadBufferFromPocoSocket::nextImpl()
} }
catch (const Poco::Net::NetException & e) catch (const Poco::Net::NetException & e)
{ {
throw Exception(e.displayText(), "while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); throw NetException(e.displayText(), "while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR);
} }
catch (const Poco::TimeoutException & e) catch (const Poco::TimeoutException & e)
{ {
throw Exception("Timeout exceeded while reading from socket (" + peer_address.toString() + ")", ErrorCodes::SOCKET_TIMEOUT); throw NetException("Timeout exceeded while reading from socket (" + peer_address.toString() + ")", ErrorCodes::SOCKET_TIMEOUT);
} }
catch (const Poco::IOException & e) catch (const Poco::IOException & e)
{ {
throw Exception(e.displayText(), "while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); throw NetException(e.displayText(), "while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR);
} }
if (bytes_read < 0) if (bytes_read < 0)
throw Exception("Cannot read from socket (" + peer_address.toString() + ")", ErrorCodes::CANNOT_READ_FROM_SOCKET); throw NetException("Cannot read from socket (" + peer_address.toString() + ")", ErrorCodes::CANNOT_READ_FROM_SOCKET);
if (bytes_read) if (bytes_read)
working_buffer.resize(bytes_read); working_buffer.resize(bytes_read);

View File

@ -34,10 +34,6 @@ WriteBufferAIO::WriteBufferAIO(const std::string & filename_, size_t buffer_size
auto error_code = (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE; auto error_code = (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE;
throwFromErrno("Cannot open file " + filename, error_code); throwFromErrno("Cannot open file " + filename, error_code);
} }
ProfileEvents::increment(ProfileEvents::FileOpen);
::memset(&request, 0, sizeof(request));
} }
WriteBufferAIO::~WriteBufferAIO() WriteBufferAIO::~WriteBufferAIO()
@ -63,15 +59,6 @@ off_t WriteBufferAIO::getPositionInFile()
return seek(0, SEEK_CUR); return seek(0, SEEK_CUR);
} }
void WriteBufferAIO::truncate(off_t length)
{
flush();
int res = ::ftruncate(fd, length);
if (res == -1)
throwFromErrno("Cannot truncate file " + filename, ErrorCodes::CANNOT_TRUNCATE_FILE);
}
void WriteBufferAIO::sync() void WriteBufferAIO::sync()
{ {
flush(); flush();
@ -142,6 +129,15 @@ off_t WriteBufferAIO::doSeek(off_t off, int whence)
return pos_in_file; return pos_in_file;
} }
void WriteBufferAIO::doTruncate(off_t length)
{
flush();
int res = ::ftruncate(fd, length);
if (res == -1)
throwFromErrno("Cannot truncate file " + filename, ErrorCodes::CANNOT_TRUNCATE_FILE);
}
void WriteBufferAIO::flush() void WriteBufferAIO::flush()
{ {
next(); next();
@ -342,14 +338,21 @@ void WriteBufferAIO::prepare()
if (read_count < 0) if (read_count < 0)
throw Exception("Read error", ErrorCodes::AIO_READ_ERROR); throw Exception("Read error", ErrorCodes::AIO_READ_ERROR);
Position truncation_begin;
off_t offset = DEFAULT_AIO_FILE_BLOCK_SIZE - region_right_padding; off_t offset = DEFAULT_AIO_FILE_BLOCK_SIZE - region_right_padding;
if (read_count > offset) if (read_count > offset)
{ {
::memcpy(buffer_end, memory_page + offset, read_count - offset); ::memcpy(buffer_end, memory_page + offset, read_count - offset);
truncation_begin = buffer_end + (read_count - offset);
truncation_count = DEFAULT_AIO_FILE_BLOCK_SIZE - read_count; truncation_count = DEFAULT_AIO_FILE_BLOCK_SIZE - read_count;
} }
else else
{
truncation_begin = buffer_end;
truncation_count = region_right_padding; truncation_count = region_right_padding;
}
::memset(truncation_begin, 0, truncation_count);
} }
} }
} }

View File

@ -17,4 +17,9 @@ off_t WriteBufferFromFileBase::seek(off_t off, int whence)
return doSeek(off, whence); return doSeek(off, whence);
} }
void WriteBufferFromFileBase::truncate(off_t length)
{
return doTruncate(length);
}
} }

View File

@ -4,6 +4,7 @@
#include <DB/Core/ErrorCodes.h> #include <DB/Core/ErrorCodes.h>
#include <DB/IO/WriteBufferFromPocoSocket.h> #include <DB/IO/WriteBufferFromPocoSocket.h>
#include <statdaemons/NetException.h>
namespace DB namespace DB
@ -26,19 +27,19 @@ void WriteBufferFromPocoSocket::nextImpl()
} }
catch (const Poco::Net::NetException & e) catch (const Poco::Net::NetException & e)
{ {
throw Exception(e.displayText() + " while writing to socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); throw NetException(e.displayText() + " while writing to socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR);
} }
catch (const Poco::TimeoutException & e) catch (const Poco::TimeoutException & e)
{ {
throw Exception("Timeout exceeded while writing to socket (" + peer_address.toString() + ")", ErrorCodes::SOCKET_TIMEOUT); throw NetException("Timeout exceeded while writing to socket (" + peer_address.toString() + ")", ErrorCodes::SOCKET_TIMEOUT);
} }
catch (const Poco::IOException & e) catch (const Poco::IOException & e)
{ {
throw Exception(e.displayText(), " while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); throw NetException(e.displayText(), " while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR);
} }
if (res < 0) if (res < 0)
throw Exception("Cannot write to socket (" + peer_address.toString() + ")", ErrorCodes::CANNOT_WRITE_TO_SOCKET); throw NetException("Cannot write to socket (" + peer_address.toString() + ")", ErrorCodes::CANNOT_WRITE_TO_SOCKET);
bytes_written += res; bytes_written += res;
} }
} }

View File

@ -3,7 +3,6 @@
#include <DB/Common/isLocalAddress.h> #include <DB/Common/isLocalAddress.h>
#include <Poco/Util/AbstractConfiguration.h> #include <Poco/Util/AbstractConfiguration.h>
#include <Poco/Util/Application.h> #include <Poco/Util/Application.h>
#include <Poco/Net/NetworkInterface.h>
namespace DB namespace DB
{ {
@ -13,8 +12,9 @@ Cluster::Address::Address(const String & config_prefix)
{ {
auto & config = Poco::Util::Application::instance().config(); auto & config = Poco::Util::Application::instance().config();
host_name = config.getString(config_prefix + ".host");
host_port = Poco::Net::SocketAddress( host_port = Poco::Net::SocketAddress(
config.getString(config_prefix + ".host"), host_name,
config.getInt(config_prefix + ".port") config.getInt(config_prefix + ".port")
); );
@ -68,6 +68,8 @@ Cluster::Cluster(const Settings & settings, const DataTypeFactory & data_type_fa
const auto & config_prefix = cluster_name + "."; const auto & config_prefix = cluster_name + ".";
UInt32 current_shard_num = 1;
for (auto it = config_keys.begin(); it != config_keys.end(); ++it) for (auto it = config_keys.begin(); it != config_keys.end(); ++it)
{ {
if (0 == strncmp(it->c_str(), "node", strlen("node"))) if (0 == strncmp(it->c_str(), "node", strlen("node")))
@ -78,12 +80,13 @@ Cluster::Cluster(const Settings & settings, const DataTypeFactory & data_type_fa
continue; continue;
addresses.emplace_back(prefix); addresses.emplace_back(prefix);
addresses.back().replica_num = 1;
slot_to_shard.insert(std::end(slot_to_shard), weight, shard_info_vec.size()); slot_to_shard.insert(std::end(slot_to_shard), weight, shard_info_vec.size());
if (const auto is_local = isLocal(addresses.back())) if (const auto is_local = isLocal(addresses.back()))
shard_info_vec.push_back({{}, weight, is_local}); shard_info_vec.push_back({{}, current_shard_num, weight, is_local});
else else
shard_info_vec.push_back({{addressToDirName(addresses.back())}, weight, is_local}); shard_info_vec.push_back({{addressToDirName(addresses.back())}, current_shard_num, weight, is_local});
} }
else if (0 == strncmp(it->c_str(), "shard", strlen("shard"))) else if (0 == strncmp(it->c_str(), "shard", strlen("shard")))
{ {
@ -92,6 +95,7 @@ Cluster::Cluster(const Settings & settings, const DataTypeFactory & data_type_fa
addresses_with_failover.emplace_back(); addresses_with_failover.emplace_back();
Addresses & replica_addresses = addresses_with_failover.back(); Addresses & replica_addresses = addresses_with_failover.back();
UInt32 current_replica_num = 1;
const auto & partial_prefix = config_prefix + *it + "."; const auto & partial_prefix = config_prefix + *it + ".";
const auto weight = config.getInt(partial_prefix + ".weight", 1); const auto weight = config.getInt(partial_prefix + ".weight", 1);
@ -116,6 +120,8 @@ Cluster::Cluster(const Settings & settings, const DataTypeFactory & data_type_fa
if (0 == strncmp(jt->c_str(), "replica", strlen("replica"))) if (0 == strncmp(jt->c_str(), "replica", strlen("replica")))
{ {
replica_addresses.emplace_back(partial_prefix + *jt); replica_addresses.emplace_back(partial_prefix + *jt);
replica_addresses.back().replica_num = current_replica_num;
++current_replica_num;
if (isLocal(replica_addresses.back())) if (isLocal(replica_addresses.back()))
{ {
@ -142,10 +148,12 @@ Cluster::Cluster(const Settings & settings, const DataTypeFactory & data_type_fa
} }
slot_to_shard.insert(std::end(slot_to_shard), weight, shard_info_vec.size()); slot_to_shard.insert(std::end(slot_to_shard), weight, shard_info_vec.size());
shard_info_vec.push_back({std::move(dir_names), weight, num_local_nodes}); shard_info_vec.push_back({std::move(dir_names), current_shard_num, weight, num_local_nodes});
} }
else else
throw Exception("Unknown element in config: " + *it, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); throw Exception("Unknown element in config: " + *it, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
++current_shard_num;
} }
if (!addresses_with_failover.empty() && !addresses.empty()) if (!addresses_with_failover.empty() && !addresses.empty())

View File

@ -191,6 +191,8 @@ void Compiler::compile(
CodeGenerator get_code, CodeGenerator get_code,
ReadyCallback on_ready) ReadyCallback on_ready)
{ {
ProfileEvents::increment(ProfileEvents::CompileAttempt);
std::string prefix = path + "/" + file_name; std::string prefix = path + "/" + file_name;
std::string cpp_file_path = prefix + ".cpp"; std::string cpp_file_path = prefix + ".cpp";
std::string so_file_path = prefix + ".so"; std::string so_file_path = prefix + ".so";
@ -257,6 +259,7 @@ void Compiler::compile(
} }
LOG_INFO(log, "Compiled code " << file_name); LOG_INFO(log, "Compiled code " << file_name);
ProfileEvents::increment(ProfileEvents::CompileSuccess);
on_ready(lib); on_ready(lib);
} }

View File

@ -1,10 +1,10 @@
#include <map> #include <map>
#include <set> #include <set>
#include <chrono>
#include <Poco/SharedPtr.h> #include <Poco/SharedPtr.h>
#include <Poco/Mutex.h> #include <Poco/Mutex.h>
#include <Poco/File.h> #include <Poco/File.h>
#include <Poco/Net/NetworkInterface.h>
#include <Yandex/logger_useful.h> #include <Yandex/logger_useful.h>
@ -730,7 +730,7 @@ void Context::setMarkCache(size_t cache_size_in_bytes)
if (shared->mark_cache) if (shared->mark_cache)
throw Exception("Uncompressed cache has been already created.", ErrorCodes::LOGICAL_ERROR); throw Exception("Uncompressed cache has been already created.", ErrorCodes::LOGICAL_ERROR);
shared->mark_cache.reset(new MarkCache(cache_size_in_bytes)); shared->mark_cache.reset(new MarkCache(cache_size_in_bytes, std::chrono::seconds(settings.mark_cache_min_lifetime)));
} }
MarkCachePtr Context::getMarkCache() const MarkCachePtr Context::getMarkCache() const
@ -815,6 +815,12 @@ Cluster & Context::getCluster(const std::string & cluster_name)
throw Poco::Exception("Failed to find cluster with name = " + cluster_name); throw Poco::Exception("Failed to find cluster with name = " + cluster_name);
} }
Poco::SharedPtr<Clusters> Context::getClusters() const
{
if (!shared->clusters)
throw Poco::Exception("Clusters have not been initialized yet.");
return shared->clusters;
}
Compiler & Context::getCompiler() Compiler & Context::getCompiler()
{ {

View File

@ -215,7 +215,7 @@ void ExpressionAction::prepare(Block & sample_block)
void ExpressionAction::execute(Block & block) const void ExpressionAction::execute(Block & block) const
{ {
std::cerr << "executing: " << toString() << std::endl; // std::cerr << "executing: " << toString() << std::endl;
if (type == REMOVE_COLUMN || type == COPY_COLUMN) if (type == REMOVE_COLUMN || type == COPY_COLUMN)
if (!block.has(source_name)) if (!block.has(source_name))
@ -327,7 +327,7 @@ void ExpressionAction::execute(Block & block) const
break; break;
case ADD_COLUMN: case ADD_COLUMN:
block.insert(ColumnWithNameAndType(added_column->cloneResized(block.rows()), result_type, result_name)); block.insert(ColumnWithNameAndType(added_column->cloneResized(block.rowsInFirstColumn()), result_type, result_name));
break; break;
case COPY_COLUMN: case COPY_COLUMN:
@ -608,6 +608,8 @@ std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & colum
void ExpressionActions::finalize(const Names & output_columns) void ExpressionActions::finalize(const Names & output_columns)
{ {
// std::cerr << "finalize\n";
NameSet final_columns; NameSet final_columns;
for (size_t i = 0; i < output_columns.size(); ++i) for (size_t i = 0; i < output_columns.size(); ++i)
{ {
@ -629,7 +631,7 @@ void ExpressionActions::finalize(const Names & output_columns)
unmodified_columns.insert(it->name); unmodified_columns.insert(it->name);
} }
/// Будем идти с конца и поодерживать множество нужных на данном этапе столбцов. /// Будем идти с конца и поддерживать множество нужных на данном этапе столбцов.
/// Будем выбрасывать ненужные действия, хотя обычно их нет по построению. /// Будем выбрасывать ненужные действия, хотя обычно их нет по построению.
for (int i = static_cast<int>(actions.size()) - 1; i >= 0; --i) for (int i = static_cast<int>(actions.size()) - 1; i >= 0; --i)
{ {
@ -691,6 +693,23 @@ void ExpressionActions::finalize(const Names & output_columns)
unmodified_columns.erase(out); unmodified_columns.erase(out);
needed_columns.erase(out); needed_columns.erase(out);
/** Если функция - константное выражение, то заменим действие на добавление столбца-константы - результата.
* То есть, осуществляем constant folding.
*/
if (action.type == ExpressionAction::APPLY_FUNCTION && sample_block.has(out))
{
auto & result = sample_block.getByName(out);
if (!result.column.isNull())
{
action.type = ExpressionAction::ADD_COLUMN;
action.result_type = result.type;
action.added_column = result.column;
action.function = nullptr;
action.argument_names.clear();
in.clear();
}
}
} }
needed_columns.insert(in.begin(), in.end()); needed_columns.insert(in.begin(), in.end());

View File

@ -21,6 +21,7 @@
#include <DB/Interpreters/InterpreterSelectQuery.h> #include <DB/Interpreters/InterpreterSelectQuery.h>
#include <DB/Interpreters/ExpressionAnalyzer.h> #include <DB/Interpreters/ExpressionAnalyzer.h>
#include <DB/Interpreters/LogicalExpressionsOptimizer.h> #include <DB/Interpreters/LogicalExpressionsOptimizer.h>
#include <DB/Interpreters/ExternalDictionaries.h>
#include <DB/AggregateFunctions/AggregateFunctionFactory.h> #include <DB/AggregateFunctions/AggregateFunctionFactory.h>
@ -32,6 +33,8 @@
#include <DB/DataStreams/LazyBlockInputStream.h> #include <DB/DataStreams/LazyBlockInputStream.h>
#include <DB/DataStreams/copyData.h> #include <DB/DataStreams/copyData.h>
#include <DB/Dictionaries/IDictionary.h>
#include <DB/Common/typeid_cast.h> #include <DB/Common/typeid_cast.h>
#include <DB/Parsers/formatAST.h> #include <DB/Parsers/formatAST.h>
@ -67,6 +70,23 @@ const std::unordered_set<String> injective_function_names
"regionToName", "regionToName",
}; };
const std::unordered_set<String> possibly_injective_function_names
{
"dictGetString",
"dictGetUInt8",
"dictGetUInt16",
"dictGetUInt32",
"dictGetUInt64",
"dictGetInt8",
"dictGetInt16",
"dictGetInt32",
"dictGetInt64",
"dictGetFloat32",
"dictGetFloat64",
"dictGetDate",
"dictGetDateTime"
};
void ExpressionAnalyzer::init() void ExpressionAnalyzer::init()
{ {
select_query = typeid_cast<ASTSelectQuery *>(&*ast); select_query = typeid_cast<ASTSelectQuery *>(&*ast);
@ -328,7 +348,8 @@ void ExpressionAnalyzer::normalizeTree()
/// finished_asts - уже обработанные вершины (и на что они заменены) /// finished_asts - уже обработанные вершины (и на что они заменены)
/// current_asts - вершины в текущем стеке вызовов этого метода /// current_asts - вершины в текущем стеке вызовов этого метода
/// current_alias - алиас, повешенный на предка ast (самого глубокого из предков с алиасами) /// current_alias - алиас, повешенный на предка ast (самого глубокого из предков с алиасами)
void ExpressionAnalyzer::normalizeTreeImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias) void ExpressionAnalyzer::normalizeTreeImpl(
ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias)
{ {
if (finished_asts.count(ast)) if (finished_asts.count(ast))
{ {
@ -346,25 +367,33 @@ void ExpressionAnalyzer::normalizeTreeImpl(ASTPtr & ast, MapOfASTs & finished_as
/// rewrite правила, которые действуют при обходе сверху-вниз. /// rewrite правила, которые действуют при обходе сверху-вниз.
bool replaced = false; bool replaced = false;
if (ASTFunction * node = typeid_cast<ASTFunction *>(&*ast)) ASTFunction * func_node = typeid_cast<ASTFunction *>(&*ast);
if (func_node)
{ {
/** Нет ли в таблице столбца, название которого полностью совпадает с записью функции? /** Нет ли в таблице столбца, название которого полностью совпадает с записью функции?
* Например, в таблице есть столбец "domain(URL)", и мы запросили domain(URL). * Например, в таблице есть столбец "domain(URL)", и мы запросили domain(URL).
*/ */
String function_string = node->getColumnName(); String function_string = func_node->getColumnName();
NamesAndTypesList::const_iterator it = findColumn(function_string); NamesAndTypesList::const_iterator it = findColumn(function_string);
if (columns.end() != it) if (columns.end() != it)
{ {
ASTIdentifier * ast_id = new ASTIdentifier(node->range, function_string); ASTIdentifier * ast_id = new ASTIdentifier(func_node->range, function_string);
ast = ast_id; ast = ast_id;
current_asts.insert(ast); current_asts.insert(ast);
replaced = true; replaced = true;
} }
/// может быть указано IN t, где t - таблица, что равносильно IN (SELECT * FROM t). /// Может быть указано IN t, где t - таблица, что равносильно IN (SELECT * FROM t).
if (node->name == "in" || node->name == "notIn" || node->name == "globalIn" || node->name == "globalNotIn") if (func_node->name == "in" || func_node->name == "notIn" || func_node->name == "globalIn" || func_node->name == "globalNotIn")
if (ASTIdentifier * right = typeid_cast<ASTIdentifier *>(&*node->arguments->children.at(1))) if (ASTIdentifier * right = typeid_cast<ASTIdentifier *>(&*func_node->arguments->children.at(1)))
right->kind = ASTIdentifier::Table; right->kind = ASTIdentifier::Table;
/// А ещё, в качестве исключения, будем понимать count(*) как count(), а не count(список всех столбцов).
if (func_node->name == "count" && func_node->arguments->children.size() == 1
&& typeid_cast<const ASTAsterisk *>(func_node->arguments->children[0].get()))
{
func_node->arguments->children.clear();
}
} }
else if (ASTIdentifier * node = typeid_cast<ASTIdentifier *>(&*ast)) else if (ASTIdentifier * node = typeid_cast<ASTIdentifier *>(&*ast))
{ {
@ -427,10 +456,32 @@ void ExpressionAnalyzer::normalizeTreeImpl(ASTPtr & ast, MapOfASTs & finished_as
} }
/// Рекурсивные вызовы. Не опускаемся в подзапросы. /// Рекурсивные вызовы. Не опускаемся в подзапросы.
/// Также не опускаемся в левый аргумент лямбда-выражений, чтобы не заменять формальные параметры
/// по алиасам в выражениях вида 123 AS x, arrayMap(x -> 1, [2]).
if (func_node && func_node->name == "lambda")
{
/// Пропускаем первый аргумент. Также предполагаем, что у функции lambda не может быть parameters.
for (size_t i = 1, size = func_node->arguments->children.size(); i < size; ++i)
{
auto & child = func_node->arguments->children[i];
if (typeid_cast<ASTSelectQuery *>(&*child))
continue;
for (auto & child : ast->children)
if (!typeid_cast<ASTSelectQuery *>(&*child))
normalizeTreeImpl(child, finished_asts, current_asts, current_alias); normalizeTreeImpl(child, finished_asts, current_asts, current_alias);
}
}
else
{
for (auto & child : ast->children)
{
if (typeid_cast<ASTSelectQuery *>(&*child))
continue;
normalizeTreeImpl(child, finished_asts, current_asts, current_alias);
}
}
/// Если секция WHERE или HAVING состоит из одного алиаса, ссылку нужно заменить не только в children, но и в where_expression и having_expression. /// Если секция WHERE или HAVING состоит из одного алиаса, ссылку нужно заменить не только в children, но и в where_expression и having_expression.
if (ASTSelectQuery * select = typeid_cast<ASTSelectQuery *>(&*ast)) if (ASTSelectQuery * select = typeid_cast<ASTSelectQuery *>(&*ast))
@ -496,10 +547,33 @@ void ExpressionAnalyzer::optimizeGroupBy()
/// iterate over each GROUP BY expression, eliminate injective function calls and literals /// iterate over each GROUP BY expression, eliminate injective function calls and literals
for (size_t i = 0; i < group_exprs.size();) for (size_t i = 0; i < group_exprs.size();)
{ {
if (const auto function = typeid_cast<ASTFunction*>(group_exprs[i].get())) if (const auto function = typeid_cast<ASTFunction *>(group_exprs[i].get()))
{ {
/// assert function is injective /// assert function is injective
if (!injective_function_names.count(function->name)) if (possibly_injective_function_names.count(function->name))
{
/// do not handle semantic errors here
if (function->arguments->children.size() < 2)
{
++i;
continue;
}
const auto & dict_name = typeid_cast<const ASTLiteral &>(*function->arguments->children[0])
.value.safeGet<String>();
const auto & dict_ptr = context.getExternalDictionaries().getDictionary(dict_name);
const auto & attr_name = typeid_cast<const ASTLiteral &>(*function->arguments->children[1])
.value.safeGet<String>();
if (!dict_ptr->isInjective(attr_name))
{
++i;
continue;
}
}
else if (!injective_function_names.count(function->name))
{ {
++i; ++i;
continue; continue;
@ -809,76 +883,76 @@ void ExpressionAnalyzer::makeSet(ASTFunction * node, const Block & sample_block)
/// Случай явного перечисления значений. /// Случай явного перечисления значений.
void ExpressionAnalyzer::makeExplicitSet(ASTFunction * node, const Block & sample_block, bool create_ordered_set) void ExpressionAnalyzer::makeExplicitSet(ASTFunction * node, const Block & sample_block, bool create_ordered_set)
{ {
IAST & args = *node->arguments; IAST & args = *node->arguments;
ASTPtr & arg = args.children.at(1); ASTPtr & arg = args.children.at(1);
DataTypes set_element_types; DataTypes set_element_types;
ASTPtr & left_arg = args.children.at(0); ASTPtr & left_arg = args.children.at(0);
ASTFunction * left_arg_tuple = typeid_cast<ASTFunction *>(&*left_arg); ASTFunction * left_arg_tuple = typeid_cast<ASTFunction *>(&*left_arg);
if (left_arg_tuple && left_arg_tuple->name == "tuple") if (left_arg_tuple && left_arg_tuple->name == "tuple")
{
for (const auto & arg : left_arg_tuple->arguments->children)
{ {
for (const auto & arg : left_arg_tuple->arguments->children) const auto & data_type = sample_block.getByName(arg->getColumnName()).type;
{
const auto & data_type = sample_block.getByName(arg->getColumnName()).type;
/// @note prevent crash in query: SELECT (1, [1]) in (1, 1) /// @note prevent crash in query: SELECT (1, [1]) in (1, 1)
if (const auto array = typeid_cast<const DataTypeArray * >(data_type.get())) if (const auto array = typeid_cast<const DataTypeArray * >(data_type.get()))
throw Exception("Incorrect element of tuple: " + array->getName(), ErrorCodes::INCORRECT_ELEMENT_OF_SET); throw Exception("Incorrect element of tuple: " + array->getName(), ErrorCodes::INCORRECT_ELEMENT_OF_SET);
set_element_types.push_back(data_type); set_element_types.push_back(data_type);
}
} }
}
else
{
DataTypePtr left_type = sample_block.getByName(left_arg->getColumnName()).type;
if (DataTypeArray * array_type = typeid_cast<DataTypeArray *>(&*left_type))
set_element_types.push_back(array_type->getNestedType());
else else
{ set_element_types.push_back(left_type);
DataTypePtr left_type = sample_block.getByName(left_arg->getColumnName()).type; }
if (DataTypeArray * array_type = typeid_cast<DataTypeArray *>(&*left_type))
set_element_types.push_back(array_type->getNestedType());
else
set_element_types.push_back(left_type);
}
/// Отличим случай x in (1, 2) от случая x in 1 (он же x in (1)). /// Отличим случай x in (1, 2) от случая x in 1 (он же x in (1)).
bool single_value = false; bool single_value = false;
ASTPtr elements_ast = arg; ASTPtr elements_ast = arg;
if (ASTFunction * set_func = typeid_cast<ASTFunction *>(&*arg)) if (ASTFunction * set_func = typeid_cast<ASTFunction *>(&*arg))
{ {
if (set_func->name != "tuple") if (set_func->name != "tuple")
throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
/// Отличм случай (x, y) in ((1, 2), (3, 4)) от случая (x, y) in (1, 2).
ASTFunction * any_element = typeid_cast<ASTFunction *>(&*set_func->arguments->children.at(0));
if (set_element_types.size() >= 2 && (!any_element || any_element->name != "tuple"))
single_value = true;
else
elements_ast = set_func->arguments;
}
else if (typeid_cast<ASTLiteral *>(&*arg))
{
single_value = true;
}
else
{
throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.", throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
if (single_value) /// Отличм случай (x, y) in ((1, 2), (3, 4)) от случая (x, y) in (1, 2).
{ ASTFunction * any_element = typeid_cast<ASTFunction *>(&*set_func->arguments->children.at(0));
ASTPtr exp_list = new ASTExpressionList; if (set_element_types.size() >= 2 && (!any_element || any_element->name != "tuple"))
exp_list->children.push_back(elements_ast); single_value = true;
elements_ast = exp_list; else
} elements_ast = set_func->arguments;
}
else if (typeid_cast<ASTLiteral *>(&*arg))
{
single_value = true;
}
else
{
throw Exception("Incorrect type of 2nd argument for function " + node->name + ". Must be subquery or set of values.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
ASTSet * ast_set = new ASTSet(arg->getColumnName()); if (single_value)
ASTPtr ast_set_ptr = ast_set; {
ast_set->set = new Set(settings.limits); ASTPtr exp_list = new ASTExpressionList;
ast_set->is_explicit = true; exp_list->children.push_back(elements_ast);
ast_set->set->createFromAST(set_element_types, elements_ast, create_ordered_set); elements_ast = exp_list;
arg = ast_set_ptr; }
ASTSet * ast_set = new ASTSet(arg->getColumnName());
ASTPtr ast_set_ptr = ast_set;
ast_set->set = new Set(settings.limits);
ast_set->is_explicit = true;
ast_set->set->createFromAST(set_element_types, elements_ast, create_ordered_set);
arg = ast_set_ptr;
} }

View File

@ -24,6 +24,7 @@
#include <DB/Parsers/ASTOrderByElement.h> #include <DB/Parsers/ASTOrderByElement.h>
#include <DB/Interpreters/InterpreterSelectQuery.h> #include <DB/Interpreters/InterpreterSelectQuery.h>
#include <DB/Interpreters/ExpressionAnalyzer.h>
#include <DB/Storages/StorageView.h> #include <DB/Storages/StorageView.h>
#include <DB/TableFunctions/ITableFunction.h> #include <DB/TableFunctions/ITableFunction.h>
#include <DB/TableFunctions/TableFunctionFactory.h> #include <DB/TableFunctions/TableFunctionFactory.h>
@ -33,6 +34,9 @@
namespace DB namespace DB
{ {
InterpreterSelectQuery::~InterpreterSelectQuery() = default;
void InterpreterSelectQuery::init(BlockInputStreamPtr input, const Names & required_column_names, const NamesAndTypesList & table_column_names) void InterpreterSelectQuery::init(BlockInputStreamPtr input, const Names & required_column_names, const NamesAndTypesList & table_column_names)
{ {
original_max_threads = settings.max_threads; original_max_threads = settings.max_threads;
@ -269,10 +273,9 @@ DataTypes InterpreterSelectQuery::getReturnTypes()
{ {
DataTypes res; DataTypes res;
NamesAndTypesList columns = query_analyzer->getSelectSampleBlock().getColumnsList(); NamesAndTypesList columns = query_analyzer->getSelectSampleBlock().getColumnsList();
for (NamesAndTypesList::iterator it = columns.begin(); it != columns.end(); ++it) for (auto & column : columns)
{ res.push_back(column.type);
res.push_back(it->type);
}
return res; return res;
} }
@ -499,7 +502,7 @@ void InterpreterSelectQuery::executeSingleQuery()
if (second_stage) if (second_stage)
{ {
bool need_second_distinct_pass = true; bool need_second_distinct_pass = query.distinct;
if (need_aggregate) if (need_aggregate)
{ {
@ -515,7 +518,7 @@ void InterpreterSelectQuery::executeSingleQuery()
executeExpression(streams, before_order_and_select); executeExpression(streams, before_order_and_select);
executeDistinct(streams, true, selected_columns); executeDistinct(streams, true, selected_columns);
need_second_distinct_pass = streams.size() > 1; need_second_distinct_pass = query.distinct && (streams.size() > 1);
} }
else if (query.group_by_with_totals && !aggregate_final) else if (query.group_by_with_totals && !aggregate_final)
{ {
@ -548,6 +551,9 @@ void InterpreterSelectQuery::executeSingleQuery()
if (query.limit_length && streams.size() > 1 && !query.distinct) if (query.limit_length && streams.size() > 1 && !query.distinct)
executePreLimit(streams); executePreLimit(streams);
if (need_second_distinct_pass)
union_within_single_query = true;
if (union_within_single_query) if (union_within_single_query)
executeUnion(streams); executeUnion(streams);
@ -610,6 +616,8 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(BlockInpu
interpreter_subquery = new InterpreterSelectQuery( interpreter_subquery = new InterpreterSelectQuery(
query.table, subquery_context, required_columns, QueryProcessingStage::Complete, subquery_depth + 1); query.table, subquery_context, required_columns, QueryProcessingStage::Complete, subquery_depth + 1);
/// Если во внешнем запросе есть аггрегация, то WITH TOTALS игнорируется в подзапросе.
if (query_analyzer->hasAggregation()) if (query_analyzer->hasAggregation())
interpreter_subquery->ignoreWithTotals(); interpreter_subquery->ignoreWithTotals();
} }
@ -640,9 +648,13 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(BlockInpu
* - эти настройки будут переданы на удалённые серверы при распределённой обработке запроса, * - эти настройки будут переданы на удалённые серверы при распределённой обработке запроса,
* и там должно быть оригинальное значение max_threads, а не увеличенное. * и там должно быть оригинальное значение max_threads, а не увеличенное.
*/ */
bool is_remote = false;
Settings settings_for_storage = settings; Settings settings_for_storage = settings;
if (storage && storage->isRemote()) if (storage && storage->isRemote())
{
is_remote = true;
settings.max_threads = settings.max_distributed_connections; settings.max_threads = settings.max_distributed_connections;
}
/// Ограничение на количество столбцов для чтения. /// Ограничение на количество столбцов для чтения.
if (settings.limits.max_columns_to_read && required_columns.size() > settings.limits.max_columns_to_read) if (settings.limits.max_columns_to_read && required_columns.size() > settings.limits.max_columns_to_read)
@ -657,7 +669,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(BlockInpu
/** Оптимизация - если не указаны DISTINCT, WHERE, GROUP, HAVING, ORDER, но указан LIMIT, и limit + offset < max_block_size, /** Оптимизация - если не указаны DISTINCT, WHERE, GROUP, HAVING, ORDER, но указан LIMIT, и limit + offset < max_block_size,
* то в качестве размера блока будем использовать limit + offset (чтобы не читать из таблицы больше, чем запрошено), * то в качестве размера блока будем использовать limit + offset (чтобы не читать из таблицы больше, чем запрошено),
* а также установим количество потоков в 1 и отменим асинхронное выполнение конвейера запроса. * а также установим количество потоков в 1.
*/ */
if (!query.distinct if (!query.distinct
&& !query.prewhere_expression && !query.prewhere_expression
@ -680,9 +692,15 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(BlockInpu
/// Инициализируем изначальные потоки данных, на которые накладываются преобразования запроса. Таблица или подзапрос? /// Инициализируем изначальные потоки данных, на которые накладываются преобразования запроса. Таблица или подзапрос?
if (!interpreter_subquery) if (!interpreter_subquery)
{ {
size_t max_streams = settings.max_threads;
/// Если надо - запрашиваем больше источников, чем количество потоков - для более равномерного распределения работы по потокам.
if (max_streams > 1 && !is_remote)
max_streams *= settings.max_streams_to_max_threads_ratio;
streams = storage->read(required_columns, query_ptr, streams = storage->read(required_columns, query_ptr,
context, settings_for_storage, from_stage, context, settings_for_storage, from_stage,
settings.max_block_size, settings.max_threads); settings.max_block_size, max_streams);
for (auto & stream : streams) for (auto & stream : streams)
stream->addTableLock(table_lock); stream->addTableLock(table_lock);
@ -1004,4 +1022,10 @@ BlockInputStreamPtr InterpreterSelectQuery::executeAndFormat(WriteBuffer & buf)
} }
void InterpreterSelectQuery::ignoreWithTotals()
{
query.group_by_with_totals = false;
}
} }

View File

@ -73,6 +73,8 @@ bool ParserInsertQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p
if (!s_eq.ignore(pos, end, max_parsed_pos, expected)) if (!s_eq.ignore(pos, end, max_parsed_pos, expected))
return false; return false;
ws.ignore(pos, end);
if (!id_p.parse(pos, end, id, max_parsed_pos, expected)) if (!id_p.parse(pos, end, id, max_parsed_pos, expected))
return false; return false;
} }
@ -82,9 +84,13 @@ bool ParserInsertQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p
/// Есть ли список столбцов /// Есть ли список столбцов
if (s_lparen.ignore(pos, end, max_parsed_pos, expected)) if (s_lparen.ignore(pos, end, max_parsed_pos, expected))
{ {
ws.ignore(pos, end);
if (!columns_p.parse(pos, end, columns, max_parsed_pos, expected)) if (!columns_p.parse(pos, end, columns, max_parsed_pos, expected))
return false; return false;
ws.ignore(pos, end); ws.ignore(pos, end);
if (!s_rparen.ignore(pos, end, max_parsed_pos, expected)) if (!s_rparen.ignore(pos, end, max_parsed_pos, expected))
return false; return false;
} }

View File

@ -181,7 +181,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p
ws.ignore(pos, end); ws.ignore(pos, end);
} }
/// [GLOBAL] ANY|ALL INNER|LEFT JOIN (subquery) USING (tuple) /// [GLOBAL] ANY|ALL INNER|LEFT JOIN (subquery) USING tuple
join.parse(pos, end, select_query->join, max_parsed_pos, expected); join.parse(pos, end, select_query->join, max_parsed_pos, expected);
if (!parse_final_and_sample()) if (!parse_final_and_sample())

View File

@ -33,6 +33,7 @@
#include <DB/Storages/StorageSystemDictionaries.h> #include <DB/Storages/StorageSystemDictionaries.h>
#include <DB/Storages/StorageSystemColumns.h> #include <DB/Storages/StorageSystemColumns.h>
#include <DB/Storages/StorageSystemFunctions.h> #include <DB/Storages/StorageSystemFunctions.h>
#include <DB/Storages/StorageSystemClusters.h>
#include <DB/IO/copyData.h> #include <DB/IO/copyData.h>
#include <DB/IO/LimitReadBuffer.h> #include <DB/IO/LimitReadBuffer.h>
@ -541,6 +542,7 @@ int Server::main(const std::vector<std::string> & args)
global_context->addTable("system", "dictionaries", StorageSystemDictionaries::create("dictionaries")); global_context->addTable("system", "dictionaries", StorageSystemDictionaries::create("dictionaries"));
global_context->addTable("system", "columns", StorageSystemColumns::create("columns")); global_context->addTable("system", "columns", StorageSystemColumns::create("columns"));
global_context->addTable("system", "functions", StorageSystemFunctions::create("functions")); global_context->addTable("system", "functions", StorageSystemFunctions::create("functions"));
global_context->addTable("system", "clusters", StorageSystemClusters::create("clusters", *global_context));
if (has_zookeeper) if (has_zookeeper)
global_context->addTable("system", "zookeeper", StorageSystemZooKeeper::create("zookeeper")); global_context->addTable("system", "zookeeper", StorageSystemZooKeeper::create("zookeeper"));

View File

@ -27,6 +27,7 @@
#include "TCPHandler.h" #include "TCPHandler.h"
#include <statdaemons/NetException.h>
namespace DB namespace DB
{ {
@ -443,7 +444,7 @@ void TCPHandler::receiveHello()
throw Exception("Client has connected to wrong port", ErrorCodes::CLIENT_HAS_CONNECTED_TO_WRONG_PORT); throw Exception("Client has connected to wrong port", ErrorCodes::CLIENT_HAS_CONNECTED_TO_WRONG_PORT);
} }
else else
throw Exception("Unexpected packet from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); throw NetException("Unexpected packet from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT);
} }
readStringBinary(client_name, *in); readStringBinary(client_name, *in);
@ -492,13 +493,13 @@ bool TCPHandler::receivePacket()
{ {
case Protocol::Client::Query: case Protocol::Client::Query:
if (!state.empty()) if (!state.empty())
throw Exception("Unexpected packet Query received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); throw NetException("Unexpected packet Query received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT);
receiveQuery(); receiveQuery();
return true; return true;
case Protocol::Client::Data: case Protocol::Client::Data:
if (state.empty()) if (state.empty())
throw Exception("Unexpected packet Data received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); throw NetException("Unexpected packet Data received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT);
return receiveData(); return receiveData();
case Protocol::Client::Ping: case Protocol::Client::Ping:
@ -647,13 +648,13 @@ bool TCPHandler::isQueryCancelled()
{ {
case Protocol::Client::Cancel: case Protocol::Client::Cancel:
if (state.empty()) if (state.empty())
throw Exception("Unexpected packet Cancel received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); throw NetException("Unexpected packet Cancel received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT);
LOG_INFO(log, "Query was cancelled."); LOG_INFO(log, "Query was cancelled.");
state.is_cancelled = true; state.is_cancelled = true;
return true; return true;
default: default:
throw Exception("Unknown packet from client", ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT); throw NetException("Unknown packet from client", ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT);
} }
} }

View File

@ -1,6 +1,5 @@
#include <DB/Storages/MergeTree/MergeTreeDataSelectExecutor.h> #include <DB/Storages/MergeTree/MergeTreeDataSelectExecutor.h>
#include <DB/Storages/MergeTree/MergeTreeBlockInputStream.h> #include <DB/Storages/MergeTree/MergeTreeBlockInputStream.h>
#include <DB/Storages/MergeTree/MergeTreeWhereOptimizer.h>
#include <DB/Interpreters/ExpressionAnalyzer.h> #include <DB/Interpreters/ExpressionAnalyzer.h>
#include <DB/Parsers/ASTIdentifier.h> #include <DB/Parsers/ASTIdentifier.h>
#include <DB/DataStreams/ExpressionBlockInputStream.h> #include <DB/DataStreams/ExpressionBlockInputStream.h>
@ -63,13 +62,6 @@ BlockInputStreams MergeTreeDataSelectExecutor::read(
if (real_column_names.empty()) if (real_column_names.empty())
real_column_names.push_back(ExpressionActions::getSmallestColumn(data.getColumnsList())); real_column_names.push_back(ExpressionActions::getSmallestColumn(data.getColumnsList()));
ASTSelectQuery & select = *typeid_cast<ASTSelectQuery*>(&*query);
/// Try transferring some condition from WHERE to PREWHERE if enabled and viable
if (settings.optimize_move_to_prewhere)
if (select.where_expression && !select.prewhere_expression)
MergeTreeWhereOptimizer{select, data, column_names_to_return, log};
Block virtual_columns_block = getBlockWithVirtualColumns(parts); Block virtual_columns_block = getBlockWithVirtualColumns(parts);
/// Если запрошен хотя бы один виртуальный столбец, пробуем индексировать /// Если запрошен хотя бы один виртуальный столбец, пробуем индексировать
@ -114,6 +106,8 @@ BlockInputStreams MergeTreeDataSelectExecutor::read(
ExpressionActionsPtr filter_expression; ExpressionActionsPtr filter_expression;
double relative_sample_size = 0; double relative_sample_size = 0;
ASTSelectQuery & select = *typeid_cast<ASTSelectQuery*>(&*query);
if (select.sample_size) if (select.sample_size)
{ {
relative_sample_size = apply_visitor(FieldVisitorConvertToNumber<double>(), relative_sample_size = apply_visitor(FieldVisitorConvertToNumber<double>(),

View File

@ -164,18 +164,21 @@ public:
{ {
destination = storage.context.tryGetTable(storage.destination_database, storage.destination_table); destination = storage.context.tryGetTable(storage.destination_database, storage.destination_table);
if (destination.get() == &storage) if (destination)
throw Exception("Destination table is myself. Write will cause infinite loop.", ErrorCodes::INFINITE_LOOP); {
if (destination.get() == &storage)
throw Exception("Destination table is myself. Write will cause infinite loop.", ErrorCodes::INFINITE_LOOP);
/// Проверяем структуру таблицы. /// Проверяем структуру таблицы.
try try
{ {
destination->check(block, true); destination->check(block, true);
} }
catch (Exception & e) catch (Exception & e)
{ {
e.addMessage("(when looking at destination table " + storage.destination_database + "." + storage.destination_table + ")"); e.addMessage("(when looking at destination table " + storage.destination_database + "." + storage.destination_table + ")");
throw; throw;
}
} }
} }

View File

@ -7,9 +7,11 @@
#include <DB/Storages/Distributed/DirectoryMonitor.h> #include <DB/Storages/Distributed/DirectoryMonitor.h>
#include <DB/Common/escapeForFileName.h> #include <DB/Common/escapeForFileName.h>
#include <DB/Parsers/ASTInsertQuery.h> #include <DB/Parsers/ASTInsertQuery.h>
#include <DB/Parsers/ASTSelectQuery.h>
#include <DB/Interpreters/InterpreterSelectQuery.h> #include <DB/Interpreters/InterpreterSelectQuery.h>
#include <DB/Interpreters/InterpreterAlterQuery.h> #include <DB/Interpreters/InterpreterAlterQuery.h>
#include <DB/Interpreters/ExpressionAnalyzer.h>
#include <DB/Core/Field.h> #include <DB/Core/Field.h>

View File

@ -287,18 +287,17 @@ StoragePtr StorageFactory::get(
*/ */
ASTs & args_func = typeid_cast<ASTFunction &>(*typeid_cast<ASTCreateQuery &>(*query).storage).children; ASTs & args_func = typeid_cast<ASTFunction &>(*typeid_cast<ASTCreateQuery &>(*query).storage).children;
const auto params_error_message = "Storage Distributed requires 3 or 4 parameters"
" - name of configuration section with list of remote servers, name of remote database, name of remote table,"
" sharding key expression (optional).";
if (args_func.size() != 1) if (args_func.size() != 1)
throw Exception("Storage Distributed requires 3 parameters" throw Exception(params_error_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
" - name of configuration section with list of remote servers, name of remote database, name of remote table.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
ASTs & args = typeid_cast<ASTExpressionList &>(*args_func.at(0)).children; ASTs & args = typeid_cast<ASTExpressionList &>(*args_func.at(0)).children;
if (args.size() != 3 && args.size() != 4) if (args.size() != 3 && args.size() != 4)
throw Exception("Storage Distributed requires 3 or 4 parameters" throw Exception(params_error_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
" - name of configuration section with list of remote servers, name of remote database, name of remote table,"
" sharding key expression (optional).",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
String cluster_name = typeid_cast<ASTIdentifier &>(*args[0]).name; String cluster_name = typeid_cast<ASTIdentifier &>(*args[0]).name;
String remote_database = reinterpretAsIdentifier(args[1], local_context).name; String remote_database = reinterpretAsIdentifier(args[1], local_context).name;
@ -433,7 +432,7 @@ SummingMergeTree(EventDate, (OrderID, EventDate, BannerID, PhraseID, ContextType
ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/hits', '{replica}', EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192) ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/hits', '{replica}', EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192)
For further info please read the documentation: http://clickhouse.yandex-team.ru/ For further info please read the documentation: https://clickhouse.yandex-team.ru/
)"; )";
String name_part = name.substr(0, name.size() - strlen("MergeTree")); String name_part = name.substr(0, name.size() - strlen("MergeTree"));

View File

@ -2,6 +2,7 @@
#include <DB/Storages/StorageMerge.h> #include <DB/Storages/StorageMerge.h>
#include <DB/Common/VirtualColumnUtils.h> #include <DB/Common/VirtualColumnUtils.h>
#include <DB/Interpreters/InterpreterAlterQuery.h> #include <DB/Interpreters/InterpreterAlterQuery.h>
#include <DB/Interpreters/ExpressionActions.h>
#include <DB/Storages/VirtualColumnFactory.h> #include <DB/Storages/VirtualColumnFactory.h>
#include <DB/Parsers/ASTSelectQuery.h> #include <DB/Parsers/ASTSelectQuery.h>
#include <DB/DataTypes/DataTypeString.h> #include <DB/DataTypes/DataTypeString.h>

View File

@ -2,6 +2,7 @@
#include <DB/Storages/MergeTree/MergeTreeBlockOutputStream.h> #include <DB/Storages/MergeTree/MergeTreeBlockOutputStream.h>
#include <DB/Storages/MergeTree/DiskSpaceMonitor.h> #include <DB/Storages/MergeTree/DiskSpaceMonitor.h>
#include <DB/Storages/MergeTree/MergeList.h> #include <DB/Storages/MergeTree/MergeList.h>
#include <DB/Storages/MergeTree/MergeTreeWhereOptimizer.h>
#include <DB/Common/escapeForFileName.h> #include <DB/Common/escapeForFileName.h>
#include <DB/Interpreters/InterpreterAlterQuery.h> #include <DB/Interpreters/InterpreterAlterQuery.h>
#include <Poco/DirectoryIterator.h> #include <Poco/DirectoryIterator.h>
@ -98,6 +99,13 @@ BlockInputStreams StorageMergeTree::read(
const size_t max_block_size, const size_t max_block_size,
const unsigned threads) const unsigned threads)
{ {
ASTSelectQuery & select = *typeid_cast<ASTSelectQuery*>(&*query);
/// Try transferring some condition from WHERE to PREWHERE if enabled and viable
if (settings.optimize_move_to_prewhere)
if (select.where_expression && !select.prewhere_expression)
MergeTreeWhereOptimizer{select, data, column_names, log};
return reader.read(column_names, query, context, settings, processed_stage, max_block_size, threads); return reader.read(column_names, query, context, settings, processed_stage, max_block_size, threads);
} }
@ -262,7 +270,8 @@ void StorageMergeTree::dropPartition(const Field & partition, bool detach, bool
/// Просит завершить мерджи и не позволяет им начаться. /// Просит завершить мерджи и не позволяет им начаться.
/// Это защищает от "оживания" данных за удалённую партицию после завершения мерджа. /// Это защищает от "оживания" данных за удалённую партицию после завершения мерджа.
const MergeTreeMergeBlocker merge_blocker{merger}; const MergeTreeMergeBlocker merge_blocker{merger};
auto structure_lock = lockStructure(true); /// Дожидается завершения мерджей и не даёт начаться новым.
auto lock = lockForAlter();
DayNum_t month = MergeTreeData::getMonthDayNum(partition); DayNum_t month = MergeTreeData::getMonthDayNum(partition);

View File

@ -5,6 +5,7 @@
#include <DB/Storages/MergeTree/ReplicatedMergeTreePartsExchange.h> #include <DB/Storages/MergeTree/ReplicatedMergeTreePartsExchange.h>
#include <DB/Storages/MergeTree/MergeTreePartChecker.h> #include <DB/Storages/MergeTree/MergeTreePartChecker.h>
#include <DB/Storages/MergeTree/MergeList.h> #include <DB/Storages/MergeTree/MergeList.h>
#include <DB/Storages/MergeTree/MergeTreeWhereOptimizer.h>
#include <DB/Parsers/formatAST.h> #include <DB/Parsers/formatAST.h>
#include <DB/IO/WriteBufferFromOStream.h> #include <DB/IO/WriteBufferFromOStream.h>
#include <DB/IO/ReadBufferFromString.h> #include <DB/IO/ReadBufferFromString.h>
@ -1996,6 +1997,13 @@ BlockInputStreams StorageReplicatedMergeTree::read(
else else
real_column_names.push_back(it); real_column_names.push_back(it);
ASTSelectQuery & select = *typeid_cast<ASTSelectQuery*>(&*query);
/// Try transferring some condition from WHERE to PREWHERE if enabled and viable
if (settings.optimize_move_to_prewhere)
if (select.where_expression && !select.prewhere_expression)
MergeTreeWhereOptimizer{select, data, real_column_names, log};
Block virtual_columns_block; Block virtual_columns_block;
ColumnUInt8 * column = new ColumnUInt8(2); ColumnUInt8 * column = new ColumnUInt8(2);
ColumnPtr column_ptr = column; ColumnPtr column_ptr = column;

View File

@ -0,0 +1,128 @@
#include <DB/Storages/StorageSystemClusters.h>
#include <DB/Interpreters/Cluster.h>
#include <DB/Columns/ColumnString.h>
#include <DB/Columns/ColumnsNumber.h>
#include <DB/DataTypes/DataTypeString.h>
#include <DB/DataTypes/DataTypesNumberFixed.h>
#include <DB/DataStreams/OneBlockInputStream.h>
#include <DB/Interpreters/Context.h>
namespace DB
{
StorageSystemClusters::StorageSystemClusters(const std::string & name_, Context & context_)
: name(name_)
, columns{
{ "cluster", new DataTypeString },
{ "shard_num", new DataTypeUInt32 },
{ "shard_weight", new DataTypeUInt32 },
{ "replica_num", new DataTypeUInt32 },
{ "host_name", new DataTypeString },
{ "host_address", new DataTypeString },
{ "port", new DataTypeUInt16 },
{ "user", new DataTypeString }
}
, context(context_)
{
}
StoragePtr StorageSystemClusters::create(const std::string & name_, Context & context_)
{
context_.initClusters();
return (new StorageSystemClusters{name_, context_})->thisPtr();
}
BlockInputStreams StorageSystemClusters::read(
const Names & column_names,
ASTPtr query,
const Context & context_,
const Settings & settings,
QueryProcessingStage::Enum & processed_stage,
const size_t max_block_size,
const unsigned threads)
{
check(column_names);
processed_stage = QueryProcessingStage::FetchColumns;
ColumnPtr cluster_column = new ColumnString;
ColumnPtr shard_num_column = new ColumnUInt32;
ColumnPtr shard_weight_column = new ColumnUInt32;
ColumnPtr replica_num_column = new ColumnUInt32;
ColumnPtr host_name_column = new ColumnString;
ColumnPtr host_address_column = new ColumnString;
ColumnPtr port_column = new ColumnUInt16;
ColumnPtr user_column = new ColumnString;
auto updateColumns = [&](const std::string & cluster_name, const Cluster::ShardInfo & shard_info,
const Cluster::Address & address)
{
cluster_column->insert(cluster_name);
shard_num_column->insert(static_cast<UInt64>(shard_info.shard_num));
shard_weight_column->insert(static_cast<UInt64>(shard_info.weight));
replica_num_column->insert(static_cast<UInt64>(address.replica_num));
host_name_column->insert(address.host_name);
host_address_column->insert(address.host_port.host().toString());
port_column->insert(static_cast<UInt64>(address.host_port.port()));
user_column->insert(address.user);
};
const auto & clusters = context.getClusters();
for (const auto & entry : clusters->impl)
{
const std::string cluster_name = entry.first;
const Cluster & cluster = entry.second;
const auto & addresses = cluster.getShardsInfo();
const auto & addresses_with_failover = cluster.getShardsWithFailoverInfo();
const auto & shards_info = cluster.shard_info_vec;
if (!addresses.empty())
{
auto it1 = addresses.cbegin();
auto it2 = shards_info.cbegin();
while (it1 != addresses.cend())
{
const auto & address = *it1;
const auto & shard_info = *it2;
updateColumns(cluster_name, shard_info, address);
++it1;
++it2;
}
}
else if (!addresses_with_failover.empty())
{
auto it1 = addresses_with_failover.cbegin();
auto it2 = shards_info.cbegin();
while (it1 != addresses_with_failover.cend())
{
const auto & addresses = *it1;
const auto & shard_info = *it2;
for (const auto & address : addresses)
updateColumns(cluster_name, shard_info, address);
++it1;
++it2;
}
}
}
Block block;
block.insert(ColumnWithNameAndType(cluster_column, new DataTypeString, "cluster"));
block.insert(ColumnWithNameAndType(shard_num_column, new DataTypeUInt32, "shard_num"));
block.insert(ColumnWithNameAndType(shard_weight_column, new DataTypeUInt32, "shard_weight"));
block.insert(ColumnWithNameAndType(replica_num_column, new DataTypeUInt32, "replica_num"));
block.insert(ColumnWithNameAndType(host_name_column, new DataTypeString, "host_name"));
block.insert(ColumnWithNameAndType(host_address_column, new DataTypeString, "host_address"));
block.insert(ColumnWithNameAndType(port_column, new DataTypeUInt16, "port"));
block.insert(ColumnWithNameAndType(user_column, new DataTypeString, "user"));
return BlockInputStreams{ 1, new OneBlockInputStream(block) };
}
}

View File

@ -27,6 +27,7 @@ StorageSystemDictionaries::StorageSystemDictionaries(const std::string & name)
{ "attribute.types", new DataTypeArray{new DataTypeString} }, { "attribute.types", new DataTypeArray{new DataTypeString} },
{ "has_hierarchy", new DataTypeUInt8 }, { "has_hierarchy", new DataTypeUInt8 },
{ "bytes_allocated", new DataTypeUInt64 }, { "bytes_allocated", new DataTypeUInt64 },
{ "query_count", new DataTypeUInt64 },
{ "hit_rate", new DataTypeFloat64 }, { "hit_rate", new DataTypeFloat64 },
{ "element_count", new DataTypeUInt64 }, { "element_count", new DataTypeUInt64 },
{ "load_factor", new DataTypeFloat64 }, { "load_factor", new DataTypeFloat64 },
@ -69,6 +70,7 @@ BlockInputStreams StorageSystemDictionaries::read(
}; };
ColumnWithNameAndType col_has_hierarchy{new ColumnUInt8, new DataTypeUInt8, "has_hierarchy"}; ColumnWithNameAndType col_has_hierarchy{new ColumnUInt8, new DataTypeUInt8, "has_hierarchy"};
ColumnWithNameAndType col_bytes_allocated{new ColumnUInt64, new DataTypeUInt64, "bytes_allocated"}; ColumnWithNameAndType col_bytes_allocated{new ColumnUInt64, new DataTypeUInt64, "bytes_allocated"};
ColumnWithNameAndType col_query_count{new ColumnUInt64, new DataTypeUInt64, "query_count"};
ColumnWithNameAndType col_hit_rate{new ColumnFloat64, new DataTypeFloat64, "hit_rate"}; ColumnWithNameAndType col_hit_rate{new ColumnFloat64, new DataTypeFloat64, "hit_rate"};
ColumnWithNameAndType col_element_count{new ColumnUInt64, new DataTypeUInt64, "element_count"}; ColumnWithNameAndType col_element_count{new ColumnUInt64, new DataTypeUInt64, "element_count"};
ColumnWithNameAndType col_load_factor{new ColumnFloat64, new DataTypeFloat64, "load_factor"}; ColumnWithNameAndType col_load_factor{new ColumnFloat64, new DataTypeFloat64, "load_factor"};
@ -97,6 +99,7 @@ BlockInputStreams StorageSystemDictionaries::read(
})); }));
col_has_hierarchy.column->insert(UInt64{dict_ptr->hasHierarchy()}); col_has_hierarchy.column->insert(UInt64{dict_ptr->hasHierarchy()});
col_bytes_allocated.column->insert(dict_ptr->getBytesAllocated()); col_bytes_allocated.column->insert(dict_ptr->getBytesAllocated());
col_query_count.column->insert(dict_ptr->getQueryCount());
col_hit_rate.column->insert(dict_ptr->getHitRate()); col_hit_rate.column->insert(dict_ptr->getHitRate());
col_element_count.column->insert(dict_ptr->getElementCount()); col_element_count.column->insert(dict_ptr->getElementCount());
col_load_factor.column->insert(dict_ptr->getLoadFactor()); col_load_factor.column->insert(dict_ptr->getLoadFactor());
@ -140,6 +143,7 @@ BlockInputStreams StorageSystemDictionaries::read(
col_attribute_types, col_attribute_types,
col_has_hierarchy, col_has_hierarchy,
col_bytes_allocated, col_bytes_allocated,
col_query_count,
col_hit_rate, col_hit_rate,
col_element_count, col_element_count,
col_load_factor, col_load_factor,

View File

@ -0,0 +1,104 @@
0
1
2
111
4
222
6
333
8
9
9999
9999
9999
111
9999
222
9999
333
9999
9999
hello
world
abc
0
1
2
hello
4
world
6
abc
8
9
hello
world
abc
-
-
-
hello
-
world
-
abc
-
-
0
0
0
111
0
222
0
333
0
0
-1
-1
-1
111
-1
222
-1
333
-1
-1
-1.1
-1.1
-1.1
111
-1.1
222
-1.1
333
-1.1
-1.1
1
1
1
111
1
222.2
1
333
1
1
Остальные
Яндекс
Google
Остальные

View File

@ -0,0 +1,14 @@
SELECT transform(number, [3, 5, 7], [111, 222, 333]) FROM system.numbers LIMIT 10;
SELECT transform(number, [3, 5, 7], [111, 222, 333], 9999) FROM system.numbers LIMIT 10;
SELECT transform(number, [3, 5, 7], ['hello', 'world', 'abc'], '') FROM system.numbers LIMIT 10;
SELECT transform(toString(number), ['3', '5', '7'], ['hello', 'world', 'abc']) FROM system.numbers LIMIT 10;
SELECT transform(toString(number), ['3', '5', '7'], ['hello', 'world', 'abc'], '') FROM system.numbers LIMIT 10;
SELECT transform(toString(number), ['3', '5', '7'], ['hello', 'world', 'abc'], '-') FROM system.numbers LIMIT 10;
SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], 0) FROM system.numbers LIMIT 10;
SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], -1) FROM system.numbers LIMIT 10;
SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], -1.1) FROM system.numbers LIMIT 10;
SELECT transform(toString(number), ['3', '5', '7'], [111, 222.2, 333], 1) FROM system.numbers LIMIT 10;
SELECT transform(1, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title;
SELECT transform(2, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title;
SELECT transform(3, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title;
SELECT transform(4, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title;

View File

@ -0,0 +1,10 @@
0
1
2
3
4
5
6
7
8
9

View File

@ -0,0 +1 @@
SELECT DISTINCT number FROM remote('127.0.0.{1,2}', system.numbers) LIMIT 10

View File

@ -0,0 +1,120 @@
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1
1 1

View File

@ -0,0 +1,64 @@
#!/bin/bash
function create {
clickhouse-client --query="DROP TABLE IF EXISTS test.summing"
clickhouse-client --query="DROP TABLE IF EXISTS test.collapsing"
clickhouse-client --query="DROP TABLE IF EXISTS test.aggregating"
clickhouse-client --query="CREATE TABLE test.summing (d Date DEFAULT today(), x UInt64, s UInt64 DEFAULT 1) ENGINE = SummingMergeTree(d, x, 8192)"
clickhouse-client --query="CREATE TABLE test.collapsing (d Date DEFAULT today(), x UInt64, s UInt64 DEFAULT 1) ENGINE = CollapsingMergeTree(d, x, 8192, s)"
clickhouse-client --query="CREATE TABLE test.aggregating (d Date DEFAULT today(), x UInt64, s AggregateFunction(sum, UInt64)) ENGINE = AggregatingMergeTree(d, x, 8192)"
}
function cleanup {
clickhouse-client --query="DROP TABLE test.summing"
clickhouse-client --query="DROP TABLE test.collapsing"
clickhouse-client --query="DROP TABLE test.aggregating"
}
function test {
create
SUM=$(( $1 + $2 ))
MAX=$(( $1 > $2 ? $1 : $2 ))
clickhouse-client --query="INSERT INTO test.summing (x) SELECT number AS x FROM system.numbers LIMIT $1"
clickhouse-client --query="INSERT INTO test.summing (x) SELECT number AS x FROM system.numbers LIMIT $2"
clickhouse-client --query="INSERT INTO test.collapsing (x) SELECT number AS x FROM system.numbers LIMIT $1"
clickhouse-client --query="INSERT INTO test.collapsing (x) SELECT number AS x FROM system.numbers LIMIT $2"
clickhouse-client --query="INSERT INTO test.aggregating (d, x, s) SELECT today() AS d, number AS x, sumState(materialize(toUInt64(1))) AS s FROM (SELECT number FROM system.numbers LIMIT $1) GROUP BY number"
clickhouse-client --query="INSERT INTO test.aggregating (d, x, s) SELECT today() AS d, number AS x, sumState(materialize(toUInt64(1))) AS s FROM (SELECT number FROM system.numbers LIMIT $2) GROUP BY number"
clickhouse-client --query="SELECT count() = $SUM, sum(s) = $SUM FROM test.summing"
clickhouse-client --query="OPTIMIZE TABLE test.summing"
clickhouse-client --query="SELECT count() = $MAX, sum(s) = $SUM FROM test.summing"
echo
clickhouse-client --query="SELECT count() = $SUM, sum(s) = $SUM FROM test.collapsing"
clickhouse-client --query="OPTIMIZE TABLE test.collapsing"
clickhouse-client --query="SELECT count() = $MAX, sum(s) = $MAX FROM test.collapsing"
echo
clickhouse-client --query="SELECT count() = $SUM, sumMerge(s) = $SUM FROM test.aggregating"
clickhouse-client --query="OPTIMIZE TABLE test.aggregating"
clickhouse-client --query="SELECT count() = $MAX, sumMerge(s) = $SUM FROM test.aggregating"
echo
echo
}
test 8191 8191
test 8191 8192
test 8192 8191
test 8192 8192
test 8192 8193
test 8193 8192
test 8193 8193
test 8191 8193
test 8193 8191
test 8193 8194
test 8194 8193
test 8194 8194
cleanup

View File

@ -0,0 +1,4 @@
[1] 123
[3] 123
[1,1] 123
[3,4] 123

View File

@ -0,0 +1,4 @@
SELECT arrayMap(x -> 1, [2]), 123 AS y;
SELECT arrayMap(x -> x + 1, [2]), 123 AS y;
SELECT arrayMap(x -> 1, [2, 3]), 123 AS y;
SELECT arrayMap(x -> x + 1, [2, 3]), 123 AS y;

View File

@ -0,0 +1 @@
SELECT arrayMap(x -> 1, [2]), 123 AS x, x + 1;

View File

@ -0,0 +1,6 @@
CREATE DATABASE IF NOT EXISTS test2;
DROP TABLE IF EXISTS test2.mt_buffer;
CREATE TABLE test2.mt_buffer (d Date DEFAULT today(), x UInt64) ENGINE = Buffer(test2, mt, 16, 100, 100, 1000000, 1000000, 1000000000, 1000000000);
INSERT INTO test2.mt_buffer (x) SELECT number AS x FROM system.numbers LIMIT 100000;
INSERT INTO test2.mt_buffer (x) SELECT number AS x FROM system.numbers LIMIT 1000000;
DROP DATABASE test2;

View File

@ -0,0 +1,6 @@
1
2
3
4
5
6

View File

@ -0,0 +1,13 @@
DROP TABLE IF EXISTS test.memory;
CREATE TABLE test.memory (x UInt8) ENGINE = Memory;
INSERT INTO test.memory VALUES (1);
INSERT INTO test.memory (x) VALUES (2);
INSERT INTO test.memory ( x) VALUES (3);
INSERT INTO test.memory (x ) VALUES (4);
INSERT INTO test.memory ( x ) VALUES (5);
INSERT INTO test.memory(x)VALUES(6);
SELECT * FROM test.memory ORDER BY x;
DROP TABLE test.memory;

View File

@ -0,0 +1,4 @@
2015-05-01 12345 1
2015-05-01 67890 1
2015-05-01 12345 1
2015-05-01 67890 1

View File

@ -0,0 +1,14 @@
DROP TABLE IF EXISTS test.mt;
DROP TABLE IF EXISTS test.merge;
CREATE TABLE test.mt (d Date DEFAULT toDate('2015-05-01'), x UInt64) ENGINE = MergeTree(d, x, 1);
CREATE TABLE test.merge (d Date, x UInt64) ENGINE = Merge(test, '^mt$');
SET max_block_size = 1000000;
INSERT INTO test.mt SELECT number AS x FROM system.numbers LIMIT 100000;
SELECT *, b FROM test.mt WHERE x IN (12345, 67890) AND NOT ignore(blockSize() < 10 AS b) ORDER BY x;
SELECT *, b FROM test.merge WHERE x IN (12345, 67890) AND NOT ignore(blockSize() < 10 AS b) ORDER BY x;
DROP TABLE test.merge;
DROP TABLE test.mt;

View File

@ -100,6 +100,11 @@ public:
*/ */
void remove(const std::string & path, int32_t version = -1); void remove(const std::string & path, int32_t version = -1);
/** Удаляет ноду. В случае сетевых ошибок пробует удалять повторно.
* Ошибка ZNONODE для второй и последующих попыток игнорируется
*/
void removeWithRetries(const std::string & path, int32_t version = -1);
/** Не бросает исключение при следующих ошибках: /** Не бросает исключение при следующих ошибках:
* - Такой ноды нет. * - Такой ноды нет.
* - У ноды другая версия. * - У ноды другая версия.

View File

@ -294,6 +294,15 @@ void ZooKeeper::remove(const std::string & path, int32_t version)
check(tryRemove(path, version), path); check(tryRemove(path, version), path);
} }
void ZooKeeper::removeWithRetries(const std::string & path, int32_t version)
{
size_t attempt;
int code = tryRemoveWithRetries(path, version, &attempt);
if (!(code == ZOK || (code == ZNONODE && attempt > 0)))
throw KeeperException(code, path);
}
int32_t ZooKeeper::tryRemove(const std::string & path, int32_t version) int32_t ZooKeeper::tryRemove(const std::string & path, int32_t version)
{ {
int32_t code = removeImpl(path, version); int32_t code = removeImpl(path, version);