2015-04-29 20:31:28 +00:00
|
|
|
#pragma once
|
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <AggregateFunctions/IAggregateFunction.h>
|
|
|
|
#include <DataTypes/DataTypeDateTime.h>
|
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
|
|
|
#include <Columns/ColumnsNumber.h>
|
2017-06-06 17:18:32 +00:00
|
|
|
#include <ext/range.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/PODArray.h>
|
2017-07-13 20:58:19 +00:00
|
|
|
#include <Common/typeid_cast.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <IO/ReadHelpers.h>
|
|
|
|
#include <IO/WriteHelpers.h>
|
2015-04-29 20:31:28 +00:00
|
|
|
#include <bitset>
|
|
|
|
#include <stack>
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-01-12 02:21:15 +00:00
|
|
|
/// Error codes referenced by this header; only declared here, the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int TOO_SLOW;
    extern const int TOO_LESS_ARGUMENTS_FOR_FUNCTION;
    extern const int TOO_MUCH_ARGUMENTS_FOR_FUNCTION;
    extern const int SYNTAX_ERROR;
    extern const int BAD_ARGUMENTS;
    extern const int LOGICAL_ERROR;
    /// Used by the argument type checks in this header; declared here so the header
    /// does not depend on some other include having declared it first.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
|
|
|
|
|
2015-04-29 20:31:28 +00:00
|
|
|
/** Adapts a single-type comparator template (e.g. std::less) to std::pair:
  * two pairs are compared by their .first members only, .second is ignored.
  */
template <template <typename> class Comparator>
struct ComparePairFirst final
{
    template <typename T1, typename T2>
    bool operator()(const std::pair<T1, T2> & lhs, const std::pair<T1, T2> & rhs) const
    {
        const Comparator<T1> compare_keys{};
        const bool ordered = compare_keys(lhs.first, rhs.first);
        return ordered;
    }
};
|
|
|
|
|
|
|
|
struct AggregateFunctionSequenceMatchData final
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
static constexpr auto max_events = 32;
|
|
|
|
|
|
|
|
using Timestamp = std::uint32_t;
|
|
|
|
using Events = std::bitset<max_events>;
|
|
|
|
using TimestampEvents = std::pair<Timestamp, Events>;
|
|
|
|
using Comparator = ComparePairFirst<std::less>;
|
|
|
|
|
|
|
|
bool sorted = true;
|
|
|
|
static constexpr size_t bytes_in_arena = 64;
|
2017-04-20 16:41:03 +00:00
|
|
|
PODArray<TimestampEvents, bytes_in_arena, AllocatorWithStackMemory<Allocator<false>, bytes_in_arena>> events_list;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
void add(const Timestamp timestamp, const Events & events)
|
|
|
|
{
|
|
|
|
/// store information exclusively for rows with at least one event
|
|
|
|
if (events.any())
|
|
|
|
{
|
2017-04-20 16:41:03 +00:00
|
|
|
events_list.emplace_back(timestamp, events);
|
2017-04-01 07:20:54 +00:00
|
|
|
sorted = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void merge(const AggregateFunctionSequenceMatchData & other)
|
|
|
|
{
|
2017-04-20 16:41:03 +00:00
|
|
|
const auto size = events_list.size();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-04-20 16:41:03 +00:00
|
|
|
events_list.insert(std::begin(other.events_list), std::end(other.events_list));
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
/// either sort whole container or do so partially merging ranges afterwards
|
|
|
|
if (!sorted && !other.sorted)
|
2017-04-20 16:41:03 +00:00
|
|
|
std::sort(std::begin(events_list), std::end(events_list), Comparator{});
|
2017-04-01 07:20:54 +00:00
|
|
|
else
|
|
|
|
{
|
2017-04-20 16:41:03 +00:00
|
|
|
const auto begin = std::begin(events_list);
|
2017-04-01 07:20:54 +00:00
|
|
|
const auto middle = std::next(begin, size);
|
2017-04-20 16:41:03 +00:00
|
|
|
const auto end = std::end(events_list);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
if (!sorted)
|
|
|
|
std::sort(begin, middle, Comparator{});
|
|
|
|
|
|
|
|
if (!other.sorted)
|
|
|
|
std::sort(middle, end, Comparator{});
|
|
|
|
|
|
|
|
std::inplace_merge(begin, middle, end, Comparator{});
|
|
|
|
}
|
|
|
|
|
|
|
|
sorted = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void sort()
|
|
|
|
{
|
|
|
|
if (!sorted)
|
|
|
|
{
|
2017-04-20 16:41:03 +00:00
|
|
|
std::sort(std::begin(events_list), std::end(events_list), Comparator{});
|
2017-04-01 07:20:54 +00:00
|
|
|
sorted = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void serialize(WriteBuffer & buf) const
|
|
|
|
{
|
|
|
|
writeBinary(sorted, buf);
|
2017-04-20 16:41:03 +00:00
|
|
|
writeBinary(events_list.size(), buf);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-04-20 16:41:03 +00:00
|
|
|
for (const auto & events : events_list)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
writeBinary(events.first, buf);
|
|
|
|
writeBinary(events.second.to_ulong(), buf);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void deserialize(ReadBuffer & buf)
|
|
|
|
{
|
|
|
|
readBinary(sorted, buf);
|
|
|
|
|
2017-07-21 06:35:58 +00:00
|
|
|
size_t size;
|
2017-04-01 07:20:54 +00:00
|
|
|
readBinary(size, buf);
|
|
|
|
|
2017-04-20 16:41:03 +00:00
|
|
|
events_list.clear();
|
|
|
|
events_list.reserve(size);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-07-21 06:35:58 +00:00
|
|
|
for (size_t i = 0; i < size; ++i)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
std::uint32_t timestamp;
|
|
|
|
readBinary(timestamp, buf);
|
|
|
|
|
|
|
|
UInt64 events;
|
|
|
|
readBinary(events, buf);
|
|
|
|
|
2017-04-20 16:41:03 +00:00
|
|
|
events_list.emplace_back(timestamp, Events{events});
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
}
|
2015-04-29 20:31:28 +00:00
|
|
|
};
|
|
|
|
|
2015-08-18 16:11:21 +00:00
|
|
|
|
|
|
|
/// Upper bound on the number of steps the pattern matcher may take over one sequence;
/// exceeding it makes the matcher throw instead of continuing.
constexpr int sequence_match_max_iterations = 1000000;
|
|
|
|
|
2017-12-20 07:36:30 +00:00
|
|
|
|
|
|
|
template <typename Derived>
|
|
|
|
class AggregateFunctionSequenceBase : public IAggregateFunctionDataHelper<AggregateFunctionSequenceMatchData, Derived>
|
2015-04-29 20:31:28 +00:00
|
|
|
{
|
|
|
|
public:
|
2017-12-20 20:25:22 +00:00
|
|
|
AggregateFunctionSequenceBase(const DataTypes & arguments, const String & pattern)
|
|
|
|
: pattern(pattern)
|
|
|
|
{
|
|
|
|
arg_count = arguments.size();
|
|
|
|
|
|
|
|
if (!sufficientArgs(arg_count))
|
|
|
|
throw Exception{"Aggregate function " + derived().getName() + " requires at least 3 arguments.",
|
|
|
|
ErrorCodes::TOO_LESS_ARGUMENTS_FOR_FUNCTION};
|
|
|
|
|
|
|
|
if (arg_count - 1 > AggregateFunctionSequenceMatchData::max_events)
|
|
|
|
throw Exception{"Aggregate function " + derived().getName() + " supports up to " +
|
|
|
|
toString(AggregateFunctionSequenceMatchData::max_events) + " event arguments.",
|
|
|
|
ErrorCodes::TOO_MUCH_ARGUMENTS_FOR_FUNCTION};
|
|
|
|
|
|
|
|
const auto time_arg = arguments.front().get();
|
|
|
|
if (!typeid_cast<const DataTypeDateTime *>(time_arg))
|
|
|
|
throw Exception{"Illegal type " + time_arg->getName() + " of first argument of aggregate function "
|
|
|
|
+ derived().getName() + ", must be DateTime",
|
|
|
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
|
|
|
|
|
|
|
for (const auto i : ext::range(1, arg_count))
|
|
|
|
{
|
|
|
|
const auto cond_arg = arguments[i].get();
|
|
|
|
if (!typeid_cast<const DataTypeUInt8 *>(cond_arg))
|
2018-02-18 02:22:32 +00:00
|
|
|
throw Exception{"Illegal type " + cond_arg->getName() + " of argument " + toString(i + 1) +
|
2017-12-20 20:25:22 +00:00
|
|
|
" of aggregate function " + derived().getName() + ", must be UInt8",
|
|
|
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
|
|
|
}
|
|
|
|
|
|
|
|
parsePattern();
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena *) const override
|
|
|
|
{
|
|
|
|
const auto timestamp = static_cast<const ColumnUInt32 *>(columns[0])->getData()[row_num];
|
|
|
|
|
2017-12-20 07:36:30 +00:00
|
|
|
AggregateFunctionSequenceMatchData::Events events;
|
2017-04-01 07:20:54 +00:00
|
|
|
for (const auto i : ext::range(1, arg_count))
|
|
|
|
{
|
|
|
|
const auto event = static_cast<const ColumnUInt8 *>(columns[i])->getData()[row_num];
|
|
|
|
events.set(i - 1, event);
|
|
|
|
}
|
|
|
|
|
2017-12-20 07:36:30 +00:00
|
|
|
this->data(place).add(timestamp, events);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2017-12-01 21:51:50 +00:00
|
|
|
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2017-12-20 20:25:22 +00:00
|
|
|
this->data(place).merge(this->data(rhs));
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
|
|
|
|
{
|
2017-12-20 07:36:30 +00:00
|
|
|
this->data(place).serialize(buf);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
|
|
|
|
{
|
2017-12-20 07:36:30 +00:00
|
|
|
this->data(place).deserialize(buf);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2017-09-17 20:22:39 +00:00
|
|
|
const char * getHeaderFilePath() const override { return __FILE__; }
|
|
|
|
|
2015-04-29 20:31:28 +00:00
|
|
|
private:
|
2017-04-01 07:20:54 +00:00
|
|
|
enum class PatternActionType
|
|
|
|
{
|
|
|
|
SpecificEvent,
|
|
|
|
AnyEvent,
|
|
|
|
KleeneStar,
|
|
|
|
TimeLessOrEqual,
|
|
|
|
TimeLess,
|
|
|
|
TimeGreaterOrEqual,
|
|
|
|
TimeGreater
|
|
|
|
};
|
|
|
|
|
|
|
|
struct PatternAction final
|
|
|
|
{
|
|
|
|
PatternActionType type;
|
|
|
|
std::uint32_t extra;
|
|
|
|
|
|
|
|
PatternAction() = default;
|
|
|
|
PatternAction(const PatternActionType type, const std::uint32_t extra = 0) : type{type}, extra{extra} {}
|
|
|
|
};
|
|
|
|
|
|
|
|
static constexpr size_t bytes_on_stack = 64;
|
|
|
|
using PatternActions = PODArray<PatternAction, bytes_on_stack, AllocatorWithStackMemory<Allocator<false>, bytes_on_stack>>;
|
|
|
|
|
2017-12-20 20:25:22 +00:00
|
|
|
static bool sufficientArgs(const size_t arg_count) { return arg_count >= 3; }
|
|
|
|
|
|
|
|
Derived & derived() { return static_cast<Derived &>(*this); }
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
void parsePattern()
|
|
|
|
{
|
|
|
|
actions.clear();
|
|
|
|
actions.emplace_back(PatternActionType::KleeneStar);
|
|
|
|
|
2017-05-04 11:56:13 +00:00
|
|
|
const char * pos = pattern.data();
|
2017-07-12 02:40:28 +00:00
|
|
|
const char * begin = pos;
|
|
|
|
const char * end = pos + pattern.size();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-07-12 02:40:28 +00:00
|
|
|
auto throw_exception = [&](const std::string & msg)
|
2017-04-20 18:35:39 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
throw Exception{
|
2017-12-20 20:25:22 +00:00
|
|
|
msg + " '" + std::string(pos, end) + "' at position " + toString(pos - begin),
|
2017-04-20 18:35:39 +00:00
|
|
|
ErrorCodes::SYNTAX_ERROR};
|
2017-04-01 07:20:54 +00:00
|
|
|
};
|
|
|
|
|
2017-07-13 02:48:34 +00:00
|
|
|
auto match = [&pos, end](const char * str) mutable
|
2017-07-12 02:40:28 +00:00
|
|
|
{
|
|
|
|
size_t length = strlen(str);
|
2017-07-13 01:54:51 +00:00
|
|
|
if (pos + length <= end && 0 == memcmp(pos, str, length))
|
2017-07-12 02:40:28 +00:00
|
|
|
{
|
|
|
|
pos += length;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
};
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
while (pos < end)
|
|
|
|
{
|
2017-07-12 02:40:28 +00:00
|
|
|
if (match("(?"))
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2017-07-12 02:40:28 +00:00
|
|
|
if (match("t"))
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
PatternActionType type;
|
|
|
|
|
2017-07-12 02:40:28 +00:00
|
|
|
if (match("<="))
|
2017-04-01 07:20:54 +00:00
|
|
|
type = PatternActionType::TimeLessOrEqual;
|
2017-07-12 02:40:28 +00:00
|
|
|
else if (match("<"))
|
2017-04-01 07:20:54 +00:00
|
|
|
type = PatternActionType::TimeLess;
|
2017-07-12 02:40:28 +00:00
|
|
|
else if (match(">="))
|
2017-04-01 07:20:54 +00:00
|
|
|
type = PatternActionType::TimeGreaterOrEqual;
|
2017-07-12 02:40:28 +00:00
|
|
|
else if (match(">"))
|
2017-04-01 07:20:54 +00:00
|
|
|
type = PatternActionType::TimeGreater;
|
|
|
|
else
|
|
|
|
throw_exception("Unknown time condition");
|
|
|
|
|
2017-07-12 02:40:28 +00:00
|
|
|
UInt64 duration = 0;
|
|
|
|
auto prev_pos = pos;
|
|
|
|
pos = tryReadIntText(duration, pos, end);
|
|
|
|
if (pos == prev_pos)
|
2017-04-01 07:20:54 +00:00
|
|
|
throw_exception("Could not parse number");
|
|
|
|
|
|
|
|
if (actions.back().type != PatternActionType::SpecificEvent &&
|
|
|
|
actions.back().type != PatternActionType::AnyEvent &&
|
|
|
|
actions.back().type != PatternActionType::KleeneStar)
|
|
|
|
throw Exception{
|
|
|
|
"Temporal condition should be preceeded by an event condition",
|
2017-07-12 02:40:28 +00:00
|
|
|
ErrorCodes::BAD_ARGUMENTS};
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-07-12 02:40:28 +00:00
|
|
|
actions.emplace_back(type, duration);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2017-07-12 02:40:28 +00:00
|
|
|
else
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2017-07-12 02:40:28 +00:00
|
|
|
UInt64 event_number = 0;
|
|
|
|
auto prev_pos = pos;
|
|
|
|
pos = tryReadIntText(event_number, pos, end);
|
|
|
|
if (pos == prev_pos)
|
|
|
|
throw_exception("Could not parse number");
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
if (event_number > arg_count - 1)
|
|
|
|
throw Exception{
|
2017-12-20 20:25:22 +00:00
|
|
|
"Event number " + toString(event_number) + " is out of range",
|
2017-07-12 02:40:28 +00:00
|
|
|
ErrorCodes::BAD_ARGUMENTS};
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
actions.emplace_back(PatternActionType::SpecificEvent, event_number - 1);
|
|
|
|
}
|
|
|
|
|
2017-07-12 02:40:28 +00:00
|
|
|
if (!match(")"))
|
2017-04-01 07:20:54 +00:00
|
|
|
throw_exception("Expected closing parenthesis, found");
|
|
|
|
|
|
|
|
}
|
2017-07-12 02:40:28 +00:00
|
|
|
else if (match(".*"))
|
2017-04-01 07:20:54 +00:00
|
|
|
actions.emplace_back(PatternActionType::KleeneStar);
|
2017-07-12 02:40:28 +00:00
|
|
|
else if (match("."))
|
2017-04-01 07:20:54 +00:00
|
|
|
actions.emplace_back(PatternActionType::AnyEvent);
|
|
|
|
else
|
|
|
|
throw_exception("Could not parse pattern, unexpected starting symbol");
|
|
|
|
}
|
|
|
|
}
|
2015-04-29 20:31:28 +00:00
|
|
|
|
2015-08-21 15:57:26 +00:00
|
|
|
protected:
|
2017-04-01 07:20:54 +00:00
|
|
|
template <typename T>
|
|
|
|
bool match(T & events_it, const T events_end) const
|
|
|
|
{
|
|
|
|
const auto action_begin = std::begin(actions);
|
|
|
|
const auto action_end = std::end(actions);
|
|
|
|
auto action_it = action_begin;
|
|
|
|
|
|
|
|
const auto events_begin = events_it;
|
|
|
|
auto base_it = events_it;
|
|
|
|
|
|
|
|
/// an iterator to action plus an iterator to row in events list plus timestamp at the start of sequence
|
|
|
|
using backtrack_info = std::tuple<decltype(action_it), T, T>;
|
|
|
|
std::stack<backtrack_info> back_stack;
|
|
|
|
|
|
|
|
/// backtrack if possible
|
2018-01-06 18:10:44 +00:00
|
|
|
const auto do_backtrack = [&]
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
while (!back_stack.empty())
|
|
|
|
{
|
|
|
|
auto & top = back_stack.top();
|
|
|
|
|
|
|
|
action_it = std::get<0>(top);
|
|
|
|
events_it = std::next(std::get<1>(top));
|
|
|
|
base_it = std::get<2>(top);
|
|
|
|
|
|
|
|
back_stack.pop();
|
|
|
|
|
|
|
|
if (events_it != events_end)
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
};
|
|
|
|
|
2017-07-21 06:35:58 +00:00
|
|
|
size_t i = 0;
|
2017-04-01 07:20:54 +00:00
|
|
|
while (action_it != action_end && events_it != events_end)
|
|
|
|
{
|
|
|
|
if (action_it->type == PatternActionType::SpecificEvent)
|
|
|
|
{
|
|
|
|
if (events_it->second.test(action_it->extra))
|
|
|
|
{
|
|
|
|
/// move to the next action and events
|
|
|
|
base_it = events_it;
|
|
|
|
++action_it, ++events_it;
|
|
|
|
}
|
|
|
|
else if (!do_backtrack())
|
|
|
|
/// backtracking failed, bail out
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else if (action_it->type == PatternActionType::AnyEvent)
|
|
|
|
{
|
|
|
|
base_it = events_it;
|
|
|
|
++action_it, ++events_it;
|
|
|
|
}
|
|
|
|
else if (action_it->type == PatternActionType::KleeneStar)
|
|
|
|
{
|
|
|
|
back_stack.emplace(action_it, events_it, base_it);
|
|
|
|
base_it = events_it;
|
|
|
|
++action_it;
|
|
|
|
}
|
|
|
|
else if (action_it->type == PatternActionType::TimeLessOrEqual)
|
|
|
|
{
|
|
|
|
if (events_it->first - base_it->first <= action_it->extra)
|
|
|
|
{
|
|
|
|
/// condition satisfied, move onto next action
|
|
|
|
back_stack.emplace(action_it, events_it, base_it);
|
|
|
|
base_it = events_it;
|
|
|
|
++action_it;
|
|
|
|
}
|
|
|
|
else if (!do_backtrack())
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else if (action_it->type == PatternActionType::TimeLess)
|
|
|
|
{
|
|
|
|
if (events_it->first - base_it->first < action_it->extra)
|
|
|
|
{
|
|
|
|
back_stack.emplace(action_it, events_it, base_it);
|
|
|
|
base_it = events_it;
|
|
|
|
++action_it;
|
|
|
|
}
|
|
|
|
else if (!do_backtrack())
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else if (action_it->type == PatternActionType::TimeGreaterOrEqual)
|
|
|
|
{
|
|
|
|
if (events_it->first - base_it->first >= action_it->extra)
|
|
|
|
{
|
|
|
|
back_stack.emplace(action_it, events_it, base_it);
|
|
|
|
base_it = events_it;
|
|
|
|
++action_it;
|
|
|
|
}
|
|
|
|
else if (++events_it == events_end && !do_backtrack())
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else if (action_it->type == PatternActionType::TimeGreater)
|
|
|
|
{
|
|
|
|
if (events_it->first - base_it->first > action_it->extra)
|
|
|
|
{
|
|
|
|
back_stack.emplace(action_it, events_it, base_it);
|
|
|
|
base_it = events_it;
|
|
|
|
++action_it;
|
|
|
|
}
|
|
|
|
else if (++events_it == events_end && !do_backtrack())
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
throw Exception{
|
|
|
|
"Unknown PatternActionType",
|
2017-07-27 23:23:13 +00:00
|
|
|
ErrorCodes::LOGICAL_ERROR};
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
if (++i > sequence_match_max_iterations)
|
|
|
|
throw Exception{
|
|
|
|
"Pattern application proves too difficult, exceeding max iterations (" + toString(sequence_match_max_iterations) + ")",
|
2017-07-27 23:23:13 +00:00
|
|
|
ErrorCodes::TOO_SLOW};
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// if there are some actions remaining
|
|
|
|
if (action_it != action_end)
|
|
|
|
{
|
|
|
|
/// match multiple empty strings at end
|
|
|
|
while (action_it->type == PatternActionType::KleeneStar ||
|
|
|
|
action_it->type == PatternActionType::TimeLessOrEqual ||
|
|
|
|
action_it->type == PatternActionType::TimeLess ||
|
|
|
|
(action_it->type == PatternActionType::TimeGreaterOrEqual && action_it->extra == 0))
|
|
|
|
++action_it;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (events_it == events_begin)
|
|
|
|
++events_it;
|
|
|
|
|
|
|
|
return action_it == action_end;
|
|
|
|
}
|
2015-04-29 20:31:28 +00:00
|
|
|
|
2015-08-21 15:57:26 +00:00
|
|
|
private:
|
2017-04-01 07:20:54 +00:00
|
|
|
std::string pattern;
|
2017-07-21 06:35:58 +00:00
|
|
|
size_t arg_count;
|
2017-04-01 07:20:54 +00:00
|
|
|
PatternActions actions;
|
2015-04-29 20:31:28 +00:00
|
|
|
};
|
|
|
|
|
2017-12-20 07:36:30 +00:00
|
|
|
|
|
|
|
/// Aggregate function "sequenceMatch": yields a UInt8 that tells whether the pattern
/// matched the stored rows.
class AggregateFunctionSequenceMatch final : public AggregateFunctionSequenceBase<AggregateFunctionSequenceMatch>
{
public:
    using AggregateFunctionSequenceBase<AggregateFunctionSequenceMatch>::AggregateFunctionSequenceBase;

    String getName() const override
    {
        return "sequenceMatch";
    }

    DataTypePtr getReturnType() const override
    {
        return std::make_shared<DataTypeUInt8>();
    }

    /// Sorts the state by timestamp, runs the matcher once from the first row
    /// and appends the outcome to the result column.
    void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
    {
        /// Sorting mutates the state although `place` is const.
        const_cast<Data &>(data(place)).sort();

        const auto & state = data(place);

        auto cursor = std::begin(state.events_list);
        const auto last = std::end(state.events_list);

        const bool matched = match(cursor, last);

        auto & result_column = static_cast<ColumnUInt8 &>(to);
        result_column.getData().push_back(matched);
    }
};
|
|
|
|
|
|
|
|
|
|
|
|
/// Aggregate function "sequenceCount": yields a UInt64 with the number of successive
/// pattern matches found in the stored rows.
class AggregateFunctionSequenceCount final : public AggregateFunctionSequenceBase<AggregateFunctionSequenceCount>
{
public:
    using AggregateFunctionSequenceBase<AggregateFunctionSequenceCount>::AggregateFunctionSequenceBase;

    String getName() const override
    {
        return "sequenceCount";
    }

    DataTypePtr getReturnType() const override
    {
        return std::make_shared<DataTypeUInt64>();
    }

    /// Sorts the state by timestamp and appends the number of matches to the result column.
    void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
    {
        /// Sorting mutates the state although `place` is const.
        const_cast<Data &>(data(place)).sort();

        auto & result_column = static_cast<ColumnUInt64 &>(to);
        result_column.getData().push_back(count(place));
    }

private:
    /// Runs the matcher repeatedly, each run continuing from where the previous one stopped,
    /// until the rows are exhausted or a run fails to match.
    UInt64 count(const ConstAggregateDataPtr & place) const
    {
        const auto & state = data(place);

        auto cursor = std::begin(state.events_list);
        const auto last = std::end(state.events_list);

        size_t occurrences = 0;
        while (cursor != last)
        {
            if (!match(cursor, last))
                break;
            ++occurrences;
        }

        return occurrences;
    }
};
|
|
|
|
|
2015-04-29 20:31:28 +00:00
|
|
|
}
|