Add 'strict_order' option to windowFunnel()

This commit is contained in:
philip.han 2020-03-20 19:31:47 +09:00
parent c41c527bdf
commit 7549c575b1
3 changed files with 57 additions and 3 deletions

View File

@ -139,7 +139,9 @@ class AggregateFunctionWindowFunnel final
private:
UInt64 window;
UInt8 events_size;
UInt8 strict;
UInt8 strict; // When 'strict' is set, conditions are applied only to non-repeating values.
UInt8 strict_order; // When 'strict_order' is set, other events are not allowed to intervene in the chain.
// E.g. for 'A->B->D->C', the search for 'A->B->C' stops at 'D' and the max event level is 2.
// Loop through the entire events_list, update the event timestamp value
@ -150,7 +152,7 @@ private:
{
if (data.size() == 0)
return 0;
if (events_size == 1)
if (!strict_order && events_size == 1)
return 1;
const_cast<Data &>(data).sort();
@ -159,13 +161,24 @@ private:
/// timestamp defaults to -1, which an unsigned timestamp value never meets
/// there may be some bugs when a UInt64 type timestamp overflows Int64, but it works in most cases.
std::vector<Int64> events_timestamp(events_size, -1);
bool first_event = false;
for (const auto & pair : data.events_list)
{
const T & timestamp = pair.first;
const auto & event_idx = pair.second - 1;
if (event_idx == 0)
if (strict_order && event_idx == -1)
{
if (first_event)
break;
else
continue;
}
else if (event_idx == 0)
{
events_timestamp[0] = timestamp;
first_event = true;
}
else if (strict && events_timestamp[event_idx] >= 0)
{
return event_idx + 1;
@ -198,11 +211,14 @@ public:
window = params.at(0).safeGet<UInt64>();
strict = 0;
strict_order = 0;
for (size_t i = 1; i < params.size(); ++i)
{
String option = params.at(i).safeGet<String>();
if (option.compare("strict") == 0)
strict = 1;
else if (option.compare("strict_order") == 0)
strict_order = 1;
else
throw Exception{"Aggregate function " + getName() + " doesn't support a parameter: " + option, ErrorCodes::BAD_ARGUMENTS};
}
@ -215,14 +231,21 @@ public:
/// Feeds one input row into the aggregation state at `place`.
/// columns[0] holds the timestamp; columns[1..events_size] hold the UInt8 condition flags.
/// Every condition that is non-zero for the row is stored as (timestamp, condition index).
void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena *) const override
{
    const auto timestamp = assert_cast<const ColumnVector<T> *>(columns[0])->getData()[row_num];
    bool matched_any = false;

    // Conditions are visited from the last one down to the first: reverse iteration
    // (together with stable sorting) is needed for rows that satisfy more than one condition.
    for (auto idx = events_size; idx > 0; --idx)
    {
        const auto flag = assert_cast<const ColumnVector<UInt8> *>(columns[idx])->getData()[row_num];
        if (!flag)
            continue;

        this->data(place).add(timestamp, idx);
        matched_any = true;
    }

    // With 'strict_order', a row that matched no condition is still stored, under index 0
    // (an index no condition uses, since the loop above only emits 1..events_size).
    if (strict_order && !matched_any)
        this->data(place).add(timestamp, 0);
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override

View File

@ -18,3 +18,21 @@
1
1
1
[1, 3]
[2, 3]
[3, 3]
[4, 3]
[5, 3]
[6, 1]
[1, 3]
[2, 1]
[3, 3]
[4, 3]
[5, 2]
[6, 1]
[1, 3]
[2, 1]
[3, 2]
[4, 2]
[5, 2]
[6, 1]

View File

@ -51,3 +51,16 @@ drop table funnel_test;
drop table funnel_test2;
drop table funnel_test_u64;
drop table funnel_test_strict;
-- Test for the 'strict_order' option of windowFunnel(): the funnel is 'a' -> 'b' -> 'c'.
drop table if exists funnel_test_strict_order;
create table funnel_test_strict_order (dt DateTime, user int, event String) engine = MergeTree() partition by dt order by user;
-- user 1: a, b, c -- the plain chain with nothing in between.
insert into funnel_test_strict_order values (1, 1, 'a') (2, 1, 'b') (3, 1, 'c');
-- user 2: a, d, b, c -- an unrelated event 'd' appears right after 'a'.
insert into funnel_test_strict_order values (1, 2, 'a') (2, 2, 'd') (3, 2, 'b') (4, 2, 'c');
-- user 3: a, a, b, b, c, c -- every funnel event is repeated twice.
insert into funnel_test_strict_order values (1, 3, 'a') (2, 3, 'a') (3, 3, 'b') (4, 3, 'b') (5, 3, 'c') (6, 3, 'c');
-- user 4: a x4, b x2, c x2 -- longer runs of repeated events.
insert into funnel_test_strict_order values (1, 4, 'a') (2, 4, 'a') (3, 4, 'a') (4, 4, 'a') (5, 4, 'b') (6, 4, 'b') (7, 4, 'c') (8, 4, 'c');
-- user 5: a, a, b, b, d, c, c -- an unrelated event 'd' appears between 'b' and 'c'.
insert into funnel_test_strict_order values (1, 5, 'a') (2, 5, 'a') (3, 5, 'b') (4, 5, 'b') (5, 5, 'd') (6, 5, 'c') (7, 5, 'c');
-- user 6: c, c, b, b, a, a -- the funnel events arrive in reverse order.
insert into funnel_test_strict_order values (1, 6, 'c') (2, 6, 'c') (3, 6, 'b') (4, 6, 'b') (5, 6, 'a') (6, 6, 'a');
-- No options; the reference output expects levels 3, 3, 3, 3, 3, 1 for users 1..6.
select user, windowFunnel(86400)(dt, event='a', event='b', event='c') as s from funnel_test_strict_order group by user order by user format JSONCompactEachRow;
-- 'strict_order' only; the reference output expects levels 3, 1, 3, 3, 2, 1 for users 1..6.
select user, windowFunnel(86400, 'strict_order')(dt, event='a', event='b', event='c') as s from funnel_test_strict_order group by user order by user format JSONCompactEachRow;
-- 'strict' combined with 'strict_order'; the reference output expects levels 3, 1, 2, 2, 2, 1 for users 1..6.
select user, windowFunnel(86400, 'strict', 'strict_order')(dt, event='a', event='b', event='c') as s from funnel_test_strict_order group by user order by user format JSONCompactEachRow;
drop table funnel_test_strict_order;