mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 16:42:05 +00:00
Merge pull request #27563 from achimbab/bugfix_wf_strict
Bugfix for windowFunnel's "strict" mode.
This commit is contained in:
commit
e49d0c4533
@ -255,7 +255,7 @@ windowFunnel(window, [mode, [mode, ... ]])(timestamp, cond1, cond2, ..., condN)
|
||||
|
||||
- `window` — Length of the sliding window, it is the time interval between the first and the last condition. The unit of `window` depends on the `timestamp` itself and varies. Determined using the expression `timestamp of cond1 <= timestamp of cond2 <= ... <= timestamp of condN <= timestamp of cond1 + window`.
|
||||
- `mode` — It is an optional argument. One or more modes can be set.
|
||||
- `'strict'` — If same condition holds for sequence of events then such non-unique events would be skipped.
|
||||
- `'strict_deduplication'` — If the same condition holds for the sequence of events, then such repeating event interrupts further processing.
|
||||
- `'strict_order'` — Don't allow interventions of other events. E.g. in the case of `A->B->D->C`, it stops finding `A->B->C` at the `D` and the max event level is 2.
|
||||
- `'strict_increase'` — Apply conditions only to events with strictly increasing timestamps.
|
||||
|
||||
|
@ -137,8 +137,8 @@ class AggregateFunctionWindowFunnel final
|
||||
private:
|
||||
UInt64 window;
|
||||
UInt8 events_size;
|
||||
/// When the 'strict' is set, it applies conditions only for the not repeating values.
|
||||
bool strict;
|
||||
/// When the 'strict_deduplication' is set, it applies conditions only for the not repeating values.
|
||||
bool strict_deduplication;
|
||||
|
||||
/// When the 'strict_order' is set, it doesn't allow interventions of other events.
|
||||
/// In the case of 'A->B->D->C', it stops finding 'A->B->C' at the 'D' and the max event level is 2.
|
||||
@ -150,7 +150,7 @@ private:
|
||||
/// Loop through the entire events_list, update the event timestamp value
|
||||
/// The level path must be 1---2---3---...---check_events_size, find the max event level that satisfied the path in the sliding window.
|
||||
/// If found, returns the max event level, else return 0.
|
||||
/// The Algorithm complexity is O(n).
|
||||
/// The algorithm works in O(n) time, but the overall function works in O(n * log(n)) due to sorting.
|
||||
UInt8 getEventLevel(Data & data) const
|
||||
{
|
||||
if (data.size() == 0)
|
||||
@ -163,10 +163,10 @@ private:
|
||||
/// events_timestamp stores the timestamp of the first and previous i-th level event happen within time window
|
||||
std::vector<std::optional<std::pair<UInt64, UInt64>>> events_timestamp(events_size);
|
||||
bool first_event = false;
|
||||
for (const auto & pair : data.events_list)
|
||||
for (size_t i = 0; i < data.events_list.size(); ++i)
|
||||
{
|
||||
const T & timestamp = pair.first;
|
||||
const auto & event_idx = pair.second - 1;
|
||||
const T & timestamp = data.events_list[i].first;
|
||||
const auto & event_idx = data.events_list[i].second - 1;
|
||||
if (strict_order && event_idx == -1)
|
||||
{
|
||||
if (first_event)
|
||||
@ -179,9 +179,9 @@ private:
|
||||
events_timestamp[0] = std::make_pair(timestamp, timestamp);
|
||||
first_event = true;
|
||||
}
|
||||
else if (strict && events_timestamp[event_idx].has_value())
|
||||
else if (strict_deduplication && events_timestamp[event_idx].has_value())
|
||||
{
|
||||
return event_idx + 1;
|
||||
return data.events_list[i - 1].second;
|
||||
}
|
||||
else if (strict_order && first_event && !events_timestamp[event_idx - 1].has_value())
|
||||
{
|
||||
@ -226,18 +226,20 @@ public:
|
||||
events_size = arguments.size() - 1;
|
||||
window = params.at(0).safeGet<UInt64>();
|
||||
|
||||
strict = false;
|
||||
strict_deduplication = false;
|
||||
strict_order = false;
|
||||
strict_increase = false;
|
||||
for (size_t i = 1; i < params.size(); ++i)
|
||||
{
|
||||
String option = params.at(i).safeGet<String>();
|
||||
if (option == "strict")
|
||||
strict = true;
|
||||
if (option == "strict_deduplication")
|
||||
strict_deduplication = true;
|
||||
else if (option == "strict_order")
|
||||
strict_order = true;
|
||||
else if (option == "strict_increase")
|
||||
strict_increase = true;
|
||||
else if (option == "strict")
|
||||
throw Exception{"strict is replaced with strict_deduplication in Aggregate function " + getName(), ErrorCodes::BAD_ARGUMENTS};
|
||||
else
|
||||
throw Exception{"Aggregate function " + getName() + " doesn't support a parameter: " + option, ErrorCodes::BAD_ARGUMENTS};
|
||||
}
|
||||
|
@ -37,6 +37,7 @@
|
||||
[5, 2]
|
||||
[6, 1]
|
||||
[7, 1]
|
||||
[1]
|
||||
[1, 2]
|
||||
[2, 2]
|
||||
[3, 0]
|
||||
|
@ -43,7 +43,7 @@ drop table if exists funnel_test_strict;
|
||||
create table funnel_test_strict (timestamp UInt32, event UInt32) engine=Memory;
|
||||
insert into funnel_test_strict values (00,1000),(10,1001),(20,1002),(30,1003),(40,1004),(50,1005),(51,1005),(60,1006),(70,1007),(80,1008);
|
||||
|
||||
select 6 = windowFunnel(10000, 'strict')(timestamp, event = 1000, event = 1001, event = 1002, event = 1003, event = 1004, event = 1005, event = 1006) from funnel_test_strict;
|
||||
select 6 = windowFunnel(10000, 'strict_deduplication')(timestamp, event = 1000, event = 1001, event = 1002, event = 1003, event = 1004, event = 1005, event = 1006) from funnel_test_strict;
|
||||
select 7 = windowFunnel(10000)(timestamp, event = 1000, event = 1001, event = 1002, event = 1003, event = 1004, event = 1005, event = 1006) from funnel_test_strict;
|
||||
|
||||
|
||||
@ -62,11 +62,18 @@ insert into funnel_test_strict_order values (1, 5, 'a') (2, 5, 'a') (3, 5, 'b')
|
||||
insert into funnel_test_strict_order values (1, 6, 'c') (2, 6, 'c') (3, 6, 'b') (4, 6, 'b') (5, 6, 'a') (6, 6, 'a');
|
||||
select user, windowFunnel(86400)(dt, event='a', event='b', event='c') as s from funnel_test_strict_order group by user order by user format JSONCompactEachRow;
|
||||
select user, windowFunnel(86400, 'strict_order')(dt, event='a', event='b', event='c') as s from funnel_test_strict_order group by user order by user format JSONCompactEachRow;
|
||||
select user, windowFunnel(86400, 'strict', 'strict_order')(dt, event='a', event='b', event='c') as s from funnel_test_strict_order group by user order by user format JSONCompactEachRow;
|
||||
select user, windowFunnel(86400, 'strict_deduplication', 'strict_order')(dt, event='a', event='b', event='c') as s from funnel_test_strict_order group by user order by user format JSONCompactEachRow;
|
||||
insert into funnel_test_strict_order values (1, 7, 'a') (2, 7, 'c') (3, 7, 'b');
|
||||
select user, windowFunnel(10, 'strict_order')(dt, event = 'a', event = 'b', event = 'c') as s from funnel_test_strict_order where user = 7 group by user format JSONCompactEachRow;
|
||||
drop table funnel_test_strict_order;
|
||||
|
||||
--https://github.com/ClickHouse/ClickHouse/issues/27469
|
||||
drop table if exists strict_BiteTheDDDD;
|
||||
create table strict_BiteTheDDDD (ts UInt64, event String) engine = Log();
|
||||
insert into strict_BiteTheDDDD values (1,'a') (2,'b') (3,'c') (4,'b') (5,'d');
|
||||
select 3 = windowFunnel(86400, 'strict_deduplication')(ts, event='a', event='b', event='c', event='d') from strict_BiteTheDDDD format JSONCompactEachRow;
|
||||
drop table strict_BiteTheDDDD;
|
||||
|
||||
drop table if exists funnel_test_non_null;
|
||||
create table funnel_test_non_null (`dt` DateTime, `u` int, `a` Nullable(String), `b` Nullable(String)) engine = MergeTree() partition by dt order by u;
|
||||
insert into funnel_test_non_null values (1, 1, 'a1', 'b1') (2, 1, 'a2', 'b2');
|
||||
|
Loading…
Reference in New Issue
Block a user