mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-24 10:40:49 +00:00
Fix comparator of sequenceNextNode for more deterministic sorting
This commit is contained in:
parent
ee5d76a82b
commit
fad1a4ccb3
@ -115,7 +115,7 @@ auto createAggregateFunctionSequenceNodeMaxArgs(UInt64 max_args)
|
||||
void registerAggregateFunctionSequenceNextNode(AggregateFunctionFactory & factory)
|
||||
{
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = false };
|
||||
factory.registerFunction("sequenceNextNode", { createAggregateFunctionSequenceNodeMaxArgs(31), properties });
|
||||
factory.registerFunction("sequenceNextNode", { createAggregateFunctionSequenceNodeMaxArgs(MAX_EVENTS_SIZE), properties });
|
||||
factory.registerFunction("sequenceFirstNode", { createAggregateFunctionSequenceNodeMaxArgs(0), properties });
|
||||
}
|
||||
|
||||
|
@ -27,19 +27,7 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/**
|
||||
* When sorting the list of events the EMPTY_EVENTS_BITSET will be moved to the last.
|
||||
* In the case of events,
|
||||
* dt action
|
||||
* 2020-01-01 00:00:01 'D'
|
||||
* 2020-01-01 00:00:01 'A'
|
||||
* 2020-01-01 00:00:01 'B'
|
||||
* 2020-01-01 00:00:01 'C'
|
||||
* The next node of a chain of events 'A' -> 'B' -> 'C' is expected to be the 'D'.
|
||||
* Because EMPTY_EVENTS_BITSET is 0x80000000 the order of the sorted events is ['A", 'B', 'C', 'D']. The result value of this aggregation is 'D'.
|
||||
* If EMPTY_EVENTS_BITSET is 0 hen the order of the sorted events is ['D', 'A', 'B', 'C']. This time, the result value is NULL.
|
||||
*/
|
||||
static const UInt32 EMPTY_EVENTS_BITSET = 0x80000000;
|
||||
const UInt32 MAX_EVENTS_SIZE = 64;
|
||||
|
||||
/// NodeBase used to implement a linked list for storage of SequenceNextNodeImpl
|
||||
template <typename Node>
|
||||
@ -48,9 +36,7 @@ struct NodeBase
|
||||
UInt64 size; /// size of payload
|
||||
|
||||
DataTypeDateTime::FieldType event_time;
|
||||
UInt32 events_bitset; /// Bitsets of UInt32 are easy to compare. (< operator on bitsets)
|
||||
/// Nodes in the list must be sorted in order to find a chain of events at the method getNextNodeIndex().
|
||||
/// While sorting, events_bitset is one of sorting criteria.
|
||||
std::bitset<MAX_EVENTS_SIZE> events_bitset;
|
||||
|
||||
char * data() { return reinterpret_cast<char *>(this) + sizeof(Node); }
|
||||
|
||||
@ -68,7 +54,8 @@ struct NodeBase
|
||||
buf.write(data(), size);
|
||||
|
||||
writeBinary(event_time, buf);
|
||||
writeBinary(events_bitset, buf);
|
||||
UInt64 ulong_bitset = events_bitset.to_ulong();
|
||||
writeBinary(ulong_bitset, buf);
|
||||
}
|
||||
|
||||
static Node * read(ReadBuffer & buf, Arena * arena)
|
||||
@ -81,7 +68,9 @@ struct NodeBase
|
||||
buf.read(node->data(), size);
|
||||
|
||||
readBinary(node->event_time, buf);
|
||||
readBinary(node->events_bitset, buf);
|
||||
UInt64 ulong_bitset;
|
||||
readBinary(ulong_bitset, buf);
|
||||
node->events_bitset = ulong_bitset;
|
||||
|
||||
return node;
|
||||
}
|
||||
@ -130,13 +119,9 @@ struct SequenceNextNodeGeneralData
|
||||
bool operator()(const Node * lhs, const Node * rhs) const
|
||||
{
|
||||
if constexpr (Descending)
|
||||
return lhs->event_time == rhs->event_time ?
|
||||
(lhs->events_bitset == rhs->events_bitset ? lhs->compare(rhs) : lhs->events_bitset < rhs->events_bitset)
|
||||
: lhs->event_time > rhs->event_time;
|
||||
return lhs->event_time == rhs->event_time ? !lhs->compare(rhs) : lhs->event_time > rhs->event_time;
|
||||
else
|
||||
return lhs->event_time == rhs->event_time ?
|
||||
(lhs->events_bitset == rhs->events_bitset ? lhs->compare(rhs) : lhs->events_bitset < rhs->events_bitset)
|
||||
: lhs->event_time < rhs->event_time;
|
||||
return lhs->event_time == rhs->event_time ? lhs->compare(rhs) : lhs->event_time < rhs->event_time;
|
||||
}
|
||||
};
|
||||
|
||||
@ -179,7 +164,7 @@ public:
|
||||
|
||||
AggregateFunctionPtr getOwnNullAdapter(
|
||||
const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params,
|
||||
const AggregateFunctionProperties & /*properties*/) const override
|
||||
const AggregateFunctionProperties &) const override
|
||||
{
|
||||
/// This aggregate function sets insertion_requires_nullable_column on.
|
||||
/// Even though some values are mapped to aggregating key, it could return nulls for the below case.
|
||||
@ -213,14 +198,11 @@ public:
|
||||
/// 0x00000000
|
||||
/// + 1 (bit of event1)
|
||||
/// + 4 (bit of event3)
|
||||
UInt32 events_bitset = 0;
|
||||
node->events_bitset.reset();
|
||||
for (UInt8 i = 0; i < events_size; ++i)
|
||||
if (assert_cast<const ColumnVector<UInt8> *>(columns[2 + i])->getData()[row_num])
|
||||
events_bitset += (1 << i);
|
||||
if (events_bitset == 0) events_bitset = EMPTY_EVENTS_BITSET; // Any events are not matched.
|
||||
|
||||
node->events_bitset.set(i);
|
||||
node->event_time = timestamp;
|
||||
node->events_bitset = events_bitset;
|
||||
|
||||
data(place).value.push_back(node, arena);
|
||||
}
|
||||
@ -295,7 +277,7 @@ public:
|
||||
{
|
||||
UInt32 k = 0;
|
||||
for (; k < events_size - j; ++k)
|
||||
if (data.value[i - j]->events_bitset & (1 << (events_size - 1 - j - k)))
|
||||
if (data.value[i - j]->events_bitset.test(events_size - 1 - j - k))
|
||||
return k;
|
||||
return k;
|
||||
}
|
||||
@ -322,7 +304,7 @@ public:
|
||||
for (; j < events_size; ++j)
|
||||
/// It compares each matched events.
|
||||
/// The lower bitmask is the former matched event.
|
||||
if (!(data.value[i - j]->events_bitset & (1 << (events_size - 1 - j))))
|
||||
if (data.value[i - j]->events_bitset.test(events_size - 1 - j) == false)
|
||||
break;
|
||||
|
||||
/// If the chain of events are matched returns the index of result value.
|
||||
@ -413,12 +395,11 @@ public:
|
||||
is_first = false;
|
||||
}
|
||||
|
||||
|
||||
if (is_first)
|
||||
{
|
||||
Node * node = Node::allocate(*columns[1], row_num, arena);
|
||||
node->event_time = timestamp;
|
||||
node->events_bitset = EMPTY_EVENTS_BITSET;
|
||||
node->events_bitset.reset();
|
||||
|
||||
data(place).value.push_back(node, arena);
|
||||
}
|
||||
|
@ -124,14 +124,14 @@
|
||||
(0, A) id >= 10 10 B
|
||||
(0, A) id >= 10 10 B
|
||||
(0, A) id >= 10 10 A
|
||||
(0, A) id = 11 0
|
||||
(0, C) id = 11 0
|
||||
(0, B->C) id = 11 0
|
||||
(0, A->B->C) id = 11 0
|
||||
(0, A) id = 11 0
|
||||
(0, C) id = 11 0
|
||||
(0, C->B) id = 11 0
|
||||
(0, C->B->A) id = 11 0
|
||||
(0, A) id = 11 1
|
||||
(0, C) id = 11 1
|
||||
(0, B->C) id = 11 1
|
||||
(0, A->B->C) id = 11 1
|
||||
(0, A) id = 11 1
|
||||
(0, C) id = 11 1
|
||||
(0, C->B) id = 11 1
|
||||
(0, C->B->A) id = 11 1
|
||||
(0) id < 10 1 A
|
||||
(0) id < 10 2 A
|
||||
(0) id < 10 3 A
|
||||
|
@ -61,14 +61,19 @@ SELECT '(0, A) id >= 10', id, sequenceNextNode(1)(dt, action, action = 'C') AS n
|
||||
SELECT '(0, A) id >= 10', id, sequenceNextNode(1)(dt, action, action = 'D', action = 'C') AS next_node FROM test_sequenceNextNode_Nullable WHERE id >= 10 GROUP BY id ORDER BY id;
|
||||
SELECT '(0, A) id >= 10', id, sequenceNextNode(1)(dt, action, action = 'C', action = 'B') AS next_node FROM test_sequenceNextNode_Nullable WHERE id >= 10 GROUP BY id ORDER BY id;
|
||||
|
||||
SELECT '(0, A) id = 11', count() FROM (SELECT id, sequenceNextNode(0)(dt, action, action = 'A') AS next_node FROM test_sequenceNextNode_Nullable WHERE id = 11 GROUP BY id HAVING next_node in ('B', 'C', 'D'));
|
||||
SELECT '(0, C) id = 11', count() FROM (SELECT id, sequenceNextNode(0)(dt, action, action = 'C') AS next_node FROM test_sequenceNextNode_Nullable WHERE id = 11 GROUP BY id HAVING next_node in ('B', 'A', 'D'));
|
||||
SELECT '(0, B->C) id = 11', count() FROM (SELECT id, sequenceNextNode(0)(dt, action, action = 'B', action ='C') AS next_node FROM test_sequenceNextNode_Nullable WHERE id = 11 GROUP BY id HAVING next_node in ('A', 'D'));
|
||||
SELECT '(0, A->B->C) id = 11', count() FROM (SELECT id, sequenceNextNode(0)(dt, action, action = 'A', action = 'B', action = 'C') AS next_node FROM test_sequenceNextNode_Nullable WHERE id = 11 GROUP BY id HAVING next_node in ('D'));
|
||||
SELECT '(0, A) id = 11', count() FROM (SELECT id, sequenceNextNode(1)(dt, action, action = 'A') AS next_node FROM test_sequenceNextNode_Nullable WHERE id = 11 GROUP BY id HAVING next_node in ('B', 'C', 'D'));
|
||||
SELECT '(0, C) id = 11', count() FROM (SELECT id, sequenceNextNode(1)(dt, action, action = 'C') AS next_node FROM test_sequenceNextNode_Nullable WHERE id = 11 GROUP BY id HAVING next_node in ('B', 'A', 'D'));
|
||||
SELECT '(0, C->B) id = 11', count() FROM (SELECT id, sequenceNextNode(1)(dt, action, action = 'C', action ='B') AS next_node FROM test_sequenceNextNode_Nullable WHERE id = 11 GROUP BY id HAVING next_node in ('A', 'D'));
|
||||
SELECT '(0, C->B->A) id = 11', count() FROM (SELECT id, sequenceNextNode(1)(dt, action, action = 'C', action = 'B', action = 'A') AS next_node FROM test_sequenceNextNode_Nullable WHERE id = 11 GROUP BY id HAVING next_node in ('D'));
|
||||
INSERT INTO test_sequenceNextNode_Nullable values ('1970-01-01 09:00:01',11,'A');
|
||||
INSERT INTO test_sequenceNextNode_Nullable values ('1970-01-01 09:00:01',11,'B');
|
||||
INSERT INTO test_sequenceNextNode_Nullable values ('1970-01-01 09:00:01',11,'C');
|
||||
INSERT INTO test_sequenceNextNode_Nullable values ('1970-01-01 09:00:01',11,'D');
|
||||
|
||||
SELECT '(0, A) id = 11', count() FROM (SELECT id, sequenceNextNode(0)(dt, action, action = 'A') AS next_node FROM test_sequenceNextNode_Nullable WHERE id = 11 GROUP BY id HAVING next_node = 'B');
|
||||
SELECT '(0, C) id = 11', count() FROM (SELECT id, sequenceNextNode(0)(dt, action, action = 'C') AS next_node FROM test_sequenceNextNode_Nullable WHERE id = 11 GROUP BY id HAVING next_node = 'D');
|
||||
SELECT '(0, B->C) id = 11', count() FROM (SELECT id, sequenceNextNode(0)(dt, action, action = 'B', action ='C') AS next_node FROM test_sequenceNextNode_Nullable WHERE id = 11 GROUP BY id HAVING next_node = 'D');
|
||||
SELECT '(0, A->B->C) id = 11', count() FROM (SELECT id, sequenceNextNode(0)(dt, action, action = 'A', action = 'B', action = 'C') AS next_node FROM test_sequenceNextNode_Nullable WHERE id = 11 GROUP BY id HAVING next_node = 'D');
|
||||
SELECT '(0, A) id = 11', count() FROM (SELECT id, sequenceNextNode(1)(dt, action, action = 'A') AS next_node FROM test_sequenceNextNode_Nullable WHERE id = 11 GROUP BY id HAVING next_node is NULL);
|
||||
SELECT '(0, C) id = 11', count() FROM (SELECT id, sequenceNextNode(1)(dt, action, action = 'C') AS next_node FROM test_sequenceNextNode_Nullable WHERE id = 11 GROUP BY id HAVING next_node = 'B');
|
||||
SELECT '(0, C->B) id = 11', count() FROM (SELECT id, sequenceNextNode(1)(dt, action, action = 'C', action ='B') AS next_node FROM test_sequenceNextNode_Nullable WHERE id = 11 GROUP BY id HAVING next_node = 'A');
|
||||
SELECT '(0, C->B->A) id = 11', count() FROM (SELECT id, sequenceNextNode(1)(dt, action, action = 'C', action = 'B', action = 'A') AS next_node FROM test_sequenceNextNode_Nullable WHERE id = 11 GROUP BY id HAVING next_node is null);
|
||||
|
||||
SELECT '(0) id < 10', id, sequenceNextNode(0)(dt, action) AS next_node FROM test_sequenceNextNode_Nullable WHERE id < 10 GROUP BY id ORDER BY id;
|
||||
SELECT '(0) id < 10', id, sequenceFirstNode(0)(dt, action) AS next_node FROM test_sequenceNextNode_Nullable WHERE id < 10 GROUP BY id ORDER BY id;
|
||||
@ -146,14 +151,14 @@ INSERT INTO test_sequenceNextNode values ('1970-01-01 09:00:01',11,'B');
|
||||
INSERT INTO test_sequenceNextNode values ('1970-01-01 09:00:01',11,'C');
|
||||
INSERT INTO test_sequenceNextNode values ('1970-01-01 09:00:01',11,'D');
|
||||
|
||||
SELECT '(0, A) id = 11', count() FROM (SELECT id, sequenceNextNode(0)(dt, action, action = 'A') AS next_node FROM test_sequenceNextNode WHERE id = 11 GROUP BY id HAVING next_node in ('B', 'C', 'D'));
|
||||
SELECT '(0, C) id = 11', count() FROM (SELECT id, sequenceNextNode(0)(dt, action, action = 'C') AS next_node FROM test_sequenceNextNode WHERE id = 11 GROUP BY id HAVING next_node in ('B', 'A', 'D'));
|
||||
SELECT '(0, B->C) id = 11', count() FROM (SELECT id, sequenceNextNode(0)(dt, action, action = 'B', action ='C') AS next_node FROM test_sequenceNextNode WHERE id = 11 GROUP BY id HAVING next_node in ('A', 'D'));
|
||||
SELECT '(0, A->B->C) id = 11', count() FROM (SELECT id, sequenceNextNode(0)(dt, action, action = 'A', action = 'B', action = 'C') AS next_node FROM test_sequenceNextNode WHERE id = 11 GROUP BY id HAVING next_node in ('D'));
|
||||
SELECT '(0, A) id = 11', count() FROM (SELECT id, sequenceNextNode(1)(dt, action, action = 'A') AS next_node FROM test_sequenceNextNode WHERE id = 11 GROUP BY id HAVING next_node in ('B', 'C', 'D'));
|
||||
SELECT '(0, C) id = 11', count() FROM (SELECT id, sequenceNextNode(1)(dt, action, action = 'C') AS next_node FROM test_sequenceNextNode WHERE id = 11 GROUP BY id HAVING next_node in ('B', 'A', 'D'));
|
||||
SELECT '(0, C->B) id = 11', count() FROM (SELECT id, sequenceNextNode(1)(dt, action, action = 'C', action ='B') AS next_node FROM test_sequenceNextNode WHERE id = 11 GROUP BY id HAVING next_node in ('A', 'D'));
|
||||
SELECT '(0, C->B->A) id = 11', count() FROM (SELECT id, sequenceNextNode(1)(dt, action, action = 'C', action = 'B', action = 'A') AS next_node FROM test_sequenceNextNode WHERE id = 11 GROUP BY id HAVING next_node in ('D'));
|
||||
SELECT '(0, A) id = 11', count() FROM (SELECT id, sequenceNextNode(0)(dt, action, action = 'A') AS next_node FROM test_sequenceNextNode WHERE id = 11 GROUP BY id HAVING next_node = 'B');
|
||||
SELECT '(0, C) id = 11', count() FROM (SELECT id, sequenceNextNode(0)(dt, action, action = 'C') AS next_node FROM test_sequenceNextNode WHERE id = 11 GROUP BY id HAVING next_node = 'D');
|
||||
SELECT '(0, B->C) id = 11', count() FROM (SELECT id, sequenceNextNode(0)(dt, action, action = 'B', action ='C') AS next_node FROM test_sequenceNextNode WHERE id = 11 GROUP BY id HAVING next_node = 'D');
|
||||
SELECT '(0, A->B->C) id = 11', count() FROM (SELECT id, sequenceNextNode(0)(dt, action, action = 'A', action = 'B', action = 'C') AS next_node FROM test_sequenceNextNode WHERE id = 11 GROUP BY id HAVING next_node = 'D');
|
||||
SELECT '(0, A) id = 11', count() FROM (SELECT id, sequenceNextNode(1)(dt, action, action = 'A') AS next_node FROM test_sequenceNextNode WHERE id = 11 GROUP BY id HAVING next_node is NULL);
|
||||
SELECT '(0, C) id = 11', count() FROM (SELECT id, sequenceNextNode(1)(dt, action, action = 'C') AS next_node FROM test_sequenceNextNode WHERE id = 11 GROUP BY id HAVING next_node = 'B');
|
||||
SELECT '(0, C->B) id = 11', count() FROM (SELECT id, sequenceNextNode(1)(dt, action, action = 'C', action ='B') AS next_node FROM test_sequenceNextNode WHERE id = 11 GROUP BY id HAVING next_node = 'A');
|
||||
SELECT '(0, C->B->A) id = 11', count() FROM (SELECT id, sequenceNextNode(1)(dt, action, action = 'C', action = 'B', action = 'A') AS next_node FROM test_sequenceNextNode WHERE id = 11 GROUP BY id HAVING next_node is null);
|
||||
|
||||
SELECT '(0) id < 10', id, sequenceNextNode(0)(dt, action) AS next_node FROM test_sequenceNextNode WHERE id < 10 GROUP BY id ORDER BY id;
|
||||
SELECT '(0) id < 10', id, sequenceFirstNode(0)(dt, action) AS next_node FROM test_sequenceNextNode WHERE id < 10 GROUP BY id ORDER BY id;
|
||||
|
Loading…
Reference in New Issue
Block a user