bug fix. add unit tests.#

This commit is contained in:
pingyu 2021-04-04 22:22:47 +08:00
parent 86d8bc1c54
commit 1ea74a1947
23 changed files with 618 additions and 102 deletions

View File

@ -138,7 +138,7 @@ struct AggregateFunctionUniqThetaSketchData
/// For a function that takes multiple arguments. Such a function pre-hashes them in advance, so no additional hash is configured here (the sketch's own `update` still hashes the value again internally).
struct AggregateFunctionUniqThetaSketchDataForVariadic
{
using Set = ThetaSketchData<UInt64, TrivialHash>;
using Set = ThetaSketchData<UInt64>;
Set set;
static String getName() { return "uniqThetaSketch"; }

View File

@ -4,119 +4,106 @@
#include <theta_sketch.hpp>
#include <theta_union.hpp>
#include <common/logger_useful.h>
#include <memory>
namespace DB
{
template
<
typename Key,
typename Hash = IntHash32<Key>>
template <typename Key>
class ThetaSketchData : private boost::noncopyable
{
private:
mutable datasketches::update_theta_sketch sk_update;
mutable datasketches::theta_union sk_union;
bool is_merged;
Poco::Logger * log;
std::unique_ptr<datasketches::update_theta_sketch> sk_update;
std::unique_ptr<datasketches::theta_union> sk_union;
// void internal_merge() const
// {
// if (!sk_update.is_empty())
// {
// sk_union.update(sk_update);
// sk_update = datasketches::update_theta_sketch::builder().build();
// }
// }
inline datasketches::update_theta_sketch * get_sk_update()
{
if (!sk_update)
sk_update = std::make_unique<datasketches::update_theta_sketch>(datasketches::update_theta_sketch::builder().build());
return sk_update.get();
}
inline datasketches::theta_union * get_sk_union()
{
if (!sk_union)
sk_union = std::make_unique<datasketches::theta_union>(datasketches::theta_union::builder().build());
return sk_union.get();
}
public:
using value_type = Key;
ThetaSketchData()
: sk_update(datasketches::update_theta_sketch::builder().build()),
sk_union(datasketches::theta_union::builder().build()),
is_merged(false),
log(&Poco::Logger::get("ThetaSketchData"))
{
}
ThetaSketchData() = default;
~ThetaSketchData() = default;
/// Insert the original value without pre-hashing it, as `datasketches::update_theta_sketch.update` does the hashing internally.
void insert_original(const StringRef & value)
{
sk_update.update(value.data, value.size);
LOG_WARNING(log, "insert_origin() {}", value.toString());
get_sk_update()->update(value.data, value.size);
}
/// Note that `datasketches::update_theta_sketch.update` will do the hash again.
void insert(Key value)
{
sk_update.update(value);
LOG_WARNING(log, "insert() {}", value);
get_sk_update()->update(value);
}
UInt64 size() const
{
LOG_WARNING(log, "size() update:{}, union:{}", sk_update.get_estimate(), sk_union.get_result().get_estimate());
if (!is_merged)
return static_cast<UInt64>(sk_update.get_estimate());
if (sk_union)
return static_cast<UInt64>(sk_union->get_result().get_estimate());
else if (sk_update)
return static_cast<UInt64>(sk_update->get_estimate());
else
return static_cast<UInt64>(sk_union.get_result().get_estimate());
return 0;
}
void merge(const ThetaSketchData & rhs)
{
if (!is_merged && !sk_update.is_empty())
datasketches::theta_union * u = get_sk_union();
if (sk_update)
{
sk_union.update(sk_update);
u->update(*sk_update);
sk_update.reset(nullptr);
}
is_merged = true;
if (!rhs.is_merged && !rhs.sk_update.is_empty())
sk_union.update(rhs.sk_update);
else if (rhs.is_merged)
sk_union.update(rhs.sk_union.get_result());
LOG_WARNING(log, "merge() result:{}", sk_union.get_result().to_string());
if (rhs.sk_update)
u->update(*rhs.sk_update);
else if (rhs.sk_union)
u->update(rhs.sk_union->get_result());
}
/// This may only be called on an empty object.
void read(DB::ReadBuffer & in)
{
LOG_WARNING(log, "read() {}", sk_union.get_result().to_string());
datasketches::compact_theta_sketch::vector_bytes bytes;
readVectorBinary(bytes, in);
auto sk = datasketches::compact_theta_sketch::deserialize(bytes.data(), bytes.size());
sk_union = datasketches::theta_union::builder().build();
sk_union.update(sk);
is_merged = true;
LOG_WARNING(log, "read()[after] {}", sk_union.get_result().to_string());
if (!bytes.empty())
{
auto sk = datasketches::compact_theta_sketch::deserialize(bytes.data(), bytes.size());
get_sk_union()->update(sk);
}
}
// void readAndMerge(DB::ReadBuffer &)
// {
// LOG_WARNING(log, "readAndMerge() {}", sk_union.get_result().to_string());
// }
void write(DB::WriteBuffer & out) const
{
if (!is_merged)
if (sk_update)
{
auto bytes = sk_update.compact().serialize();
auto bytes = sk_update->compact().serialize();
writeVectorBinary(bytes, out);
}
else if (sk_union)
{
auto bytes = sk_union->get_result().serialize();
writeVectorBinary(bytes, out);
}
else
{
auto bytes = sk_union.get_result().serialize();
datasketches::compact_theta_sketch::vector_bytes bytes;
writeVectorBinary(bytes, out);
}
LOG_WARNING(log, "write() {}", sk_union.get_result().to_string());
}
};

View File

@ -13,7 +13,7 @@ static bool isUniq(const ASTFunction & func)
{
return func.name == "uniq" || func.name == "uniqExact" || func.name == "uniqHLL12"
|| func.name == "uniqCombined" || func.name == "uniqCombined64"
|| func.name == "uniqThetaSketchState";
|| func.name == "uniqThetaSketch";
}
/// Remove injective functions of one argument: replace with a child

View File

@ -1065,3 +1065,217 @@ uniqCombined remote()
1
1
1
uniqThetaSketch
1 1
3 1
6 1
7 1
9 1
11 1
14 1
17 1
19 1
20 2
26 1
31 1
35 1
36 1
0 162
1 162
3 162
6 162
7 163
9 163
10 81
11 163
13 162
14 162
17 162
19 162
20 162
21 162
22 162
26 162
31 162
35 162
36 162
0 55018
1 54020
3 53774
6 53947
7 53839
9 54408
10 26876
11 54985
13 53479
14 53516
17 53331
19 53680
20 54211
21 53054
22 54690
26 53716
31 54139
35 52331
36 53766
uniqThetaSketch round(float)
0.125 1
0.5 1
0.05 1
0.143 1
0.056 1
0.048 2
0.083 1
0.25 1
0.1 1
0.028 1
0.027 1
0.031 1
0.067 1
0.037 1
0.045 162
0.125 163
0.5 162
0.05 162
0.143 162
0.091 81
0.056 162
0.048 162
0.083 163
0.25 162
1 162
0.1 163
0.028 162
0.027 162
0.031 162
0.067 162
0.043 162
0.037 162
0.071 162
0.045 53054
0.125 53839
0.5 54020
0.05 53680
0.143 53947
0.091 26876
0.056 53331
0.048 54211
0.083 54985
0.25 53774
1 55018
0.1 54408
0.028 52331
0.027 53766
0.031 54139
0.067 53516
0.043 54690
0.037 53716
0.071 53479
uniqThetaSketch round(toFloat32())
0.5 1
0.05 1
0.25 1
0.048 2
0.083 1
0.125 1
0.031 1
0.143 1
0.028 1
0.067 1
0.027 1
0.056 1
0.037 1
0.1 1
0.5 162
0.05 162
0.25 162
0.048 162
0.091 81
0.043 162
0.071 162
0.083 163
0.125 163
0.031 162
0.143 162
0.028 162
0.067 162
0.045 162
0.027 162
0.056 162
0.037 162
0.1 163
1 162
0.5 54020
0.05 53680
0.25 53774
0.048 54211
0.091 26876
0.043 54690
0.071 53479
0.083 54985
0.125 53839
0.031 54139
0.143 53947
0.028 52331
0.067 53516
0.045 53054
0.027 53766
0.056 53331
0.037 53716
0.1 54408
1 55018
uniqThetaSketch IPv4NumToString
1 1
3 1
6 1
7 1
9 1
11 1
14 1
17 1
19 1
20 2
26 1
31 1
35 1
36 1
0 162
1 162
3 162
6 162
7 163
9 163
10 81
11 163
13 162
14 162
17 162
19 162
20 162
21 162
22 162
26 162
31 162
35 162
36 162
0 54929
1 53802
3 54706
6 54700
7 53592
9 54036
10 27392
11 53768
13 54566
14 53104
17 54243
19 55003
20 53398
21 53831
22 54603
26 54607
31 54012
35 54826
36 54910
uniqThetaSketch remote()
1

View File

@ -132,3 +132,33 @@ SELECT uniqCombined(dummy) FROM remote('127.0.0.{2,3}', system.one);
SELECT uniqCombined(12)(dummy) FROM remote('127.0.0.{2,3}', system.one);
SELECT uniqCombined(17)(dummy) FROM remote('127.0.0.{2,3}', system.one);
SELECT uniqCombined(20)(dummy) FROM remote('127.0.0.{2,3}', system.one);
-- uniqThetaSketch
SELECT 'uniqThetaSketch';
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y;
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y;
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y;
SELECT 'uniqThetaSketch round(float)';
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y;
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y;
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y;
SELECT 'uniqThetaSketch round(toFloat32())';
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y;
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y;
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y;
SELECT 'uniqThetaSketch IPv4NumToString';
SELECT Y, uniqThetaSketch(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y;
SELECT Y, uniqThetaSketch(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y;
SELECT Y, uniqThetaSketch(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y;
SELECT 'uniqThetaSketch remote()';
SELECT uniqThetaSketch(dummy) FROM remote('127.0.0.{2,3}', system.one);

View File

@ -1,11 +1,11 @@
10 10 100 100 1000 1000 10 10 100 100 1000 1000 10 10 100 100 1000 1000 10 10 101 101 1006 1006 10 10 100 100 1000 1000 6 6 6 6 6 6
17 10 10 100 100 610 610 10 10 100 100 610 610 10 10 100 100 610 610 10 10 101 101 616 616 10 10 100 100 610 610 6 6 6 6 6 6 766
52 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 611 611 10 10 100 100 608 608 6 6 6 6 6 6 766
5 10 10 100 100 609 609 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 608 608 10 10 100 100 609 609 6 6 6 6 6 6 765
9 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 618 618 10 10 100 100 608 608 6 6 6 6 6 6 765
13 10 10 100 100 607 607 10 10 100 100 607 607 10 10 100 100 607 607 10 10 101 101 610 610 10 10 100 100 607 607 6 6 6 6 6 6 765
46 10 10 100 100 607 607 10 10 100 100 607 607 10 10 100 100 607 607 10 10 101 101 611 611 10 10 100 100 607 607 6 6 6 6 6 6 765
48 10 10 100 100 609 609 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 617 617 10 10 100 100 609 609 6 6 6 6 6 6 765
50 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 614 614 10 10 100 100 608 608 6 6 6 6 6 6 765
54 10 10 100 100 609 609 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 615 615 10 10 100 100 609 609 6 6 6 6 6 6 765
56 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 614 614 10 10 100 100 608 608 6 6 6 6 6 6 765
10 10 100 100 1000 1000 10 10 100 100 1000 1000 10 10 100 100 1000 1000 10 10 101 101 1006 1006 10 10 100 100 1000 1000 6 6 6 6 6 6 10 10 100 100 1000 1000
17 10 10 100 100 610 610 10 10 100 100 610 610 10 10 100 100 610 610 10 10 101 101 616 616 10 10 100 100 610 610 6 6 6 6 6 6 10 10 100 100 610 610 766
52 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 611 611 10 10 100 100 608 608 6 6 6 6 6 6 10 10 100 100 608 608 766
5 10 10 100 100 609 609 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 608 608 10 10 100 100 609 609 6 6 6 6 6 6 10 10 100 100 608 608 765
9 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 618 618 10 10 100 100 608 608 6 6 6 6 6 6 10 10 100 100 608 608 765
13 10 10 100 100 607 607 10 10 100 100 607 607 10 10 100 100 607 607 10 10 101 101 610 610 10 10 100 100 607 607 6 6 6 6 6 6 10 10 100 100 607 607 765
46 10 10 100 100 607 607 10 10 100 100 607 607 10 10 100 100 607 607 10 10 101 101 611 611 10 10 100 100 607 607 6 6 6 6 6 6 10 10 100 100 607 607 765
48 10 10 100 100 609 609 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 617 617 10 10 100 100 609 609 6 6 6 6 6 6 10 10 100 100 609 609 765
50 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 614 614 10 10 100 100 608 608 6 6 6 6 6 6 10 10 100 100 608 608 765
54 10 10 100 100 609 609 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 615 615 10 10 100 100 609 609 6 6 6 6 6 6 10 10 100 100 609 609 765
56 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 614 614 10 10 100 100 608 608 6 6 6 6 6 6 10 10 100 100 608 608 765

View File

@ -4,7 +4,8 @@ SELECT
uniqCombined(17)(x), uniqCombined(17)((x)), uniqCombined(17)(x, y), uniqCombined(17)((x, y)), uniqCombined(17)(x, y, z), uniqCombined(17)((x, y, z)),
uniqHLL12(x), uniqHLL12((x)), uniqHLL12(x, y), uniqHLL12((x, y)), uniqHLL12(x, y, z), uniqHLL12((x, y, z)),
uniqExact(x), uniqExact((x)), uniqExact(x, y), uniqExact((x, y)), uniqExact(x, y, z), uniqExact((x, y, z)),
uniqUpTo(5)(x), uniqUpTo(5)((x)), uniqUpTo(5)(x, y), uniqUpTo(5)((x, y)), uniqUpTo(5)(x, y, z), uniqUpTo(5)((x, y, z))
uniqUpTo(5)(x), uniqUpTo(5)((x)), uniqUpTo(5)(x, y), uniqUpTo(5)((x, y)), uniqUpTo(5)(x, y, z), uniqUpTo(5)((x, y, z)),
uniqThetaSketch(x), uniqThetaSketch((x)), uniqThetaSketch(x, y), uniqThetaSketch((x, y)), uniqThetaSketch(x, y, z), uniqThetaSketch((x, y, z))
FROM
(
SELECT
@ -22,6 +23,7 @@ SELECT k,
uniqHLL12(x), uniqHLL12((x)), uniqHLL12(x, y), uniqHLL12((x, y)), uniqHLL12(x, y, z), uniqHLL12((x, y, z)),
uniqExact(x), uniqExact((x)), uniqExact(x, y), uniqExact((x, y)), uniqExact(x, y, z), uniqExact((x, y, z)),
uniqUpTo(5)(x), uniqUpTo(5)((x)), uniqUpTo(5)(x, y), uniqUpTo(5)((x, y)), uniqUpTo(5)(x, y, z), uniqUpTo(5)((x, y, z)),
uniqThetaSketch(x), uniqThetaSketch((x)), uniqThetaSketch(x, y), uniqThetaSketch((x, y)), uniqThetaSketch(x, y, z), uniqThetaSketch((x, y, z)),
count() AS c
FROM
(

View File

@ -4,3 +4,5 @@
143
123
143
123
143

View File

@ -7,3 +7,6 @@ SELECT count(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM
SET count_distinct_implementation = 'uniqExact';
SELECT count(DISTINCT x) FROM (SELECT number % 123 AS x FROM system.numbers LIMIT 1000);
SELECT count(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000);
SET count_distinct_implementation = 'uniqThetaSketch';
SELECT count(DISTINCT x) FROM (SELECT number % 123 AS x FROM system.numbers LIMIT 1000);
SELECT count(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000);

View File

@ -4,3 +4,5 @@
3
2
3
2
3

View File

@ -4,3 +4,5 @@ SELECT uniqExactArray([0, 1, 1], [0, 1, 1], [0, 1, 1]);
SELECT uniqExactArray([0, 1, 1], [0, 1, 1], [0, 1, 0]);
SELECT uniqUpToArray(10)([0, 1, 1], [0, 1, 1], [0, 1, 1]);
SELECT uniqUpToArray(10)([0, 1, 1], [0, 1, 1], [0, 1, 0]);
SELECT uniqThetaSketchArray([0, 1, 1], [0, 1, 1], [0, 1, 1]);
SELECT uniqThetaSketchArray([0, 1, 1], [0, 1, 1], [0, 1, 0]);

View File

@ -1,3 +1,4 @@
3
3
3
3

View File

@ -1,3 +1,4 @@
SELECT uniq(x) FROM (SELECT arrayJoin([[1, 2], [1, 2], [1, 2, 3], []]) AS x);
SELECT uniqExact(x) FROM (SELECT arrayJoin([[1, 2], [1, 2], [1, 2, 3], []]) AS x);
SELECT uniqUpTo(2)(x) FROM (SELECT arrayJoin([[1, 2], [1, 2], [1, 2, 3], []]) AS x);
SELECT uniqThetaSketch(x) FROM (SELECT arrayJoin([[1, 2], [1, 2], [1, 2, 3], []]) AS x);

View File

@ -25,3 +25,12 @@
3
3
3
3
3
3
3
3
3
3
3
3

View File

@ -27,3 +27,13 @@ SELECT uniqUpTo(3)((x, x)) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) A
SELECT uniqUpTo(3)((x, arrayMap(elem -> [elem, elem], x))) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
SELECT uniqUpTo(3)((x, toString(x))) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
SELECT uniqUpTo(3)(x) FROM (SELECT arrayJoin([[], ['a'], ['a', NULL, 'b'], []]) AS x);
SELECT uniqThetaSketch(x) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
SELECT uniqThetaSketch(x) FROM (SELECT arrayJoin([[[]], [['a', 'b']], [['a'], ['b']], [['a', 'b']]]) AS x);
SELECT uniqThetaSketch(x, x) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
SELECT uniqThetaSketch(x, arrayMap(elem -> [elem, elem], x)) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
SELECT uniqThetaSketch(x, toString(x)) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
SELECT uniqThetaSketch((x, x)) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
SELECT uniqThetaSketch((x, arrayMap(elem -> [elem, elem], x))) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
SELECT uniqThetaSketch((x, toString(x))) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
SELECT uniqThetaSketch(x) FROM (SELECT arrayJoin([[], ['a'], ['a', NULL, 'b'], []]) AS x);

View File

@ -5,7 +5,7 @@
-1275.0000 -424.99999983 -255.00000000 -1275.0000 -424.99999983 -255.00000000
101.0000 101.00000000 101.00000000 101.0000 101.00000000 101.00000000
-101.0000 -101.00000000 -101.00000000 -101.0000 -101.00000000 -101.00000000
(101,101,101) (101,101,101) (101,101,101) (101,101,101) (102,100,101)
(101,101,101) (101,101,101) (101,101,101) (101,101,101) (102,100,101) (101,101,101)
5 5 5
10 10 10
-50.0000 -50.0000 -16.66666666 -16.66666666 -10.00000000 -10.00000000

View File

@ -24,7 +24,8 @@ SELECT (uniq(a), uniq(b), uniq(c)),
(uniqCombined(a), uniqCombined(b), uniqCombined(c)),
(uniqCombined(17)(a), uniqCombined(17)(b), uniqCombined(17)(c)),
(uniqExact(a), uniqExact(b), uniqExact(c)),
(uniqHLL12(a), uniqHLL12(b), uniqHLL12(c))
(uniqHLL12(a), uniqHLL12(b), uniqHLL12(c)),
(uniqThetaSketch(a), uniqThetaSketch(b), uniqThetaSketch(c))
FROM (SELECT * FROM decimal ORDER BY a);
SELECT uniqUpTo(10)(a), uniqUpTo(10)(b), uniqUpTo(10)(c) FROM decimal WHERE a >= 0 AND a < 5;

View File

@ -5,7 +5,7 @@
0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000
0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000
0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000
(0,0,0) (0,0,0) (0,0,0) (0,0,0) (0,0,0)
(0,0,0) (0,0,0) (0,0,0) (0,0,0) (0,0,0) (0,0,0)
0 0 0
0 0 0
0.0000 0.0000 0.0000000 0.0000000 0.00000000 0.00000000

View File

@ -20,7 +20,8 @@ SELECT (uniq(a), uniq(b), uniq(c)),
(uniqCombined(a), uniqCombined(b), uniqCombined(c)),
(uniqCombined(17)(a), uniqCombined(17)(b), uniqCombined(17)(c)),
(uniqExact(a), uniqExact(b), uniqExact(c)),
(uniqHLL12(a), uniqHLL12(b), uniqHLL12(c))
(uniqHLL12(a), uniqHLL12(b), uniqHLL12(c)),
(uniqThetaSketch(a), uniqThetaSketch(b), uniqThetaSketch(c))
FROM (SELECT * FROM decimal ORDER BY a);
SELECT uniqUpTo(10)(a), uniqUpTo(10)(b), uniqUpTo(10)(c) FROM decimal WHERE a >= 0 AND a < 5;

View File

@ -3,7 +3,8 @@ SELECT
uniqExact(x),
uniqHLL12(x),
uniqCombined(x),
uniqCombined64(x)
uniqCombined64(x),
uniqThetaSketch(x)
FROM
(
SELECT number % 2 AS x
@ -14,7 +15,8 @@ SELECT
uniqExact(x + y),
uniqHLL12(x + y),
uniqCombined(x + y),
uniqCombined64(x + y)
uniqCombined64(x + y),
uniqThetaSketch(x + y)
FROM
(
SELECT
@ -27,7 +29,8 @@ SELECT
uniqExact(x),
uniqHLL12(x),
uniqCombined(x),
uniqCombined64(x)
uniqCombined64(x),
uniqThetaSketch(x)
FROM
(
SELECT number % 2 AS x
@ -38,7 +41,8 @@ SELECT
uniqExact(x),
uniqHLL12(x),
uniqCombined(x),
uniqCombined64(x)
uniqCombined64(x),
uniqThetaSketch(x)
FROM
(
SELECT number % 2 AS x
@ -49,7 +53,8 @@ SELECT
uniqExact(x),
uniqHLL12(x),
uniqCombined(x),
uniqCombined64(x)
uniqCombined64(x),
uniqThetaSketch(x)
FROM
(
SELECT number % 2 AS x
@ -60,7 +65,8 @@ SELECT
uniqExact(x),
uniqHLL12(x),
uniqCombined(x),
uniqCombined64(x)
uniqCombined64(x),
uniqThetaSketch(x)
FROM
(
SELECT number % 2 AS x
@ -79,7 +85,8 @@ SELECT
uniqExact(x),
uniqHLL12(x),
uniqCombined(x),
uniqCombined64(x)
uniqCombined64(x),
uniqThetaSketch(x)
FROM
(
SELECT number % 2 AS x
@ -90,7 +97,8 @@ SELECT
uniqExact(x + y),
uniqHLL12(x + y),
uniqCombined(x + y),
uniqCombined64(x + y)
uniqCombined64(x + y),
uniqThetaSketch(x + y)
FROM
(
SELECT
@ -103,7 +111,8 @@ SELECT
uniqExact(-x),
uniqHLL12(-x),
uniqCombined(-x),
uniqCombined64(-x)
uniqCombined64(-x),
uniqThetaSketch(-x)
FROM
(
SELECT number % 2 AS x
@ -114,7 +123,8 @@ SELECT
uniqExact(bitNot(x)),
uniqHLL12(bitNot(x)),
uniqCombined(bitNot(x)),
uniqCombined64(bitNot(x))
uniqCombined64(bitNot(x)),
uniqThetaSketch(bitNot(x))
FROM
(
SELECT number % 2 AS x
@ -125,7 +135,8 @@ SELECT
uniqExact(bitNot(-x)),
uniqHLL12(bitNot(-x)),
uniqCombined(bitNot(-x)),
uniqCombined64(bitNot(-x))
uniqCombined64(bitNot(-x)),
uniqThetaSketch(bitNot(-x))
FROM
(
SELECT number % 2 AS x
@ -136,7 +147,8 @@ SELECT
uniqExact(-bitNot(-x)),
uniqHLL12(-bitNot(-x)),
uniqCombined(-bitNot(-x)),
uniqCombined64(-bitNot(-x))
uniqCombined64(-bitNot(-x)),
uniqThetaSketch(-bitNot(-x))
FROM
(
SELECT number % 2 AS x

View File

@ -1,21 +1,21 @@
set optimize_injective_functions_inside_uniq = 1;
EXPLAIN SYNTAX select uniq(x), uniqExact(x), uniqHLL12(x), uniqCombined(x), uniqCombined64(x)
EXPLAIN SYNTAX select uniq(x), uniqExact(x), uniqHLL12(x), uniqCombined(x), uniqCombined64(x), uniqThetaSketch(x)
from (select number % 2 as x from numbers(10));
EXPLAIN SYNTAX select uniq(x + y), uniqExact(x + y), uniqHLL12(x + y), uniqCombined(x + y), uniqCombined64(x + y)
EXPLAIN SYNTAX select uniq(x + y), uniqExact(x + y), uniqHLL12(x + y), uniqCombined(x + y), uniqCombined64(x + y), uniqThetaSketch(x + y)
from (select number % 2 as x, number % 3 y from numbers(10));
EXPLAIN SYNTAX select uniq(-x), uniqExact(-x), uniqHLL12(-x), uniqCombined(-x), uniqCombined64(-x)
EXPLAIN SYNTAX select uniq(-x), uniqExact(-x), uniqHLL12(-x), uniqCombined(-x), uniqCombined64(-x), uniqThetaSketch(-x)
from (select number % 2 as x from numbers(10));
EXPLAIN SYNTAX select uniq(bitNot(x)), uniqExact(bitNot(x)), uniqHLL12(bitNot(x)), uniqCombined(bitNot(x)), uniqCombined64(bitNot(x))
EXPLAIN SYNTAX select uniq(bitNot(x)), uniqExact(bitNot(x)), uniqHLL12(bitNot(x)), uniqCombined(bitNot(x)), uniqCombined64(bitNot(x)), uniqThetaSketch(bitNot(x))
from (select number % 2 as x from numbers(10));
EXPLAIN SYNTAX select uniq(bitNot(-x)), uniqExact(bitNot(-x)), uniqHLL12(bitNot(-x)), uniqCombined(bitNot(-x)), uniqCombined64(bitNot(-x))
EXPLAIN SYNTAX select uniq(bitNot(-x)), uniqExact(bitNot(-x)), uniqHLL12(bitNot(-x)), uniqCombined(bitNot(-x)), uniqCombined64(bitNot(-x)), uniqThetaSketch(bitNot(-x))
from (select number % 2 as x from numbers(10));
EXPLAIN SYNTAX select uniq(-bitNot(-x)), uniqExact(-bitNot(-x)), uniqHLL12(-bitNot(-x)), uniqCombined(-bitNot(-x)), uniqCombined64(-bitNot(-x))
EXPLAIN SYNTAX select uniq(-bitNot(-x)), uniqExact(-bitNot(-x)), uniqHLL12(-bitNot(-x)), uniqCombined(-bitNot(-x)), uniqCombined64(-bitNot(-x)), uniqThetaSketch(-bitNot(-x))
from (select number % 2 as x from numbers(10));
EXPLAIN SYNTAX select count(distinct -bitNot(-x)) from (select number % 2 as x from numbers(10));
@ -24,22 +24,22 @@ EXPLAIN SYNTAX select uniq(concatAssumeInjective('x', 'y')) from numbers(10);
set optimize_injective_functions_inside_uniq = 0;
EXPLAIN SYNTAX select uniq(x), uniqExact(x), uniqHLL12(x), uniqCombined(x), uniqCombined64(x)
EXPLAIN SYNTAX select uniq(x), uniqExact(x), uniqHLL12(x), uniqCombined(x), uniqCombined64(x), uniqThetaSketch(x)
from (select number % 2 as x from numbers(10));
EXPLAIN SYNTAX select uniq(x + y), uniqExact(x + y), uniqHLL12(x + y), uniqCombined(x + y), uniqCombined64(x + y)
EXPLAIN SYNTAX select uniq(x + y), uniqExact(x + y), uniqHLL12(x + y), uniqCombined(x + y), uniqCombined64(x + y), uniqThetaSketch(x + y)
from (select number % 2 as x, number % 3 y from numbers(10));
EXPLAIN SYNTAX select uniq(-x), uniqExact(-x), uniqHLL12(-x), uniqCombined(-x), uniqCombined64(-x)
EXPLAIN SYNTAX select uniq(-x), uniqExact(-x), uniqHLL12(-x), uniqCombined(-x), uniqCombined64(-x), uniqThetaSketch(-x)
from (select number % 2 as x from numbers(10));
EXPLAIN SYNTAX select uniq(bitNot(x)), uniqExact(bitNot(x)), uniqHLL12(bitNot(x)), uniqCombined(bitNot(x)), uniqCombined64(bitNot(x))
EXPLAIN SYNTAX select uniq(bitNot(x)), uniqExact(bitNot(x)), uniqHLL12(bitNot(x)), uniqCombined(bitNot(x)), uniqCombined64(bitNot(x)), uniqThetaSketch(bitNot(x))
from (select number % 2 as x from numbers(10));
EXPLAIN SYNTAX select uniq(bitNot(-x)), uniqExact(bitNot(-x)), uniqHLL12(bitNot(-x)), uniqCombined(bitNot(-x)), uniqCombined64(bitNot(-x))
EXPLAIN SYNTAX select uniq(bitNot(-x)), uniqExact(bitNot(-x)), uniqHLL12(bitNot(-x)), uniqCombined(bitNot(-x)), uniqCombined64(bitNot(-x)), uniqThetaSketch(bitNot(-x))
from (select number % 2 as x from numbers(10));
EXPLAIN SYNTAX select uniq(-bitNot(-x)), uniqExact(-bitNot(-x)), uniqHLL12(-bitNot(-x)), uniqCombined(-bitNot(-x)), uniqCombined64(-bitNot(-x))
EXPLAIN SYNTAX select uniq(-bitNot(-x)), uniqExact(-bitNot(-x)), uniqHLL12(-bitNot(-x)), uniqCombined(-bitNot(-x)), uniqCombined64(-bitNot(-x)), uniqThetaSketch(-bitNot(-x))
from (select number % 2 as x from numbers(10));
EXPLAIN SYNTAX select count(distinct -bitNot(-x)) from (select number % 2 as x from numbers(10));

View File

@ -0,0 +1,125 @@
1000 1000
2014-06-01 1000 1000
1000 1000
2014-06-01 1000 1000
2014-06-01 0 0 7 7
2014-06-01 0 1 7 7
2014-06-01 0 2 7 7
2014-06-01 0 3 7 7
2014-06-01 0 4 7 7
2014-06-01 0 5 7 7
2014-06-01 0 6 7 7
2014-06-01 0 7 7 7
2014-06-01 0 8 7 7
2014-06-01 0 9 7 7
2014-06-01 1 10 7 7
2014-06-01 1 11 7 7
2014-06-01 1 12 7 7
2014-06-01 1 13 7 7
2014-06-01 1 14 7 7
2014-06-01 1 15 7 7
2014-06-01 1 16 7 7
2014-06-01 1 17 7 7
2014-06-01 1 18 7 7
2014-06-01 1 19 7 7
2014-06-01 2 20 7 7
2014-06-01 2 21 7 7
2014-06-01 2 22 7 7
2014-06-01 2 23 7 7
2014-06-01 2 24 7 7
2014-06-01 2 25 7 7
2014-06-01 2 26 7 7
2014-06-01 2 27 7 7
2014-06-01 2 28 7 7
2014-06-01 2 29 7 7
2014-06-01 3 30 7 7
2014-06-01 3 31 7 7
2014-06-01 3 32 7 7
2014-06-01 3 33 7 7
2014-06-01 3 34 7 7
2014-06-01 3 35 7 7
2014-06-01 3 36 7 7
2014-06-01 3 37 7 7
2014-06-01 3 38 7 7
2014-06-01 3 39 7 7
2014-06-01 4 40 7 7
2014-06-01 4 41 7 7
2014-06-01 4 42 7 7
2014-06-01 4 43 7 7
2014-06-01 4 44 7 7
2014-06-01 4 45 7 7
2014-06-01 4 46 7 7
2014-06-01 4 47 7 7
2014-06-01 4 48 7 7
2014-06-01 4 49 7 7
2014-06-01 5 50 7 7
2014-06-01 5 51 7 7
2014-06-01 5 52 7 7
2014-06-01 5 53 7 7
2014-06-01 5 54 7 7
2014-06-01 5 55 7 7
2014-06-01 5 56 7 7
2014-06-01 5 57 7 7
2014-06-01 5 58 7 7
2014-06-01 5 59 7 7
2014-06-01 6 60 7 7
2014-06-01 6 61 7 7
2014-06-01 6 62 7 7
2014-06-01 6 63 7 7
2014-06-01 6 64 7 7
2014-06-01 6 65 7 7
2014-06-01 6 66 7 7
2014-06-01 6 67 7 7
2014-06-01 6 68 7 7
2014-06-01 6 69 7 7
2014-06-01 7 70 7 7
2014-06-01 7 71 7 7
2014-06-01 7 72 7 7
2014-06-01 7 73 7 7
2014-06-01 7 74 7 7
2014-06-01 7 75 7 7
2014-06-01 7 76 7 7
2014-06-01 7 77 7 7
2014-06-01 7 78 7 7
2014-06-01 7 79 7 7
2014-06-01 8 80 7 7
2014-06-01 8 81 7 7
2014-06-01 8 82 7 7
2014-06-01 8 83 7 7
2014-06-01 8 84 7 7
2014-06-01 8 85 7 7
2014-06-01 8 86 7 7
2014-06-01 8 87 7 7
2014-06-01 8 88 7 7
2014-06-01 8 89 7 7
2014-06-01 9 90 7 7
2014-06-01 9 91 7 7
2014-06-01 9 92 7 7
2014-06-01 9 93 7 7
2014-06-01 9 94 7 7
2014-06-01 9 95 7 7
2014-06-01 9 96 7 7
2014-06-01 9 97 7 7
2014-06-01 9 98 7 7
2014-06-01 9 99 7 7
2014-06-01 0 7 7
2014-06-01 1 7 7
2014-06-01 2 7 7
2014-06-01 3 7 7
2014-06-01 4 7 7
2014-06-01 5 7 7
2014-06-01 6 7 7
2014-06-01 7 7 7
2014-06-01 8 7 7
2014-06-01 9 7 7
2014-06-01 7 7
0 333333 53 53
1 333333 53 53
2 333333 53 53
0 333333 53 53
1 333333 53 53
2 333333 53 53
10000000
10021957
10021969
10094819

View File

@ -0,0 +1,114 @@
DROP TABLE IF EXISTS stored_aggregates;
-- simple
CREATE TABLE stored_aggregates
(
d Date,
Uniq AggregateFunction(uniq, UInt64),
UniqThetaSketch AggregateFunction(uniqThetaSketch, UInt64)
)
ENGINE = AggregatingMergeTree(d, d, 8192);
INSERT INTO stored_aggregates
SELECT
toDate('2014-06-01') AS d,
uniqState(number) AS Uniq,
uniqThetaSketchState(number) AS UniqThetaSketch
FROM
(
SELECT * FROM system.numbers LIMIT 1000
);
SELECT uniqMerge(Uniq), uniqThetaSketchMerge(UniqThetaSketch) FROM stored_aggregates;
SELECT d, uniqMerge(Uniq), uniqThetaSketchMerge(UniqThetaSketch) FROM stored_aggregates GROUP BY d ORDER BY d;
OPTIMIZE TABLE stored_aggregates;
SELECT uniqMerge(Uniq), uniqThetaSketchMerge(UniqThetaSketch) FROM stored_aggregates;
SELECT d, uniqMerge(Uniq), uniqThetaSketchMerge(UniqThetaSketch) FROM stored_aggregates GROUP BY d ORDER BY d;
DROP TABLE stored_aggregates;
-- complex
CREATE TABLE stored_aggregates
(
d Date,
k1 UInt64,
k2 String,
Uniq AggregateFunction(uniq, UInt64),
UniqThetaSketch AggregateFunction(uniqThetaSketch, UInt64)
)
ENGINE = AggregatingMergeTree(d, (d, k1, k2), 8192);
INSERT INTO stored_aggregates
SELECT
toDate('2014-06-01') AS d,
intDiv(number, 100) AS k1,
toString(intDiv(number, 10)) AS k2,
uniqState(toUInt64(number % 7)) AS Uniq,
uniqThetaSketchState(toUInt64(number % 7)) AS UniqThetaSketch
FROM
(
SELECT * FROM system.numbers LIMIT 1000
)
GROUP BY d, k1, k2
ORDER BY d, k1, k2;
SELECT d, k1, k2,
uniqMerge(Uniq), uniqThetaSketchMerge(UniqThetaSketch)
FROM stored_aggregates
GROUP BY d, k1, k2
ORDER BY d, k1, k2;
SELECT d, k1,
uniqMerge(Uniq), uniqThetaSketchMerge(UniqThetaSketch)
FROM stored_aggregates
GROUP BY d, k1
ORDER BY d, k1;
SELECT d,
uniqMerge(Uniq), uniqThetaSketchMerge(UniqThetaSketch)
FROM stored_aggregates
GROUP BY d
ORDER BY d;
DROP TABLE stored_aggregates;
---- sum + uniq with more data
drop table if exists summing_merge_tree_null;
drop table if exists summing_merge_tree_aggregate_function;
create table summing_merge_tree_null (
d materialized today(),
k UInt64,
c UInt64,
u UInt64
) engine=Null;
create materialized view summing_merge_tree_aggregate_function (
d Date,
k UInt64,
c UInt64,
un AggregateFunction(uniq, UInt64),
ut AggregateFunction(uniqThetaSketch, UInt64)
) engine=SummingMergeTree(d, k, 8192)
as select d, k, sum(c) as c, uniqState(u) as un, uniqThetaSketchState(u) as ut
from summing_merge_tree_null
group by d, k;
-- prime number 53 to avoid resonance between %3 and %53
insert into summing_merge_tree_null select number % 3, 1, number % 53 from numbers(999999);
select k, sum(c), uniqMerge(un), uniqThetaSketchMerge(ut) from summing_merge_tree_aggregate_function group by k order by k;
optimize table summing_merge_tree_aggregate_function;
select k, sum(c), uniqMerge(un), uniqThetaSketchMerge(ut) from summing_merge_tree_aggregate_function group by k order by k;
drop table summing_merge_tree_aggregate_function;
drop table summing_merge_tree_null;
-- precise
SELECT uniqExact(number) FROM numbers(1e7);
SELECT uniqCombined(number) FROM numbers(1e7);
SELECT uniqCombined64(number) FROM numbers(1e7);
SELECT uniqThetaSketch(number) FROM numbers(1e7);