mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
bug fix. add unit tests.#
This commit is contained in:
parent
86d8bc1c54
commit
1ea74a1947
@ -138,7 +138,7 @@ struct AggregateFunctionUniqThetaSketchData
|
||||
/// For a function that takes multiple arguments. Such a function pre-hashes them in advance, so TrivialHash is used here.
|
||||
struct AggregateFunctionUniqThetaSketchDataForVariadic
|
||||
{
|
||||
using Set = ThetaSketchData<UInt64, TrivialHash>;
|
||||
using Set = ThetaSketchData<UInt64>;
|
||||
Set set;
|
||||
|
||||
static String getName() { return "uniqThetaSketch"; }
|
||||
|
@ -4,119 +4,106 @@
|
||||
#include <theta_sketch.hpp>
|
||||
#include <theta_union.hpp>
|
||||
|
||||
#include <common/logger_useful.h>
|
||||
#include <memory>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
template
|
||||
<
|
||||
typename Key,
|
||||
typename Hash = IntHash32<Key>>
|
||||
template <typename Key>
|
||||
class ThetaSketchData : private boost::noncopyable
|
||||
{
|
||||
private:
|
||||
mutable datasketches::update_theta_sketch sk_update;
|
||||
mutable datasketches::theta_union sk_union;
|
||||
bool is_merged;
|
||||
Poco::Logger * log;
|
||||
std::unique_ptr<datasketches::update_theta_sketch> sk_update;
|
||||
std::unique_ptr<datasketches::theta_union> sk_union;
|
||||
|
||||
// void internal_merge() const
|
||||
// {
|
||||
// if (!sk_update.is_empty())
|
||||
// {
|
||||
// sk_union.update(sk_update);
|
||||
// sk_update = datasketches::update_theta_sketch::builder().build();
|
||||
// }
|
||||
// }
|
||||
inline datasketches::update_theta_sketch * get_sk_update()
|
||||
{
|
||||
if (!sk_update)
|
||||
sk_update = std::make_unique<datasketches::update_theta_sketch>(datasketches::update_theta_sketch::builder().build());
|
||||
return sk_update.get();
|
||||
}
|
||||
|
||||
inline datasketches::theta_union * get_sk_union()
|
||||
{
|
||||
if (!sk_union)
|
||||
sk_union = std::make_unique<datasketches::theta_union>(datasketches::theta_union::builder().build());
|
||||
return sk_union.get();
|
||||
}
|
||||
|
||||
public:
|
||||
using value_type = Key;
|
||||
|
||||
ThetaSketchData()
|
||||
: sk_update(datasketches::update_theta_sketch::builder().build()),
|
||||
sk_union(datasketches::theta_union::builder().build()),
|
||||
is_merged(false),
|
||||
log(&Poco::Logger::get("ThetaSketchData"))
|
||||
{
|
||||
}
|
||||
ThetaSketchData() = default;
|
||||
~ThetaSketchData() = default;
|
||||
|
||||
/// Insert original value without hash, as `datasketches::update_theta_sketch.update` will do the hash internal.
|
||||
void insert_original(const StringRef & value)
|
||||
{
|
||||
sk_update.update(value.data, value.size);
|
||||
LOG_WARNING(log, "insert_origin() {}", value.toString());
|
||||
get_sk_update()->update(value.data, value.size);
|
||||
}
|
||||
|
||||
/// Note that `datasketches::update_theta_sketch.update` will do the hash again.
|
||||
void insert(Key value)
|
||||
{
|
||||
sk_update.update(value);
|
||||
LOG_WARNING(log, "insert() {}", value);
|
||||
get_sk_update()->update(value);
|
||||
}
|
||||
|
||||
UInt64 size() const
|
||||
{
|
||||
LOG_WARNING(log, "size() update:{}, union:{}", sk_update.get_estimate(), sk_union.get_result().get_estimate());
|
||||
if (!is_merged)
|
||||
return static_cast<UInt64>(sk_update.get_estimate());
|
||||
if (sk_union)
|
||||
return static_cast<UInt64>(sk_union->get_result().get_estimate());
|
||||
else if (sk_update)
|
||||
return static_cast<UInt64>(sk_update->get_estimate());
|
||||
else
|
||||
return static_cast<UInt64>(sk_union.get_result().get_estimate());
|
||||
return 0;
|
||||
}
|
||||
|
||||
void merge(const ThetaSketchData & rhs)
|
||||
{
|
||||
if (!is_merged && !sk_update.is_empty())
|
||||
datasketches::theta_union * u = get_sk_union();
|
||||
|
||||
if (sk_update)
|
||||
{
|
||||
sk_union.update(sk_update);
|
||||
u->update(*sk_update);
|
||||
sk_update.reset(nullptr);
|
||||
}
|
||||
is_merged = true;
|
||||
|
||||
if (!rhs.is_merged && !rhs.sk_update.is_empty())
|
||||
sk_union.update(rhs.sk_update);
|
||||
else if (rhs.is_merged)
|
||||
sk_union.update(rhs.sk_union.get_result());
|
||||
|
||||
LOG_WARNING(log, "merge() result:{}", sk_union.get_result().to_string());
|
||||
if (rhs.sk_update)
|
||||
u->update(*rhs.sk_update);
|
||||
else if (rhs.sk_union)
|
||||
u->update(rhs.sk_union->get_result());
|
||||
}
|
||||
|
||||
/// You can only call for an empty object.
|
||||
void read(DB::ReadBuffer & in)
|
||||
{
|
||||
LOG_WARNING(log, "read() {}", sk_union.get_result().to_string());
|
||||
|
||||
datasketches::compact_theta_sketch::vector_bytes bytes;
|
||||
readVectorBinary(bytes, in);
|
||||
auto sk = datasketches::compact_theta_sketch::deserialize(bytes.data(), bytes.size());
|
||||
|
||||
sk_union = datasketches::theta_union::builder().build();
|
||||
sk_union.update(sk);
|
||||
is_merged = true;
|
||||
|
||||
LOG_WARNING(log, "read()[after] {}", sk_union.get_result().to_string());
|
||||
if (!bytes.empty())
|
||||
{
|
||||
auto sk = datasketches::compact_theta_sketch::deserialize(bytes.data(), bytes.size());
|
||||
get_sk_union()->update(sk);
|
||||
}
|
||||
}
|
||||
|
||||
// void readAndMerge(DB::ReadBuffer &)
|
||||
// {
|
||||
// LOG_WARNING(log, "readAndMerge() {}", sk_union.get_result().to_string());
|
||||
// }
|
||||
|
||||
void write(DB::WriteBuffer & out) const
|
||||
{
|
||||
if (!is_merged)
|
||||
if (sk_update)
|
||||
{
|
||||
auto bytes = sk_update.compact().serialize();
|
||||
auto bytes = sk_update->compact().serialize();
|
||||
writeVectorBinary(bytes, out);
|
||||
}
|
||||
else if (sk_union)
|
||||
{
|
||||
auto bytes = sk_union->get_result().serialize();
|
||||
writeVectorBinary(bytes, out);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto bytes = sk_union.get_result().serialize();
|
||||
datasketches::compact_theta_sketch::vector_bytes bytes;
|
||||
writeVectorBinary(bytes, out);
|
||||
}
|
||||
|
||||
LOG_WARNING(log, "write() {}", sk_union.get_result().to_string());
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -13,7 +13,7 @@ static bool isUniq(const ASTFunction & func)
|
||||
{
|
||||
return func.name == "uniq" || func.name == "uniqExact" || func.name == "uniqHLL12"
|
||||
|| func.name == "uniqCombined" || func.name == "uniqCombined64"
|
||||
|| func.name == "uniqThetaSketchState";
|
||||
|| func.name == "uniqThetaSketch";
|
||||
}
|
||||
|
||||
/// Remove injective functions of one argument: replace with a child
|
||||
|
@ -1065,3 +1065,217 @@ uniqCombined remote()
|
||||
1
|
||||
1
|
||||
1
|
||||
uniqThetaSketch
|
||||
1 1
|
||||
3 1
|
||||
6 1
|
||||
7 1
|
||||
9 1
|
||||
11 1
|
||||
14 1
|
||||
17 1
|
||||
19 1
|
||||
20 2
|
||||
26 1
|
||||
31 1
|
||||
35 1
|
||||
36 1
|
||||
0 162
|
||||
1 162
|
||||
3 162
|
||||
6 162
|
||||
7 163
|
||||
9 163
|
||||
10 81
|
||||
11 163
|
||||
13 162
|
||||
14 162
|
||||
17 162
|
||||
19 162
|
||||
20 162
|
||||
21 162
|
||||
22 162
|
||||
26 162
|
||||
31 162
|
||||
35 162
|
||||
36 162
|
||||
0 55018
|
||||
1 54020
|
||||
3 53774
|
||||
6 53947
|
||||
7 53839
|
||||
9 54408
|
||||
10 26876
|
||||
11 54985
|
||||
13 53479
|
||||
14 53516
|
||||
17 53331
|
||||
19 53680
|
||||
20 54211
|
||||
21 53054
|
||||
22 54690
|
||||
26 53716
|
||||
31 54139
|
||||
35 52331
|
||||
36 53766
|
||||
uniqThetaSketch round(float)
|
||||
0.125 1
|
||||
0.5 1
|
||||
0.05 1
|
||||
0.143 1
|
||||
0.056 1
|
||||
0.048 2
|
||||
0.083 1
|
||||
0.25 1
|
||||
0.1 1
|
||||
0.028 1
|
||||
0.027 1
|
||||
0.031 1
|
||||
0.067 1
|
||||
0.037 1
|
||||
0.045 162
|
||||
0.125 163
|
||||
0.5 162
|
||||
0.05 162
|
||||
0.143 162
|
||||
0.091 81
|
||||
0.056 162
|
||||
0.048 162
|
||||
0.083 163
|
||||
0.25 162
|
||||
1 162
|
||||
0.1 163
|
||||
0.028 162
|
||||
0.027 162
|
||||
0.031 162
|
||||
0.067 162
|
||||
0.043 162
|
||||
0.037 162
|
||||
0.071 162
|
||||
0.045 53054
|
||||
0.125 53839
|
||||
0.5 54020
|
||||
0.05 53680
|
||||
0.143 53947
|
||||
0.091 26876
|
||||
0.056 53331
|
||||
0.048 54211
|
||||
0.083 54985
|
||||
0.25 53774
|
||||
1 55018
|
||||
0.1 54408
|
||||
0.028 52331
|
||||
0.027 53766
|
||||
0.031 54139
|
||||
0.067 53516
|
||||
0.043 54690
|
||||
0.037 53716
|
||||
0.071 53479
|
||||
uniqThetaSketch round(toFloat32())
|
||||
0.5 1
|
||||
0.05 1
|
||||
0.25 1
|
||||
0.048 2
|
||||
0.083 1
|
||||
0.125 1
|
||||
0.031 1
|
||||
0.143 1
|
||||
0.028 1
|
||||
0.067 1
|
||||
0.027 1
|
||||
0.056 1
|
||||
0.037 1
|
||||
0.1 1
|
||||
0.5 162
|
||||
0.05 162
|
||||
0.25 162
|
||||
0.048 162
|
||||
0.091 81
|
||||
0.043 162
|
||||
0.071 162
|
||||
0.083 163
|
||||
0.125 163
|
||||
0.031 162
|
||||
0.143 162
|
||||
0.028 162
|
||||
0.067 162
|
||||
0.045 162
|
||||
0.027 162
|
||||
0.056 162
|
||||
0.037 162
|
||||
0.1 163
|
||||
1 162
|
||||
0.5 54020
|
||||
0.05 53680
|
||||
0.25 53774
|
||||
0.048 54211
|
||||
0.091 26876
|
||||
0.043 54690
|
||||
0.071 53479
|
||||
0.083 54985
|
||||
0.125 53839
|
||||
0.031 54139
|
||||
0.143 53947
|
||||
0.028 52331
|
||||
0.067 53516
|
||||
0.045 53054
|
||||
0.027 53766
|
||||
0.056 53331
|
||||
0.037 53716
|
||||
0.1 54408
|
||||
1 55018
|
||||
uniqThetaSketch IPv4NumToString
|
||||
1 1
|
||||
3 1
|
||||
6 1
|
||||
7 1
|
||||
9 1
|
||||
11 1
|
||||
14 1
|
||||
17 1
|
||||
19 1
|
||||
20 2
|
||||
26 1
|
||||
31 1
|
||||
35 1
|
||||
36 1
|
||||
0 162
|
||||
1 162
|
||||
3 162
|
||||
6 162
|
||||
7 163
|
||||
9 163
|
||||
10 81
|
||||
11 163
|
||||
13 162
|
||||
14 162
|
||||
17 162
|
||||
19 162
|
||||
20 162
|
||||
21 162
|
||||
22 162
|
||||
26 162
|
||||
31 162
|
||||
35 162
|
||||
36 162
|
||||
0 54929
|
||||
1 53802
|
||||
3 54706
|
||||
6 54700
|
||||
7 53592
|
||||
9 54036
|
||||
10 27392
|
||||
11 53768
|
||||
13 54566
|
||||
14 53104
|
||||
17 54243
|
||||
19 55003
|
||||
20 53398
|
||||
21 53831
|
||||
22 54603
|
||||
26 54607
|
||||
31 54012
|
||||
35 54826
|
||||
36 54910
|
||||
uniqThetaSketch remote()
|
||||
1
|
||||
|
@ -132,3 +132,33 @@ SELECT uniqCombined(dummy) FROM remote('127.0.0.{2,3}', system.one);
|
||||
SELECT uniqCombined(12)(dummy) FROM remote('127.0.0.{2,3}', system.one);
|
||||
SELECT uniqCombined(17)(dummy) FROM remote('127.0.0.{2,3}', system.one);
|
||||
SELECT uniqCombined(20)(dummy) FROM remote('127.0.0.{2,3}', system.one);
|
||||
|
||||
-- uniqThetaSketch
|
||||
|
||||
SELECT 'uniqThetaSketch';
|
||||
|
||||
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y;
|
||||
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y;
|
||||
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y;
|
||||
|
||||
SELECT 'uniqThetaSketch round(float)';
|
||||
|
||||
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y;
|
||||
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y;
|
||||
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y;
|
||||
|
||||
SELECT 'uniqThetaSketch round(toFloat32())';
|
||||
|
||||
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y;
|
||||
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y;
|
||||
SELECT Y, uniqThetaSketch(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y;
|
||||
|
||||
SELECT 'uniqThetaSketch IPv4NumToString';
|
||||
|
||||
SELECT Y, uniqThetaSketch(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y;
|
||||
SELECT Y, uniqThetaSketch(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y;
|
||||
SELECT Y, uniqThetaSketch(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y;
|
||||
|
||||
SELECT 'uniqThetaSketch remote()';
|
||||
|
||||
SELECT uniqThetaSketch(dummy) FROM remote('127.0.0.{2,3}', system.one);
|
||||
|
@ -1,11 +1,11 @@
|
||||
10 10 100 100 1000 1000 10 10 100 100 1000 1000 10 10 100 100 1000 1000 10 10 101 101 1006 1006 10 10 100 100 1000 1000 6 6 6 6 6 6
|
||||
17 10 10 100 100 610 610 10 10 100 100 610 610 10 10 100 100 610 610 10 10 101 101 616 616 10 10 100 100 610 610 6 6 6 6 6 6 766
|
||||
52 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 611 611 10 10 100 100 608 608 6 6 6 6 6 6 766
|
||||
5 10 10 100 100 609 609 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 608 608 10 10 100 100 609 609 6 6 6 6 6 6 765
|
||||
9 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 618 618 10 10 100 100 608 608 6 6 6 6 6 6 765
|
||||
13 10 10 100 100 607 607 10 10 100 100 607 607 10 10 100 100 607 607 10 10 101 101 610 610 10 10 100 100 607 607 6 6 6 6 6 6 765
|
||||
46 10 10 100 100 607 607 10 10 100 100 607 607 10 10 100 100 607 607 10 10 101 101 611 611 10 10 100 100 607 607 6 6 6 6 6 6 765
|
||||
48 10 10 100 100 609 609 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 617 617 10 10 100 100 609 609 6 6 6 6 6 6 765
|
||||
50 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 614 614 10 10 100 100 608 608 6 6 6 6 6 6 765
|
||||
54 10 10 100 100 609 609 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 615 615 10 10 100 100 609 609 6 6 6 6 6 6 765
|
||||
56 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 614 614 10 10 100 100 608 608 6 6 6 6 6 6 765
|
||||
10 10 100 100 1000 1000 10 10 100 100 1000 1000 10 10 100 100 1000 1000 10 10 101 101 1006 1006 10 10 100 100 1000 1000 6 6 6 6 6 6 10 10 100 100 1000 1000
|
||||
17 10 10 100 100 610 610 10 10 100 100 610 610 10 10 100 100 610 610 10 10 101 101 616 616 10 10 100 100 610 610 6 6 6 6 6 6 10 10 100 100 610 610 766
|
||||
52 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 611 611 10 10 100 100 608 608 6 6 6 6 6 6 10 10 100 100 608 608 766
|
||||
5 10 10 100 100 609 609 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 608 608 10 10 100 100 609 609 6 6 6 6 6 6 10 10 100 100 608 608 765
|
||||
9 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 618 618 10 10 100 100 608 608 6 6 6 6 6 6 10 10 100 100 608 608 765
|
||||
13 10 10 100 100 607 607 10 10 100 100 607 607 10 10 100 100 607 607 10 10 101 101 610 610 10 10 100 100 607 607 6 6 6 6 6 6 10 10 100 100 607 607 765
|
||||
46 10 10 100 100 607 607 10 10 100 100 607 607 10 10 100 100 607 607 10 10 101 101 611 611 10 10 100 100 607 607 6 6 6 6 6 6 10 10 100 100 607 607 765
|
||||
48 10 10 100 100 609 609 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 617 617 10 10 100 100 609 609 6 6 6 6 6 6 10 10 100 100 609 609 765
|
||||
50 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 614 614 10 10 100 100 608 608 6 6 6 6 6 6 10 10 100 100 608 608 765
|
||||
54 10 10 100 100 609 609 10 10 100 100 609 609 10 10 100 100 609 609 10 10 101 101 615 615 10 10 100 100 609 609 6 6 6 6 6 6 10 10 100 100 609 609 765
|
||||
56 10 10 100 100 608 608 10 10 100 100 608 608 10 10 100 100 608 608 10 10 101 101 614 614 10 10 100 100 608 608 6 6 6 6 6 6 10 10 100 100 608 608 765
|
||||
|
@ -4,7 +4,8 @@ SELECT
|
||||
uniqCombined(17)(x), uniqCombined(17)((x)), uniqCombined(17)(x, y), uniqCombined(17)((x, y)), uniqCombined(17)(x, y, z), uniqCombined(17)((x, y, z)),
|
||||
uniqHLL12(x), uniqHLL12((x)), uniqHLL12(x, y), uniqHLL12((x, y)), uniqHLL12(x, y, z), uniqHLL12((x, y, z)),
|
||||
uniqExact(x), uniqExact((x)), uniqExact(x, y), uniqExact((x, y)), uniqExact(x, y, z), uniqExact((x, y, z)),
|
||||
uniqUpTo(5)(x), uniqUpTo(5)((x)), uniqUpTo(5)(x, y), uniqUpTo(5)((x, y)), uniqUpTo(5)(x, y, z), uniqUpTo(5)((x, y, z))
|
||||
uniqUpTo(5)(x), uniqUpTo(5)((x)), uniqUpTo(5)(x, y), uniqUpTo(5)((x, y)), uniqUpTo(5)(x, y, z), uniqUpTo(5)((x, y, z)),
|
||||
uniqThetaSketch(x), uniqThetaSketch((x)), uniqThetaSketch(x, y), uniqThetaSketch((x, y)), uniqThetaSketch(x, y, z), uniqThetaSketch((x, y, z))
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
@ -22,6 +23,7 @@ SELECT k,
|
||||
uniqHLL12(x), uniqHLL12((x)), uniqHLL12(x, y), uniqHLL12((x, y)), uniqHLL12(x, y, z), uniqHLL12((x, y, z)),
|
||||
uniqExact(x), uniqExact((x)), uniqExact(x, y), uniqExact((x, y)), uniqExact(x, y, z), uniqExact((x, y, z)),
|
||||
uniqUpTo(5)(x), uniqUpTo(5)((x)), uniqUpTo(5)(x, y), uniqUpTo(5)((x, y)), uniqUpTo(5)(x, y, z), uniqUpTo(5)((x, y, z)),
|
||||
uniqThetaSketch(x), uniqThetaSketch((x)), uniqThetaSketch(x, y), uniqThetaSketch((x, y)), uniqThetaSketch(x, y, z), uniqThetaSketch((x, y, z)),
|
||||
count() AS c
|
||||
FROM
|
||||
(
|
||||
|
@ -4,3 +4,5 @@
|
||||
143
|
||||
123
|
||||
143
|
||||
123
|
||||
143
|
||||
|
@ -7,3 +7,6 @@ SELECT count(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM
|
||||
SET count_distinct_implementation = 'uniqExact';
|
||||
SELECT count(DISTINCT x) FROM (SELECT number % 123 AS x FROM system.numbers LIMIT 1000);
|
||||
SELECT count(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000);
|
||||
SET count_distinct_implementation = 'uniqThetaSketch';
|
||||
SELECT count(DISTINCT x) FROM (SELECT number % 123 AS x FROM system.numbers LIMIT 1000);
|
||||
SELECT count(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000);
|
||||
|
@ -4,3 +4,5 @@
|
||||
3
|
||||
2
|
||||
3
|
||||
2
|
||||
3
|
||||
|
@ -4,3 +4,5 @@ SELECT uniqExactArray([0, 1, 1], [0, 1, 1], [0, 1, 1]);
|
||||
SELECT uniqExactArray([0, 1, 1], [0, 1, 1], [0, 1, 0]);
|
||||
SELECT uniqUpToArray(10)([0, 1, 1], [0, 1, 1], [0, 1, 1]);
|
||||
SELECT uniqUpToArray(10)([0, 1, 1], [0, 1, 1], [0, 1, 0]);
|
||||
SELECT uniqThetaSketchArray([0, 1, 1], [0, 1, 1], [0, 1, 1]);
|
||||
SELECT uniqThetaSketchArray([0, 1, 1], [0, 1, 1], [0, 1, 0]);
|
||||
|
@ -1,3 +1,4 @@
|
||||
3
|
||||
3
|
||||
3
|
||||
3
|
||||
|
@ -1,3 +1,4 @@
|
||||
SELECT uniq(x) FROM (SELECT arrayJoin([[1, 2], [1, 2], [1, 2, 3], []]) AS x);
|
||||
SELECT uniqExact(x) FROM (SELECT arrayJoin([[1, 2], [1, 2], [1, 2, 3], []]) AS x);
|
||||
SELECT uniqUpTo(2)(x) FROM (SELECT arrayJoin([[1, 2], [1, 2], [1, 2, 3], []]) AS x);
|
||||
SELECT uniqThetaSketch(x) FROM (SELECT arrayJoin([[1, 2], [1, 2], [1, 2, 3], []]) AS x);
|
||||
|
@ -25,3 +25,12 @@
|
||||
3
|
||||
3
|
||||
3
|
||||
3
|
||||
3
|
||||
3
|
||||
3
|
||||
3
|
||||
3
|
||||
3
|
||||
3
|
||||
3
|
||||
|
@ -27,3 +27,13 @@ SELECT uniqUpTo(3)((x, x)) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) A
|
||||
SELECT uniqUpTo(3)((x, arrayMap(elem -> [elem, elem], x))) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
|
||||
SELECT uniqUpTo(3)((x, toString(x))) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
|
||||
SELECT uniqUpTo(3)(x) FROM (SELECT arrayJoin([[], ['a'], ['a', NULL, 'b'], []]) AS x);
|
||||
|
||||
SELECT uniqThetaSketch(x) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
|
||||
SELECT uniqThetaSketch(x) FROM (SELECT arrayJoin([[[]], [['a', 'b']], [['a'], ['b']], [['a', 'b']]]) AS x);
|
||||
SELECT uniqThetaSketch(x, x) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
|
||||
SELECT uniqThetaSketch(x, arrayMap(elem -> [elem, elem], x)) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
|
||||
SELECT uniqThetaSketch(x, toString(x)) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
|
||||
SELECT uniqThetaSketch((x, x)) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
|
||||
SELECT uniqThetaSketch((x, arrayMap(elem -> [elem, elem], x))) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
|
||||
SELECT uniqThetaSketch((x, toString(x))) FROM (SELECT arrayJoin([[], ['a'], ['a', 'b'], []]) AS x);
|
||||
SELECT uniqThetaSketch(x) FROM (SELECT arrayJoin([[], ['a'], ['a', NULL, 'b'], []]) AS x);
|
||||
|
@ -5,7 +5,7 @@
|
||||
-1275.0000 -424.99999983 -255.00000000 -1275.0000 -424.99999983 -255.00000000
|
||||
101.0000 101.00000000 101.00000000 101.0000 101.00000000 101.00000000
|
||||
-101.0000 -101.00000000 -101.00000000 -101.0000 -101.00000000 -101.00000000
|
||||
(101,101,101) (101,101,101) (101,101,101) (101,101,101) (102,100,101)
|
||||
(101,101,101) (101,101,101) (101,101,101) (101,101,101) (102,100,101) (101,101,101)
|
||||
5 5 5
|
||||
10 10 10
|
||||
-50.0000 -50.0000 -16.66666666 -16.66666666 -10.00000000 -10.00000000
|
||||
|
@ -24,7 +24,8 @@ SELECT (uniq(a), uniq(b), uniq(c)),
|
||||
(uniqCombined(a), uniqCombined(b), uniqCombined(c)),
|
||||
(uniqCombined(17)(a), uniqCombined(17)(b), uniqCombined(17)(c)),
|
||||
(uniqExact(a), uniqExact(b), uniqExact(c)),
|
||||
(uniqHLL12(a), uniqHLL12(b), uniqHLL12(c))
|
||||
(uniqHLL12(a), uniqHLL12(b), uniqHLL12(c)),
|
||||
(uniqThetaSketch(a), uniqThetaSketch(b), uniqThetaSketch(c))
|
||||
FROM (SELECT * FROM decimal ORDER BY a);
|
||||
|
||||
SELECT uniqUpTo(10)(a), uniqUpTo(10)(b), uniqUpTo(10)(c) FROM decimal WHERE a >= 0 AND a < 5;
|
||||
|
@ -5,7 +5,7 @@
|
||||
0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000
|
||||
0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000
|
||||
0.0000 0.0000000 0.00000000 0.0000 0.0000000 0.00000000
|
||||
(0,0,0) (0,0,0) (0,0,0) (0,0,0) (0,0,0)
|
||||
(0,0,0) (0,0,0) (0,0,0) (0,0,0) (0,0,0) (0,0,0)
|
||||
0 0 0
|
||||
0 0 0
|
||||
0.0000 0.0000 0.0000000 0.0000000 0.00000000 0.00000000
|
||||
|
@ -20,7 +20,8 @@ SELECT (uniq(a), uniq(b), uniq(c)),
|
||||
(uniqCombined(a), uniqCombined(b), uniqCombined(c)),
|
||||
(uniqCombined(17)(a), uniqCombined(17)(b), uniqCombined(17)(c)),
|
||||
(uniqExact(a), uniqExact(b), uniqExact(c)),
|
||||
(uniqHLL12(a), uniqHLL12(b), uniqHLL12(c))
|
||||
(uniqHLL12(a), uniqHLL12(b), uniqHLL12(c)),
|
||||
(uniqThetaSketch(a), uniqThetaSketch(b), uniqThetaSketch(c))
|
||||
FROM (SELECT * FROM decimal ORDER BY a);
|
||||
|
||||
SELECT uniqUpTo(10)(a), uniqUpTo(10)(b), uniqUpTo(10)(c) FROM decimal WHERE a >= 0 AND a < 5;
|
||||
|
@ -3,7 +3,8 @@ SELECT
|
||||
uniqExact(x),
|
||||
uniqHLL12(x),
|
||||
uniqCombined(x),
|
||||
uniqCombined64(x)
|
||||
uniqCombined64(x),
|
||||
uniqThetaSketch(x)
|
||||
FROM
|
||||
(
|
||||
SELECT number % 2 AS x
|
||||
@ -14,7 +15,8 @@ SELECT
|
||||
uniqExact(x + y),
|
||||
uniqHLL12(x + y),
|
||||
uniqCombined(x + y),
|
||||
uniqCombined64(x + y)
|
||||
uniqCombined64(x + y),
|
||||
uniqThetaSketch(x + y)
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
@ -27,7 +29,8 @@ SELECT
|
||||
uniqExact(x),
|
||||
uniqHLL12(x),
|
||||
uniqCombined(x),
|
||||
uniqCombined64(x)
|
||||
uniqCombined64(x),
|
||||
uniqThetaSketch(x)
|
||||
FROM
|
||||
(
|
||||
SELECT number % 2 AS x
|
||||
@ -38,7 +41,8 @@ SELECT
|
||||
uniqExact(x),
|
||||
uniqHLL12(x),
|
||||
uniqCombined(x),
|
||||
uniqCombined64(x)
|
||||
uniqCombined64(x),
|
||||
uniqThetaSketch(x)
|
||||
FROM
|
||||
(
|
||||
SELECT number % 2 AS x
|
||||
@ -49,7 +53,8 @@ SELECT
|
||||
uniqExact(x),
|
||||
uniqHLL12(x),
|
||||
uniqCombined(x),
|
||||
uniqCombined64(x)
|
||||
uniqCombined64(x),
|
||||
uniqThetaSketch(x)
|
||||
FROM
|
||||
(
|
||||
SELECT number % 2 AS x
|
||||
@ -60,7 +65,8 @@ SELECT
|
||||
uniqExact(x),
|
||||
uniqHLL12(x),
|
||||
uniqCombined(x),
|
||||
uniqCombined64(x)
|
||||
uniqCombined64(x),
|
||||
uniqThetaSketch(x)
|
||||
FROM
|
||||
(
|
||||
SELECT number % 2 AS x
|
||||
@ -79,7 +85,8 @@ SELECT
|
||||
uniqExact(x),
|
||||
uniqHLL12(x),
|
||||
uniqCombined(x),
|
||||
uniqCombined64(x)
|
||||
uniqCombined64(x),
|
||||
uniqThetaSketch(x)
|
||||
FROM
|
||||
(
|
||||
SELECT number % 2 AS x
|
||||
@ -90,7 +97,8 @@ SELECT
|
||||
uniqExact(x + y),
|
||||
uniqHLL12(x + y),
|
||||
uniqCombined(x + y),
|
||||
uniqCombined64(x + y)
|
||||
uniqCombined64(x + y),
|
||||
uniqThetaSketch(x + y)
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
@ -103,7 +111,8 @@ SELECT
|
||||
uniqExact(-x),
|
||||
uniqHLL12(-x),
|
||||
uniqCombined(-x),
|
||||
uniqCombined64(-x)
|
||||
uniqCombined64(-x),
|
||||
uniqThetaSketch(-x)
|
||||
FROM
|
||||
(
|
||||
SELECT number % 2 AS x
|
||||
@ -114,7 +123,8 @@ SELECT
|
||||
uniqExact(bitNot(x)),
|
||||
uniqHLL12(bitNot(x)),
|
||||
uniqCombined(bitNot(x)),
|
||||
uniqCombined64(bitNot(x))
|
||||
uniqCombined64(bitNot(x)),
|
||||
uniqThetaSketch(bitNot(x))
|
||||
FROM
|
||||
(
|
||||
SELECT number % 2 AS x
|
||||
@ -125,7 +135,8 @@ SELECT
|
||||
uniqExact(bitNot(-x)),
|
||||
uniqHLL12(bitNot(-x)),
|
||||
uniqCombined(bitNot(-x)),
|
||||
uniqCombined64(bitNot(-x))
|
||||
uniqCombined64(bitNot(-x)),
|
||||
uniqThetaSketch(bitNot(-x))
|
||||
FROM
|
||||
(
|
||||
SELECT number % 2 AS x
|
||||
@ -136,7 +147,8 @@ SELECT
|
||||
uniqExact(-bitNot(-x)),
|
||||
uniqHLL12(-bitNot(-x)),
|
||||
uniqCombined(-bitNot(-x)),
|
||||
uniqCombined64(-bitNot(-x))
|
||||
uniqCombined64(-bitNot(-x)),
|
||||
uniqThetaSketch(-bitNot(-x))
|
||||
FROM
|
||||
(
|
||||
SELECT number % 2 AS x
|
||||
|
@ -1,21 +1,21 @@
|
||||
set optimize_injective_functions_inside_uniq = 1;
|
||||
|
||||
EXPLAIN SYNTAX select uniq(x), uniqExact(x), uniqHLL12(x), uniqCombined(x), uniqCombined64(x)
|
||||
EXPLAIN SYNTAX select uniq(x), uniqExact(x), uniqHLL12(x), uniqCombined(x), uniqCombined64(x), uniqThetaSketch(x)
|
||||
from (select number % 2 as x from numbers(10));
|
||||
|
||||
EXPLAIN SYNTAX select uniq(x + y), uniqExact(x + y), uniqHLL12(x + y), uniqCombined(x + y), uniqCombined64(x + y)
|
||||
EXPLAIN SYNTAX select uniq(x + y), uniqExact(x + y), uniqHLL12(x + y), uniqCombined(x + y), uniqCombined64(x + y), uniqThetaSketch(x + y)
|
||||
from (select number % 2 as x, number % 3 y from numbers(10));
|
||||
|
||||
EXPLAIN SYNTAX select uniq(-x), uniqExact(-x), uniqHLL12(-x), uniqCombined(-x), uniqCombined64(-x)
|
||||
EXPLAIN SYNTAX select uniq(-x), uniqExact(-x), uniqHLL12(-x), uniqCombined(-x), uniqCombined64(-x), uniqThetaSketch(-x)
|
||||
from (select number % 2 as x from numbers(10));
|
||||
|
||||
EXPLAIN SYNTAX select uniq(bitNot(x)), uniqExact(bitNot(x)), uniqHLL12(bitNot(x)), uniqCombined(bitNot(x)), uniqCombined64(bitNot(x))
|
||||
EXPLAIN SYNTAX select uniq(bitNot(x)), uniqExact(bitNot(x)), uniqHLL12(bitNot(x)), uniqCombined(bitNot(x)), uniqCombined64(bitNot(x)), uniqThetaSketch(bitNot(x))
|
||||
from (select number % 2 as x from numbers(10));
|
||||
|
||||
EXPLAIN SYNTAX select uniq(bitNot(-x)), uniqExact(bitNot(-x)), uniqHLL12(bitNot(-x)), uniqCombined(bitNot(-x)), uniqCombined64(bitNot(-x))
|
||||
EXPLAIN SYNTAX select uniq(bitNot(-x)), uniqExact(bitNot(-x)), uniqHLL12(bitNot(-x)), uniqCombined(bitNot(-x)), uniqCombined64(bitNot(-x)), uniqThetaSketch(bitNot(-x))
|
||||
from (select number % 2 as x from numbers(10));
|
||||
|
||||
EXPLAIN SYNTAX select uniq(-bitNot(-x)), uniqExact(-bitNot(-x)), uniqHLL12(-bitNot(-x)), uniqCombined(-bitNot(-x)), uniqCombined64(-bitNot(-x))
|
||||
EXPLAIN SYNTAX select uniq(-bitNot(-x)), uniqExact(-bitNot(-x)), uniqHLL12(-bitNot(-x)), uniqCombined(-bitNot(-x)), uniqCombined64(-bitNot(-x)), uniqThetaSketch(-bitNot(-x))
|
||||
from (select number % 2 as x from numbers(10));
|
||||
|
||||
EXPLAIN SYNTAX select count(distinct -bitNot(-x)) from (select number % 2 as x from numbers(10));
|
||||
@ -24,22 +24,22 @@ EXPLAIN SYNTAX select uniq(concatAssumeInjective('x', 'y')) from numbers(10);
|
||||
|
||||
set optimize_injective_functions_inside_uniq = 0;
|
||||
|
||||
EXPLAIN SYNTAX select uniq(x), uniqExact(x), uniqHLL12(x), uniqCombined(x), uniqCombined64(x)
|
||||
EXPLAIN SYNTAX select uniq(x), uniqExact(x), uniqHLL12(x), uniqCombined(x), uniqCombined64(x), uniqThetaSketch(x)
|
||||
from (select number % 2 as x from numbers(10));
|
||||
|
||||
EXPLAIN SYNTAX select uniq(x + y), uniqExact(x + y), uniqHLL12(x + y), uniqCombined(x + y), uniqCombined64(x + y)
|
||||
EXPLAIN SYNTAX select uniq(x + y), uniqExact(x + y), uniqHLL12(x + y), uniqCombined(x + y), uniqCombined64(x + y), uniqThetaSketch(x + y)
|
||||
from (select number % 2 as x, number % 3 y from numbers(10));
|
||||
|
||||
EXPLAIN SYNTAX select uniq(-x), uniqExact(-x), uniqHLL12(-x), uniqCombined(-x), uniqCombined64(-x)
|
||||
EXPLAIN SYNTAX select uniq(-x), uniqExact(-x), uniqHLL12(-x), uniqCombined(-x), uniqCombined64(-x), uniqThetaSketch(-x)
|
||||
from (select number % 2 as x from numbers(10));
|
||||
|
||||
EXPLAIN SYNTAX select uniq(bitNot(x)), uniqExact(bitNot(x)), uniqHLL12(bitNot(x)), uniqCombined(bitNot(x)), uniqCombined64(bitNot(x))
|
||||
EXPLAIN SYNTAX select uniq(bitNot(x)), uniqExact(bitNot(x)), uniqHLL12(bitNot(x)), uniqCombined(bitNot(x)), uniqCombined64(bitNot(x)), uniqThetaSketch(bitNot(x))
|
||||
from (select number % 2 as x from numbers(10));
|
||||
|
||||
EXPLAIN SYNTAX select uniq(bitNot(-x)), uniqExact(bitNot(-x)), uniqHLL12(bitNot(-x)), uniqCombined(bitNot(-x)), uniqCombined64(bitNot(-x))
|
||||
EXPLAIN SYNTAX select uniq(bitNot(-x)), uniqExact(bitNot(-x)), uniqHLL12(bitNot(-x)), uniqCombined(bitNot(-x)), uniqCombined64(bitNot(-x)), uniqThetaSketch(bitNot(-x))
|
||||
from (select number % 2 as x from numbers(10));
|
||||
|
||||
EXPLAIN SYNTAX select uniq(-bitNot(-x)), uniqExact(-bitNot(-x)), uniqHLL12(-bitNot(-x)), uniqCombined(-bitNot(-x)), uniqCombined64(-bitNot(-x))
|
||||
EXPLAIN SYNTAX select uniq(-bitNot(-x)), uniqExact(-bitNot(-x)), uniqHLL12(-bitNot(-x)), uniqCombined(-bitNot(-x)), uniqCombined64(-bitNot(-x)), uniqThetaSketch(-bitNot(-x))
|
||||
from (select number % 2 as x from numbers(10));
|
||||
|
||||
EXPLAIN SYNTAX select count(distinct -bitNot(-x)) from (select number % 2 as x from numbers(10));
|
||||
|
125
tests/queries/0_stateless/01781_uniq_theta_sketch.reference
Normal file
125
tests/queries/0_stateless/01781_uniq_theta_sketch.reference
Normal file
@ -0,0 +1,125 @@
|
||||
1000 1000
|
||||
2014-06-01 1000 1000
|
||||
1000 1000
|
||||
2014-06-01 1000 1000
|
||||
2014-06-01 0 0 7 7
|
||||
2014-06-01 0 1 7 7
|
||||
2014-06-01 0 2 7 7
|
||||
2014-06-01 0 3 7 7
|
||||
2014-06-01 0 4 7 7
|
||||
2014-06-01 0 5 7 7
|
||||
2014-06-01 0 6 7 7
|
||||
2014-06-01 0 7 7 7
|
||||
2014-06-01 0 8 7 7
|
||||
2014-06-01 0 9 7 7
|
||||
2014-06-01 1 10 7 7
|
||||
2014-06-01 1 11 7 7
|
||||
2014-06-01 1 12 7 7
|
||||
2014-06-01 1 13 7 7
|
||||
2014-06-01 1 14 7 7
|
||||
2014-06-01 1 15 7 7
|
||||
2014-06-01 1 16 7 7
|
||||
2014-06-01 1 17 7 7
|
||||
2014-06-01 1 18 7 7
|
||||
2014-06-01 1 19 7 7
|
||||
2014-06-01 2 20 7 7
|
||||
2014-06-01 2 21 7 7
|
||||
2014-06-01 2 22 7 7
|
||||
2014-06-01 2 23 7 7
|
||||
2014-06-01 2 24 7 7
|
||||
2014-06-01 2 25 7 7
|
||||
2014-06-01 2 26 7 7
|
||||
2014-06-01 2 27 7 7
|
||||
2014-06-01 2 28 7 7
|
||||
2014-06-01 2 29 7 7
|
||||
2014-06-01 3 30 7 7
|
||||
2014-06-01 3 31 7 7
|
||||
2014-06-01 3 32 7 7
|
||||
2014-06-01 3 33 7 7
|
||||
2014-06-01 3 34 7 7
|
||||
2014-06-01 3 35 7 7
|
||||
2014-06-01 3 36 7 7
|
||||
2014-06-01 3 37 7 7
|
||||
2014-06-01 3 38 7 7
|
||||
2014-06-01 3 39 7 7
|
||||
2014-06-01 4 40 7 7
|
||||
2014-06-01 4 41 7 7
|
||||
2014-06-01 4 42 7 7
|
||||
2014-06-01 4 43 7 7
|
||||
2014-06-01 4 44 7 7
|
||||
2014-06-01 4 45 7 7
|
||||
2014-06-01 4 46 7 7
|
||||
2014-06-01 4 47 7 7
|
||||
2014-06-01 4 48 7 7
|
||||
2014-06-01 4 49 7 7
|
||||
2014-06-01 5 50 7 7
|
||||
2014-06-01 5 51 7 7
|
||||
2014-06-01 5 52 7 7
|
||||
2014-06-01 5 53 7 7
|
||||
2014-06-01 5 54 7 7
|
||||
2014-06-01 5 55 7 7
|
||||
2014-06-01 5 56 7 7
|
||||
2014-06-01 5 57 7 7
|
||||
2014-06-01 5 58 7 7
|
||||
2014-06-01 5 59 7 7
|
||||
2014-06-01 6 60 7 7
|
||||
2014-06-01 6 61 7 7
|
||||
2014-06-01 6 62 7 7
|
||||
2014-06-01 6 63 7 7
|
||||
2014-06-01 6 64 7 7
|
||||
2014-06-01 6 65 7 7
|
||||
2014-06-01 6 66 7 7
|
||||
2014-06-01 6 67 7 7
|
||||
2014-06-01 6 68 7 7
|
||||
2014-06-01 6 69 7 7
|
||||
2014-06-01 7 70 7 7
|
||||
2014-06-01 7 71 7 7
|
||||
2014-06-01 7 72 7 7
|
||||
2014-06-01 7 73 7 7
|
||||
2014-06-01 7 74 7 7
|
||||
2014-06-01 7 75 7 7
|
||||
2014-06-01 7 76 7 7
|
||||
2014-06-01 7 77 7 7
|
||||
2014-06-01 7 78 7 7
|
||||
2014-06-01 7 79 7 7
|
||||
2014-06-01 8 80 7 7
|
||||
2014-06-01 8 81 7 7
|
||||
2014-06-01 8 82 7 7
|
||||
2014-06-01 8 83 7 7
|
||||
2014-06-01 8 84 7 7
|
||||
2014-06-01 8 85 7 7
|
||||
2014-06-01 8 86 7 7
|
||||
2014-06-01 8 87 7 7
|
||||
2014-06-01 8 88 7 7
|
||||
2014-06-01 8 89 7 7
|
||||
2014-06-01 9 90 7 7
|
||||
2014-06-01 9 91 7 7
|
||||
2014-06-01 9 92 7 7
|
||||
2014-06-01 9 93 7 7
|
||||
2014-06-01 9 94 7 7
|
||||
2014-06-01 9 95 7 7
|
||||
2014-06-01 9 96 7 7
|
||||
2014-06-01 9 97 7 7
|
||||
2014-06-01 9 98 7 7
|
||||
2014-06-01 9 99 7 7
|
||||
2014-06-01 0 7 7
|
||||
2014-06-01 1 7 7
|
||||
2014-06-01 2 7 7
|
||||
2014-06-01 3 7 7
|
||||
2014-06-01 4 7 7
|
||||
2014-06-01 5 7 7
|
||||
2014-06-01 6 7 7
|
||||
2014-06-01 7 7 7
|
||||
2014-06-01 8 7 7
|
||||
2014-06-01 9 7 7
|
||||
2014-06-01 7 7
|
||||
0 333333 53 53
|
||||
1 333333 53 53
|
||||
2 333333 53 53
|
||||
0 333333 53 53
|
||||
1 333333 53 53
|
||||
2 333333 53 53
|
||||
10000000
|
||||
10021957
|
||||
10021969
|
||||
10094819
|
114
tests/queries/0_stateless/01781_uniq_theta_sketch.sql
Normal file
114
tests/queries/0_stateless/01781_uniq_theta_sketch.sql
Normal file
@ -0,0 +1,114 @@
|
||||
DROP TABLE IF EXISTS stored_aggregates;
|
||||
|
||||
-- simple
|
||||
CREATE TABLE stored_aggregates
|
||||
(
|
||||
d Date,
|
||||
Uniq AggregateFunction(uniq, UInt64),
|
||||
UniqThetaSketch AggregateFunction(uniqThetaSketch, UInt64)
|
||||
)
|
||||
ENGINE = AggregatingMergeTree(d, d, 8192);
|
||||
|
||||
INSERT INTO stored_aggregates
|
||||
SELECT
|
||||
toDate('2014-06-01') AS d,
|
||||
uniqState(number) AS Uniq,
|
||||
uniqThetaSketchState(number) AS UniqThetaSketch
|
||||
FROM
|
||||
(
|
||||
SELECT * FROM system.numbers LIMIT 1000
|
||||
);
|
||||
|
||||
SELECT uniqMerge(Uniq), uniqThetaSketchMerge(UniqThetaSketch) FROM stored_aggregates;
|
||||
|
||||
SELECT d, uniqMerge(Uniq), uniqThetaSketchMerge(UniqThetaSketch) FROM stored_aggregates GROUP BY d ORDER BY d;
|
||||
|
||||
OPTIMIZE TABLE stored_aggregates;
|
||||
|
||||
SELECT uniqMerge(Uniq), uniqThetaSketchMerge(UniqThetaSketch) FROM stored_aggregates;
|
||||
|
||||
SELECT d, uniqMerge(Uniq), uniqThetaSketchMerge(UniqThetaSketch) FROM stored_aggregates GROUP BY d ORDER BY d;
|
||||
|
||||
DROP TABLE stored_aggregates;
|
||||
|
||||
-- complex
|
||||
CREATE TABLE stored_aggregates
|
||||
(
|
||||
d Date,
|
||||
k1 UInt64,
|
||||
k2 String,
|
||||
Uniq AggregateFunction(uniq, UInt64),
|
||||
UniqThetaSketch AggregateFunction(uniqThetaSketch, UInt64)
|
||||
)
|
||||
ENGINE = AggregatingMergeTree(d, (d, k1, k2), 8192);
|
||||
|
||||
INSERT INTO stored_aggregates
|
||||
SELECT
|
||||
toDate('2014-06-01') AS d,
|
||||
intDiv(number, 100) AS k1,
|
||||
toString(intDiv(number, 10)) AS k2,
|
||||
uniqState(toUInt64(number % 7)) AS Uniq,
|
||||
uniqThetaSketchState(toUInt64(number % 7)) AS UniqThetaSketch
|
||||
FROM
|
||||
(
|
||||
SELECT * FROM system.numbers LIMIT 1000
|
||||
)
|
||||
GROUP BY d, k1, k2
|
||||
ORDER BY d, k1, k2;
|
||||
|
||||
SELECT d, k1, k2,
|
||||
uniqMerge(Uniq), uniqThetaSketchMerge(UniqThetaSketch)
|
||||
FROM stored_aggregates
|
||||
GROUP BY d, k1, k2
|
||||
ORDER BY d, k1, k2;
|
||||
|
||||
SELECT d, k1,
|
||||
uniqMerge(Uniq), uniqThetaSketchMerge(UniqThetaSketch)
|
||||
FROM stored_aggregates
|
||||
GROUP BY d, k1
|
||||
ORDER BY d, k1;
|
||||
|
||||
SELECT d,
|
||||
uniqMerge(Uniq), uniqThetaSketchMerge(UniqThetaSketch)
|
||||
FROM stored_aggregates
|
||||
GROUP BY d
|
||||
ORDER BY d;
|
||||
|
||||
DROP TABLE stored_aggregates;
|
||||
|
||||
---- sum + uniq with more data
|
||||
drop table if exists summing_merge_tree_null;
|
||||
drop table if exists summing_merge_tree_aggregate_function;
|
||||
create table summing_merge_tree_null (
|
||||
d materialized today(),
|
||||
k UInt64,
|
||||
c UInt64,
|
||||
u UInt64
|
||||
) engine=Null;
|
||||
|
||||
create materialized view summing_merge_tree_aggregate_function (
|
||||
d Date,
|
||||
k UInt64,
|
||||
c UInt64,
|
||||
un AggregateFunction(uniq, UInt64),
|
||||
ut AggregateFunction(uniqThetaSketch, UInt64)
|
||||
) engine=SummingMergeTree(d, k, 8192)
|
||||
as select d, k, sum(c) as c, uniqState(u) as un, uniqThetaSketchState(u) as ut
|
||||
from summing_merge_tree_null
|
||||
group by d, k;
|
||||
|
||||
-- prime number 53 to avoid resonance between %3 and %53
|
||||
insert into summing_merge_tree_null select number % 3, 1, number % 53 from numbers(999999);
|
||||
|
||||
select k, sum(c), uniqMerge(un), uniqThetaSketchMerge(ut) from summing_merge_tree_aggregate_function group by k order by k;
|
||||
optimize table summing_merge_tree_aggregate_function;
|
||||
select k, sum(c), uniqMerge(un), uniqThetaSketchMerge(ut) from summing_merge_tree_aggregate_function group by k order by k;
|
||||
|
||||
drop table summing_merge_tree_aggregate_function;
|
||||
drop table summing_merge_tree_null;
|
||||
|
||||
-- precision: exact count vs. approximate estimators over numbers(1e7)
|
||||
SELECT uniqExact(number) FROM numbers(1e7);
|
||||
SELECT uniqCombined(number) FROM numbers(1e7);
|
||||
SELECT uniqCombined64(number) FROM numbers(1e7);
|
||||
SELECT uniqThetaSketch(number) FROM numbers(1e7);
|
Loading…
Reference in New Issue
Block a user