mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-03 13:02:00 +00:00
wip #14893
This commit is contained in:
parent
48b745206c
commit
1ac42e003c
@ -19,16 +19,17 @@ class ThetaSketchData : private boost::noncopyable
|
|||||||
private:
|
private:
|
||||||
mutable datasketches::update_theta_sketch sk_update;
|
mutable datasketches::update_theta_sketch sk_update;
|
||||||
mutable datasketches::theta_union sk_union;
|
mutable datasketches::theta_union sk_union;
|
||||||
|
bool is_merged;
|
||||||
Poco::Logger * log;
|
Poco::Logger * log;
|
||||||
|
|
||||||
void internal_merge() const
|
// void internal_merge() const
|
||||||
{
|
// {
|
||||||
if (!sk_update.is_empty())
|
// if (!sk_update.is_empty())
|
||||||
{
|
// {
|
||||||
sk_union.update(sk_update);
|
// sk_union.update(sk_update);
|
||||||
sk_update = datasketches::update_theta_sketch::builder().build();
|
// sk_update = datasketches::update_theta_sketch::builder().build();
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
public:
|
public:
|
||||||
using value_type = Key;
|
using value_type = Key;
|
||||||
@ -36,63 +37,84 @@ public:
|
|||||||
ThetaSketchData()
|
ThetaSketchData()
|
||||||
: sk_update(datasketches::update_theta_sketch::builder().build()),
|
: sk_update(datasketches::update_theta_sketch::builder().build()),
|
||||||
sk_union(datasketches::theta_union::builder().build()),
|
sk_union(datasketches::theta_union::builder().build()),
|
||||||
|
is_merged(false),
|
||||||
log(&Poco::Logger::get("ThetaSketchData"))
|
log(&Poco::Logger::get("ThetaSketchData"))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
~ThetaSketchData() = default;
|
~ThetaSketchData() = default;
|
||||||
|
|
||||||
/// Insert original value without hash, as `datasketches::update_theta_sketch.update` will do the hash internal.
|
/// Insert original value without hash, as `datasketches::update_theta_sketch.update` will do the hash internal.
|
||||||
void insert_original(StringRef value)
|
void insert_original(const StringRef & value)
|
||||||
{
|
{
|
||||||
sk_update.update(value.data, value.size);
|
sk_update.update(value.data, value.size);
|
||||||
LOG_WARNING(log, "insert_origin() {}, {}", value.toString(), sk_update.to_string());
|
LOG_WARNING(log, "insert_origin() {}", value.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Note that `datasketches::update_theta_sketch.update` will do the hash again.
|
/// Note that `datasketches::update_theta_sketch.update` will do the hash again.
|
||||||
void insert(Key value)
|
void insert(Key value)
|
||||||
{
|
{
|
||||||
sk_update.update(value);
|
sk_update.update(value);
|
||||||
LOG_WARNING(log, "insert() {}, {}", value, sk_update.to_string());
|
LOG_WARNING(log, "insert() {}", value);
|
||||||
}
|
}
|
||||||
|
|
||||||
UInt64 size() const
|
UInt64 size() const
|
||||||
{
|
{
|
||||||
LOG_WARNING(log, "size() update:{}, union:{}", sk_update.get_estimate(), sk_union.get_result().get_estimate());
|
LOG_WARNING(log, "size() update:{}, union:{}", sk_update.get_estimate(), sk_union.get_result().get_estimate());
|
||||||
internal_merge();
|
if (!is_merged)
|
||||||
|
return static_cast<UInt64>(sk_update.get_estimate());
|
||||||
|
else
|
||||||
return static_cast<UInt64>(sk_union.get_result().get_estimate());
|
return static_cast<UInt64>(sk_union.get_result().get_estimate());
|
||||||
}
|
}
|
||||||
|
|
||||||
void merge(const ThetaSketchData & rhs)
|
void merge(const ThetaSketchData & rhs)
|
||||||
{
|
{
|
||||||
rhs.internal_merge();
|
if (!is_merged && !sk_update.is_empty())
|
||||||
|
{
|
||||||
|
sk_union.update(sk_update);
|
||||||
|
}
|
||||||
|
is_merged = true;
|
||||||
|
|
||||||
|
if (!rhs.is_merged && !rhs.sk_update.is_empty())
|
||||||
|
sk_union.update(rhs.sk_update);
|
||||||
|
else if (rhs.is_merged)
|
||||||
sk_union.update(rhs.sk_union.get_result());
|
sk_union.update(rhs.sk_union.get_result());
|
||||||
|
|
||||||
LOG_WARNING(log, "merge() result:{}, rhs:{}", sk_union.get_result().to_string(), rhs.sk_union.get_result().to_string());
|
LOG_WARNING(log, "merge() result:{}", sk_union.get_result().to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// You can only call for an empty object.
|
/// You can only call for an empty object.
|
||||||
void read(DB::ReadBuffer & in)
|
void read(DB::ReadBuffer & in)
|
||||||
{
|
{
|
||||||
|
LOG_WARNING(log, "read() {}", sk_union.get_result().to_string());
|
||||||
|
|
||||||
datasketches::compact_theta_sketch::vector_bytes bytes;
|
datasketches::compact_theta_sketch::vector_bytes bytes;
|
||||||
readVectorBinary(bytes, in);
|
readVectorBinary(bytes, in);
|
||||||
auto sk = datasketches::compact_theta_sketch::deserialize(bytes.data(), bytes.size());
|
auto sk = datasketches::compact_theta_sketch::deserialize(bytes.data(), bytes.size());
|
||||||
|
|
||||||
sk_union = datasketches::theta_union::builder().build();
|
sk_union = datasketches::theta_union::builder().build();
|
||||||
sk_union.update(sk);
|
sk_union.update(sk);
|
||||||
|
is_merged = true;
|
||||||
|
|
||||||
LOG_WARNING(log, "read() {}", sk_union.get_result().to_string());
|
LOG_WARNING(log, "read()[after] {}", sk_union.get_result().to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
void readAndMerge(DB::ReadBuffer &)
|
// void readAndMerge(DB::ReadBuffer &)
|
||||||
{
|
// {
|
||||||
assert(0);
|
// LOG_WARNING(log, "readAndMerge() {}", sk_union.get_result().to_string());
|
||||||
}
|
// }
|
||||||
|
|
||||||
void write(DB::WriteBuffer & out) const
|
void write(DB::WriteBuffer & out) const
|
||||||
{
|
{
|
||||||
internal_merge();
|
if (!is_merged)
|
||||||
|
{
|
||||||
|
auto bytes = sk_update.compact().serialize();
|
||||||
|
writeVectorBinary(bytes, out);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
auto bytes = sk_union.get_result().serialize();
|
auto bytes = sk_union.get_result().serialize();
|
||||||
writeVectorBinary(bytes, out);
|
writeVectorBinary(bytes, out);
|
||||||
|
}
|
||||||
|
|
||||||
LOG_WARNING(log, "write() {}", sk_union.get_result().to_string());
|
LOG_WARNING(log, "write() {}", sk_union.get_result().to_string());
|
||||||
}
|
}
|
||||||
|
@ -12,7 +12,8 @@ namespace DB
|
|||||||
static bool isUniq(const ASTFunction & func)
|
static bool isUniq(const ASTFunction & func)
|
||||||
{
|
{
|
||||||
return func.name == "uniq" || func.name == "uniqExact" || func.name == "uniqHLL12"
|
return func.name == "uniq" || func.name == "uniqExact" || func.name == "uniqHLL12"
|
||||||
|| func.name == "uniqCombined" || func.name == "uniqCombined64";
|
|| func.name == "uniqCombined" || func.name == "uniqCombined64"
|
||||||
|
|| func.name == "uniqThetaSketchState";
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Remove injective functions of one argument: replace with a child
|
/// Remove injective functions of one argument: replace with a child
|
||||||
|
Loading…
Reference in New Issue
Block a user