more comments

This commit is contained in:
Han Fei 2023-09-27 21:46:20 +02:00
parent 6ce77b07f7
commit 18f6fd1e5f
3 changed files with 14 additions and 8 deletions

View File

@ -7,6 +7,7 @@ namespace DB
class RPNBuilderTreeNode; class RPNBuilderTreeNode;
/// It estimates the selectivity of a condition.
class ConditionEstimator class ConditionEstimator
{ {
private: private:
@ -20,11 +21,13 @@ private:
UInt64 total_count = 0; UInt64 total_count = 0;
/// Minimum estimator for values in a part. It can contain multiple types of statistics,
/// but right now we only have tdigest.
struct PartColumnEstimator struct PartColumnEstimator
{ {
UInt64 part_count = 0; UInt64 part_count = 0;
std::shared_ptr<TDigestStatistic> t_digest; std::shared_ptr<TDigestStatistic> tdigest;
void merge(StatisticPtr statistic) void merge(StatisticPtr statistic)
{ {
@ -34,25 +37,27 @@ private:
if (typeid_cast<TDigestStatistic *>(statistic.get())) if (typeid_cast<TDigestStatistic *>(statistic.get()))
{ {
t_digest = std::static_pointer_cast<TDigestStatistic>(statistic); tdigest = std::static_pointer_cast<TDigestStatistic>(statistic);
} }
} }
Float64 estimateLess(Float64 val) const Float64 estimateLess(Float64 val) const
{ {
if (t_digest != nullptr) if (tdigest != nullptr)
return t_digest -> estimateLess(val); return tdigest -> estimateLess(val);
return part_count * default_normal_cond_factor; return part_count * default_normal_cond_factor;
} }
Float64 estimateGreator(Float64 val) const Float64 estimateGreator(Float64 val) const
{ {
if (t_digest != nullptr) if (tdigest != nullptr)
return part_count - t_digest -> estimateLess(val); return part_count - tdigest -> estimateLess(val);
return part_count * default_normal_cond_factor; return part_count * default_normal_cond_factor;
} }
}; };
/// An estimator for a column consists of several PartColumnEstimators.
/// We simply get the selectivity from every part estimator and combine the results.
struct ColumnEstimator struct ColumnEstimator
{ {
std::map<std::string, PartColumnEstimator> estimators; std::map<std::string, PartColumnEstimator> estimators;

View File

@ -23,7 +23,7 @@ class IStatistic;
using StatisticPtr = std::shared_ptr<IStatistic>; using StatisticPtr = std::shared_ptr<IStatistic>;
using Statistics = std::vector<StatisticPtr>; using Statistics = std::vector<StatisticPtr>;
/// Statistic for a column /// Statistic contains the distribution of values in a column.
/// right now we support /// right now we support
/// - tdigest /// - tdigest
class IStatistic class IStatistic
@ -60,6 +60,7 @@ protected:
}; };
/// TDigestStatistic is a kind of histogram.
class TDigestStatistic : public IStatistic class TDigestStatistic : public IStatistic
{ {
QuantileTDigest<Float64> data; QuantileTDigest<Float64> data;

View File

@ -45,7 +45,7 @@ create temporary table known_short_messages (s String) as select * from (select
'Unknown identifier: ''{}''', 'User name is empty', 'Expected function, got: {}', 'Unknown identifier: ''{}''', 'User name is empty', 'Expected function, got: {}',
'Attempt to read after eof', 'String size is too big ({}), maximum: {}', 'Attempt to read after eof', 'String size is too big ({}), maximum: {}',
'Processed: {}%', 'Creating {}: {}', 'Table {}.{} doesn''t exist', 'Invalid cache key hex: {}', 'Processed: {}%', 'Creating {}: {}', 'Table {}.{} doesn''t exist', 'Invalid cache key hex: {}',
'User has been dropped', 'Illegal type {} of argument of function {}. Should be DateTime or DateTime64' 'User has been dropped', 'Illegal type {} of argument of function {}. Should be DateTime or DateTime64',
'Unknown statistic column: {}' 'Unknown statistic column: {}'
] as arr) array join arr; ] as arr) array join arr;