more comments

This commit is contained in:
Han Fei 2023-09-27 21:46:20 +02:00
parent 6ce77b07f7
commit 18f6fd1e5f
3 changed files with 14 additions and 8 deletions

View File

@ -7,6 +7,7 @@ namespace DB
class RPNBuilderTreeNode;
/// It estimates the selectivity of a condition.
class ConditionEstimator
{
private:
@ -20,11 +21,13 @@ private:
UInt64 total_count = 0;
/// Minimum estimator for values in a part. It can contain multiple types of statistics,
/// but right now we only have tdigest.
struct PartColumnEstimator
{
UInt64 part_count = 0;
std::shared_ptr<TDigestStatistic> t_digest;
std::shared_ptr<TDigestStatistic> tdigest;
void merge(StatisticPtr statistic)
{
@ -34,25 +37,27 @@ private:
if (typeid_cast<TDigestStatistic *>(statistic.get()))
{
t_digest = std::static_pointer_cast<TDigestStatistic>(statistic);
tdigest = std::static_pointer_cast<TDigestStatistic>(statistic);
}
}
Float64 estimateLess(Float64 val) const
{
if (t_digest != nullptr)
return t_digest -> estimateLess(val);
if (tdigest != nullptr)
return tdigest -> estimateLess(val);
return part_count * default_normal_cond_factor;
}
Float64 estimateGreator(Float64 val) const
{
if (t_digest != nullptr)
return part_count - t_digest -> estimateLess(val);
if (tdigest != nullptr)
return part_count - tdigest -> estimateLess(val);
return part_count * default_normal_cond_factor;
}
};
/// An estimator for a column consists of several PartColumnEstimators.
/// We simply get the selectivity from every part estimator and combine the results.
struct ColumnEstimator
{
std::map<std::string, PartColumnEstimator> estimators;

View File

@ -23,7 +23,7 @@ class IStatistic;
using StatisticPtr = std::shared_ptr<IStatistic>;
using Statistics = std::vector<StatisticPtr>;
/// Statistic for a column
/// Statistic contains the distribution of values in a column.
/// Right now we support:
/// - tdigest
class IStatistic
@ -60,6 +60,7 @@ protected:
};
/// TDigestStatistic is a kind of histogram.
class TDigestStatistic : public IStatistic
{
QuantileTDigest<Float64> data;

View File

@ -45,7 +45,7 @@ create temporary table known_short_messages (s String) as select * from (select
'Unknown identifier: ''{}''', 'User name is empty', 'Expected function, got: {}',
'Attempt to read after eof', 'String size is too big ({}), maximum: {}',
'Processed: {}%', 'Creating {}: {}', 'Table {}.{} doesn''t exist', 'Invalid cache key hex: {}',
'User has been dropped', 'Illegal type {} of argument of function {}. Should be DateTime or DateTime64'
'User has been dropped', 'Illegal type {} of argument of function {}. Should be DateTime or DateTime64',
'Unknown statistic column: {}'
] as arr) array join arr;