diff --git a/src/Storages/Statistic/Estimator.h b/src/Storages/Statistic/Estimator.h index a52351f4879..53ea46cbfd5 100644 --- a/src/Storages/Statistic/Estimator.h +++ b/src/Storages/Statistic/Estimator.h @@ -7,6 +7,7 @@ namespace DB class RPNBuilderTreeNode; +/// It estimates the selectivity of a condition. class ConditionEstimator { private: @@ -20,11 +21,13 @@ private: UInt64 total_count = 0; + /// Minimum estimator for values in a part. It can contain multiple types of statistics. + /// But right now we only have tdigest. struct PartColumnEstimator { UInt64 part_count = 0; - std::shared_ptr t_digest; + std::shared_ptr tdigest; void merge(StatisticPtr statistic) { @@ -34,25 +37,27 @@ private: if (typeid_cast(statistic.get())) { - t_digest = std::static_pointer_cast(statistic); + tdigest = std::static_pointer_cast(statistic); } } Float64 estimateLess(Float64 val) const { - if (t_digest != nullptr) - return t_digest -> estimateLess(val); + if (tdigest != nullptr) + return tdigest -> estimateLess(val); return part_count * default_normal_cond_factor; } Float64 estimateGreator(Float64 val) const { - if (t_digest != nullptr) - return part_count - t_digest -> estimateLess(val); + if (tdigest != nullptr) + return part_count - tdigest -> estimateLess(val); return part_count * default_normal_cond_factor; } }; + /// An estimator for a column consists of several PartColumnEstimators. + /// We simply get selectivity for every part estimator and combine the result. struct ColumnEstimator { std::map estimators; diff --git a/src/Storages/Statistic/Statistic.h b/src/Storages/Statistic/Statistic.h index 0bb416b9feb..f474fffafa5 100644 --- a/src/Storages/Statistic/Statistic.h +++ b/src/Storages/Statistic/Statistic.h @@ -23,7 +23,7 @@ class IStatistic; using StatisticPtr = std::shared_ptr; using Statistics = std::vector; -/// Statistic for a column +/// Statistic contains the distribution of values in a column. 
/// right now we support /// - tdigest class IStatistic @@ -60,6 +60,7 @@ protected: }; +/// TDigestStatistic is a kind of histogram. class TDigestStatistic : public IStatistic { QuantileTDigest data; diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index aa7603f8c35..38626fc2928 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -45,7 +45,7 @@ create temporary table known_short_messages (s String) as select * from (select 'Unknown identifier: ''{}''', 'User name is empty', 'Expected function, got: {}', 'Attempt to read after eof', 'String size is too big ({}), maximum: {}', 'Processed: {}%', 'Creating {}: {}', 'Table {}.{} doesn''t exist', 'Invalid cache key hex: {}', -'User has been dropped', 'Illegal type {} of argument of function {}. Should be DateTime or DateTime64' +'User has been dropped', 'Illegal type {} of argument of function {}. Should be DateTime or DateTime64', 'Unknown statistic column: {}' ] as arr) array join arr;