Cosmetics II

This commit is contained in:
Robert Schulze 2024-08-06 12:33:18 +00:00
parent 2776a515ba
commit d09c82ff76
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
8 changed files with 36 additions and 34 deletions

View File

@ -1005,7 +1005,7 @@ They can be used for prewhere optimization only if we enable `set allow_statisti
## Column-level Settings {#column-level-settings} ## Column-level Settings {#column-level-settings}
Certain MergeTree settings can be override at column level: Certain MergeTree settings can be overridden at column level:
- `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. - `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table.
- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. - `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark.

View File

@ -89,15 +89,17 @@ Float64 IStatistics::estimateLess(const Field & /*val*/) const
throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics"); throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics");
} }
/// ------------------------------------- /// Notes:
/// Implementation of the estimation: /// - Statistics objects usually only support estimation for certain types of predicates, e.g.
/// Note: Each statistics object supports certain types predicates natively, e.g.
/// - TDigest: '< X' (less-than predicates) /// - TDigest: '< X' (less-than predicates)
/// - Count-min sketches: '= X' (equal predicates) /// - Count-min sketches: '= X' (equal predicates)
/// - Uniq (HyperLogLog): 'count distinct(*)' (column cardinality) /// - Uniq (HyperLogLog): 'count distinct(*)' (column cardinality)
/// If multiple statistics objects are available per column, it is sometimes also possible to combine them in a clever way. ///
/// For that reason, all estimation are performed in a central place (here), and we don't simply pass the predicate to the first statistics /// If multiple statistics objects in a column support estimating a predicate, we want to try statistics in order of descending accuracy
/// object that supports it natively. /// (e.g. MinMax statistics are simpler than TDigest statistics and thus worse for estimating 'less' predicates).
///
/// Sometimes, it is possible to combine multiple statistics in a clever way. For that reason, all estimations are performed in a central
/// place (here), and we don't simply pass the predicate to the first statistics object that supports it natively.
Float64 ColumnStatistics::estimateLess(const Field & val) const Float64 ColumnStatistics::estimateLess(const Field & val) const
{ {

View File

@ -25,8 +25,8 @@ extern const int ILLEGAL_STATISTICS;
static constexpr auto num_hashes = 7uz; static constexpr auto num_hashes = 7uz;
static constexpr auto num_buckets = 2718uz; static constexpr auto num_buckets = 2718uz;
StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_) StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & description, DataTypePtr data_type_)
: IStatistics(stat_) : IStatistics(description)
, sketch(num_hashes, num_buckets) , sketch(num_hashes, num_buckets)
, data_type(data_type_) , data_type(data_type_)
{ {
@ -84,7 +84,7 @@ void StatisticsCountMinSketch::deserialize(ReadBuffer & buf)
} }
void countMinSketchStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type) void countMinSketchStatisticsValidator(const SingleStatisticsDescription & /*description*/, DataTypePtr data_type)
{ {
data_type = removeNullable(data_type); data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type); data_type = removeLowCardinalityAndNullable(data_type);
@ -92,9 +92,9 @@ void countMinSketchStatisticsValidator(const SingleStatisticsDescription &, Data
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'count_min' does not support type {}", data_type->getName()); throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'count_min' does not support type {}", data_type->getName());
} }
StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & description, DataTypePtr data_type)
{ {
return std::make_shared<StatisticsCountMinSketch>(stat, data_type); return std::make_shared<StatisticsCountMinSketch>(description, data_type);
} }
} }

View File

@ -14,7 +14,7 @@ namespace DB
class StatisticsCountMinSketch : public IStatistics class StatisticsCountMinSketch : public IStatistics
{ {
public: public:
StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_); StatisticsCountMinSketch(const SingleStatisticsDescription & description, DataTypePtr data_type_);
Float64 estimateEqual(const Field & val) const override; Float64 estimateEqual(const Field & val) const override;
@ -31,8 +31,8 @@ private:
}; };
void countMinSketchStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type); void countMinSketchStatisticsValidator(const SingleStatisticsDescription & description, DataTypePtr data_type);
StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr); StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & description, DataTypePtr data_type);
} }

View File

@ -10,8 +10,8 @@ extern const int ILLEGAL_STATISTICS;
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
} }
StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_) StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & description)
: IStatistics(stat_) : IStatistics(description)
{ {
} }
@ -57,7 +57,7 @@ Float64 StatisticsTDigest::estimateEqual(const Field & val) const
throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName()); throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName());
} }
void tdigestStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type) void tdigestStatisticsValidator(const SingleStatisticsDescription & /*description*/, DataTypePtr data_type)
{ {
data_type = removeNullable(data_type); data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type); data_type = removeLowCardinalityAndNullable(data_type);
@ -65,9 +65,9 @@ void tdigestStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName()); throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName());
} }
StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr) StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & description, DataTypePtr /*data_type*/)
{ {
return std::make_shared<StatisticsTDigest>(stat); return std::make_shared<StatisticsTDigest>(description);
} }
} }

View File

@ -9,7 +9,7 @@ namespace DB
class StatisticsTDigest : public IStatistics class StatisticsTDigest : public IStatistics
{ {
public: public:
explicit StatisticsTDigest(const SingleStatisticsDescription & stat_); explicit StatisticsTDigest(const SingleStatisticsDescription & description);
void update(const ColumnPtr & column) override; void update(const ColumnPtr & column) override;
@ -23,7 +23,7 @@ private:
QuantileTDigest<Float64> t_digest; QuantileTDigest<Float64> t_digest;
}; };
void tdigestStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type); void tdigestStatisticsValidator(const SingleStatisticsDescription & description, DataTypePtr data_type);
StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr); StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & description, DataTypePtr data_type);
} }

View File

@ -11,8 +11,8 @@ namespace ErrorCodes
extern const int ILLEGAL_STATISTICS; extern const int ILLEGAL_STATISTICS;
} }
StatisticsUniq::StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type) StatisticsUniq::StatisticsUniq(const SingleStatisticsDescription & description, const DataTypePtr & data_type)
: IStatistics(stat_) : IStatistics(description)
{ {
arena = std::make_unique<Arena>(); arena = std::make_unique<Arena>();
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
@ -52,7 +52,7 @@ UInt64 StatisticsUniq::estimateCardinality() const
return column->getUInt(0); return column->getUInt(0);
} }
void uniqStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type) void uniqStatisticsValidator(const SingleStatisticsDescription & /*description*/, DataTypePtr data_type)
{ {
data_type = removeNullable(data_type); data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type); data_type = removeLowCardinalityAndNullable(data_type);
@ -60,9 +60,9 @@ void uniqStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr da
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName()); throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName());
} }
StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type) StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & description, DataTypePtr data_type)
{ {
return std::make_shared<StatisticsUniq>(stat, data_type); return std::make_shared<StatisticsUniq>(description, data_type);
} }
} }

View File

@ -10,7 +10,7 @@ namespace DB
class StatisticsUniq : public IStatistics class StatisticsUniq : public IStatistics
{ {
public: public:
StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type); StatisticsUniq(const SingleStatisticsDescription & description, const DataTypePtr & data_type);
~StatisticsUniq() override; ~StatisticsUniq() override;
void update(const ColumnPtr & column) override; void update(const ColumnPtr & column) override;
@ -27,7 +27,7 @@ private:
}; };
void uniqStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type); void uniqStatisticsValidator(const SingleStatisticsDescription & description, DataTypePtr data_type);
StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type); StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & description, DataTypePtr data_type);
} }