Cosmetics II

This commit is contained in:
Robert Schulze 2024-08-06 12:33:18 +00:00
parent 2776a515ba
commit d09c82ff76
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
8 changed files with 36 additions and 34 deletions

View File

@ -1005,7 +1005,7 @@ They can be used for prewhere optimization only if we enable `set allow_statisti
## Column-level Settings {#column-level-settings}
Certain MergeTree settings can be override at column level:
Certain MergeTree settings can be overridden at column level:
- `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table.
- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark.

View File

@ -89,15 +89,17 @@ Float64 IStatistics::estimateLess(const Field & /*val*/) const
throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics");
}
/// -------------------------------------
/// Implementation of the estimation:
/// Note: Each statistics object supports certain types predicates natively, e.g.
/// - TDigest: '< X' (less-than predicates)
/// - Count-min sketches: '= X' (equal predicates)
/// - Uniq (HyperLogLog): 'count distinct(*)' (column cardinality)
/// If multiple statistics objects are available per column, it is sometimes also possible to combine them in a clever way.
/// For that reason, all estimation are performed in a central place (here), and we don't simply pass the predicate to the first statistics
/// object that supports it natively.
/// Notes:
/// - Statistics objects usually only support estimation for certain types of predicates, e.g.
/// - TDigest: '< X' (less-than predicates)
/// - Count-min sketches: '= X' (equal predicates)
/// - Uniq (HyperLogLog): 'count distinct(*)' (column cardinality)
///
/// If multiple statistics objects in a column support estimating a predicate, we want to try statistics in order of descending accuracy
/// (e.g. MinMax statistics are simpler than TDigest statistics and thus worse for estimating 'less' predicates).
///
/// Sometimes, it is possible to combine multiple statistics in a clever way. For that reason, all estimations are performed in a central
/// place (here), and we don't simply pass the predicate to the first statistics object that supports it natively.
Float64 ColumnStatistics::estimateLess(const Field & val) const
{

View File

@ -25,8 +25,8 @@ extern const int ILLEGAL_STATISTICS;
static constexpr auto num_hashes = 7uz;
static constexpr auto num_buckets = 2718uz;
StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_)
: IStatistics(stat_)
StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & description, DataTypePtr data_type_)
: IStatistics(description)
, sketch(num_hashes, num_buckets)
, data_type(data_type_)
{
@ -84,7 +84,7 @@ void StatisticsCountMinSketch::deserialize(ReadBuffer & buf)
}
void countMinSketchStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
void countMinSketchStatisticsValidator(const SingleStatisticsDescription & /*description*/, DataTypePtr data_type)
{
data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type);
@ -92,9 +92,9 @@ void countMinSketchStatisticsValidator(const SingleStatisticsDescription &, Data
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'count_min' does not support type {}", data_type->getName());
}
StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & description, DataTypePtr data_type)
{
return std::make_shared<StatisticsCountMinSketch>(stat, data_type);
return std::make_shared<StatisticsCountMinSketch>(description, data_type);
}
}

View File

@ -14,7 +14,7 @@ namespace DB
class StatisticsCountMinSketch : public IStatistics
{
public:
StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_);
StatisticsCountMinSketch(const SingleStatisticsDescription & description, DataTypePtr data_type_);
Float64 estimateEqual(const Field & val) const override;
@ -31,8 +31,8 @@ private:
};
void countMinSketchStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr);
void countMinSketchStatisticsValidator(const SingleStatisticsDescription & description, DataTypePtr data_type);
StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & description, DataTypePtr data_type);
}

View File

@ -10,8 +10,8 @@ extern const int ILLEGAL_STATISTICS;
extern const int LOGICAL_ERROR;
}
StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_)
: IStatistics(stat_)
StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & description)
: IStatistics(description)
{
}
@ -57,7 +57,7 @@ Float64 StatisticsTDigest::estimateEqual(const Field & val) const
throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName());
}
void tdigestStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
void tdigestStatisticsValidator(const SingleStatisticsDescription & /*description*/, DataTypePtr data_type)
{
data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type);
@ -65,9 +65,9 @@ void tdigestStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName());
}
StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr)
StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & description, DataTypePtr /*data_type*/)
{
return std::make_shared<StatisticsTDigest>(stat);
return std::make_shared<StatisticsTDigest>(description);
}
}

View File

@ -9,7 +9,7 @@ namespace DB
class StatisticsTDigest : public IStatistics
{
public:
explicit StatisticsTDigest(const SingleStatisticsDescription & stat_);
explicit StatisticsTDigest(const SingleStatisticsDescription & description);
void update(const ColumnPtr & column) override;
@ -23,7 +23,7 @@ private:
QuantileTDigest<Float64> t_digest;
};
void tdigestStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr);
void tdigestStatisticsValidator(const SingleStatisticsDescription & description, DataTypePtr data_type);
StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & description, DataTypePtr data_type);
}

View File

@ -11,8 +11,8 @@ namespace ErrorCodes
extern const int ILLEGAL_STATISTICS;
}
StatisticsUniq::StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type)
: IStatistics(stat_)
StatisticsUniq::StatisticsUniq(const SingleStatisticsDescription & description, const DataTypePtr & data_type)
: IStatistics(description)
{
arena = std::make_unique<Arena>();
AggregateFunctionProperties properties;
@ -52,7 +52,7 @@ UInt64 StatisticsUniq::estimateCardinality() const
return column->getUInt(0);
}
void uniqStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
void uniqStatisticsValidator(const SingleStatisticsDescription & /*description*/, DataTypePtr data_type)
{
data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type);
@ -60,9 +60,9 @@ void uniqStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr da
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName());
}
StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & description, DataTypePtr data_type)
{
return std::make_shared<StatisticsUniq>(stat, data_type);
return std::make_shared<StatisticsUniq>(description, data_type);
}
}

View File

@ -10,7 +10,7 @@ namespace DB
class StatisticsUniq : public IStatistics
{
public:
StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type);
StatisticsUniq(const SingleStatisticsDescription & description, const DataTypePtr & data_type);
~StatisticsUniq() override;
void update(const ColumnPtr & column) override;
@ -27,7 +27,7 @@ private:
};
void uniqStatisticsValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type);
void uniqStatisticsValidator(const SingleStatisticsDescription & description, DataTypePtr data_type);
StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & description, DataTypePtr data_type);
}