mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Optimize Count() Cond.
Follow up https://github.com/yandex/ClickHouse/pull/6028
This commit is contained in:
parent
7388454eb2
commit
a7db425592
@ -982,6 +982,18 @@ void ExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptio
|
||||
aggregates = aggregate_descriptions;
|
||||
}
|
||||
|
||||
struct ColumnExt
|
||||
{
|
||||
size_t compressed_size;
|
||||
size_t type_size;
|
||||
size_t uncompressed_size;
|
||||
String name;
|
||||
bool operator<(const ColumnExt & that) const
|
||||
{
|
||||
return std::tie(compressed_size, type_size, uncompressed_size) < std::tie(that.compressed_size, that.type_size, that.uncompressed_size);
|
||||
}
|
||||
};
|
||||
|
||||
void ExpressionAnalyzer::collectUsedColumns()
|
||||
{
|
||||
/** Calculate which columns are required to execute the expression.
|
||||
@ -1040,23 +1052,23 @@ void ExpressionAnalyzer::collectUsedColumns()
|
||||
/// You need to read at least one column to find the number of rows.
|
||||
if (select_query && required.empty())
|
||||
{
|
||||
/// We will find a column with minimum compressed size. Because it is the column that is cheapest to read.
|
||||
size_t min_data_compressed = 0;
|
||||
String min_column_name;
|
||||
/// We will find a column with minimum <compressed_size, type_size, uncompressed_size>.
|
||||
/// Because it is the column that is cheapest to read.
|
||||
std::vector<ColumnExt> columns;
|
||||
if (storage)
|
||||
{
|
||||
auto column_sizes = storage->getColumnSizes();
|
||||
for (auto & [column_name, column_size] : column_sizes)
|
||||
for (auto & source_column : source_columns)
|
||||
{
|
||||
if (min_data_compressed == 0 || min_data_compressed > column_size.data_compressed)
|
||||
{
|
||||
min_data_compressed = column_size.data_compressed;
|
||||
min_column_name = column_name;
|
||||
}
|
||||
auto c = column_sizes.find(source_column.name);
|
||||
if (c == column_sizes.end())
|
||||
continue;
|
||||
size_t type_size = source_column.type->haveMaximumSizeOfValue() ? source_column.type->getMaximumSizeOfValueInMemory() : 100;
|
||||
columns.emplace_back(ColumnExt{c->second.data_compressed, type_size, c->second.data_uncompressed, source_column.name});
|
||||
}
|
||||
}
|
||||
if (min_data_compressed > 0)
|
||||
required.insert(min_column_name);
|
||||
if (columns.size())
|
||||
required.insert(std::min_element(columns.begin(), columns.end())->name);
|
||||
else
|
||||
/// If we have no information about column sizes, choose the column whose data type has the minimum size.
|
||||
required.insert(ExpressionActions::getSmallestColumn(source_columns));
|
||||
|
Loading…
Reference in New Issue
Block a user